convert : match ssm_conv tensors by type

convert : fix squeeze for ssm_conv tensors
2026-04-16 16:27:32 +03:00 · 2025-03-25 14:29:22 -04:00 · 2025-03-25 19:54:18 +02:00
1 changed file with 4 additions and 2 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -3803,8 +3803,6 @@ class MambaModel(Model):
    _tok_embd = None

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        del bid  # unused
-
        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
        tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)

@@ -3814,6 +3812,10 @@ class MambaModel(Model):
            logger.debug("A_log --> A ==> " + new_name)
            data_torch = -torch.exp(data_torch)

+        # [4 1 8192 1] -> [4 8192 1 1]
+        if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid):
+            data_torch = data_torch.squeeze()
+
        # assuming token_embd.weight is seen before output.weight
        if self._tok_embd is not None and new_name == output_name:
            if torch.equal(self._tok_embd, data_torch):
Author	SHA1	Message	Date
Francis Couture-Harpin	20b256e0fd	convert : match ssm_conv tensors by type	2025-03-25 14:29:22 -04:00
Georgi Gerganov	9c60fc4c78	convert : fix squeeze for ssm_conv tensors	2025-03-25 19:54:18 +02:00