[Models][OP][Optimization] Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM (#6689)

* Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM
2026-04-23 08:21:53 +08:00 · 2026-03-10 15:05:14 +08:00
parent 25c479312d
commit c3aceb6bdc
22 changed files with 8022 additions and 143 deletions
@@ -20,7 +20,6 @@ from typing import Optional

 import paddle
 from paddle.nn.quant import weight_quantize
-from paddleformers.utils.log import logger

 from fastdeploy import envs
 from fastdeploy.model_executor.layers.linear import (
@@ -181,7 +180,7 @@ class WeightOnlyConfig(QuantConfigBase):
                    and check_machete_supports_shape(layer.weight_shape[0], layer.weight_shape[1])
                ):
                    self.group_size = query_machete_supported_group_size(layer.weight_shape[0])
-                    logger.info(f"Using Machete kernel for WeightOnlyLinearMethod, group size: {self.group_size}")
+                    # logger.info(f"Using Machete kernel for WeightOnlyLinearMethod, group size: {self.group_size}")
                    return MacheteWeightOnlyLinearMethod(self)
                return GPUWeightOnlyLinearMethod(self)