mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 08:21:53 +08:00
[Models][OP][Optimization] Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM (#6689)
* Support DeepSeek-v3.2 model, integrate DSA & Indexer architecture with FlashMLA/DeepGEMM
This commit is contained in:
@@ -20,7 +20,6 @@ from typing import Optional
|
||||
|
||||
import paddle
|
||||
from paddle.nn.quant import weight_quantize
|
||||
- from paddleformers.utils.log import logger
|
||||
|
||||
from fastdeploy import envs
|
||||
from fastdeploy.model_executor.layers.linear import (
|
||||
@@ -181,7 +180,7 @@ class WeightOnlyConfig(QuantConfigBase):
|
||||
and check_machete_supports_shape(layer.weight_shape[0], layer.weight_shape[1])
|
||||
):
|
||||
self.group_size = query_machete_supported_group_size(layer.weight_shape[0])
|
||||
- logger.info(f"Using Machete kernel for WeightOnlyLinearMethod, group size: {self.group_size}")
|
||||
+ # logger.info(f"Using Machete kernel for WeightOnlyLinearMethod, group size: {self.group_size}")
|
||||
return MacheteWeightOnlyLinearMethod(self)
|
||||
return GPUWeightOnlyLinearMethod(self)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user