From 420a8c1af55516c4f276a8fa33a2df58bf6b31b1 Mon Sep 17 00:00:00 2001 From: RichardWooSJTU <37864677+RichardWooSJTU@users.noreply.github.com> Date: Thu, 16 Apr 2026 17:56:56 +0800 Subject: [PATCH] fix deep gemm import (#7425) --- fastdeploy/model_executor/models/deepseek_v3.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fastdeploy/model_executor/models/deepseek_v3.py b/fastdeploy/model_executor/models/deepseek_v3.py index 4d6e3ee1f8..c837e3f265 100644 --- a/fastdeploy/model_executor/models/deepseek_v3.py +++ b/fastdeploy/model_executor/models/deepseek_v3.py @@ -77,8 +77,6 @@ if current_platform.is_cuda(): radix_topk_ragged_transform, ) - paddle.enable_compat(scope={"deep_gemm"}) - class DeepSeekV3MLP(nn.Layer): """ @@ -700,7 +698,7 @@ class Indexer(nn.Layer): # indexer write_cache indexer_k_quant_and_cache(k, self.indexer_cache, slot_mapping, self.quant_block_size, self.scale_fmt) - import deep_gemm + from fastdeploy.model_executor.layers.quantization.fp8_utils import deep_gemm if forward_meta.max_len_tensor_cpu[1]: