fix deep gemm import (#7425)

2026-04-22 16:07:51 +08:00 · 2026-04-16 17:56:56 +08:00
parent e9527208d9
commit 420a8c1af5
1 changed file with 1 addition and 3 deletions
@@ -77,8 +77,6 @@ if current_platform.is_cuda():
        radix_topk_ragged_transform,
    )

-    paddle.enable_compat(scope={"deep_gemm"})
-

 class DeepSeekV3MLP(nn.Layer):
    """
@@ -700,7 +698,7 @@ class Indexer(nn.Layer):
        # indexer write_cache
        indexer_k_quant_and_cache(k, self.indexer_cache, slot_mapping, self.quant_block_size, self.scale_fmt)

-        import deep_gemm
+        from fastdeploy.model_executor.layers.quantization.fp8_utils import deep_gemm

        if forward_meta.max_len_tensor_cpu[1]: