[XPU] [bugfix] Fix moe_ffn_quant_type_map bugs related to data type and tensor shape (#6337)

This commit is contained in:
zccjjj
2026-02-27 09:55:41 +08:00
committed by GitHub
parent 7b1d787b4b
commit c34cb2a8c2
2 changed files with 11 additions and 2 deletions
@@ -466,8 +466,9 @@ class XPUMoEMethod(MoEMethodBase):
topk_idx, topk_weights = self.ep_prefill_runner.moe_select(layer, gate_out)
# 2. Dynamic compute blockwise quantization scales
if "a_tokenwise_int8" in self.xpu_moe_quant_type and x.shape[0] > 0:
if "a_tokenwise_int8" in self.xpu_moe_quant_type:
x, x_scale = quant2d_per_token(x)
x_scale = x_scale.unsqueeze(1)
else:
x_scale = None
@@ -24,6 +24,7 @@ EP4TP1在线服务测试 - Expert Parallel + Tensor Parallel
"""
import os
import subprocess
import time
@@ -96,6 +97,10 @@ def test_ep4tp1_online(xpu_env):
# 设置EP环境变量
original_env = setup_ep_env()
# 设置MOE量化环境变量
os.environ["FD_XPU_MOE_FFN_QUANT_TYPE_MAP"] = "w_channelwise_int4_a_tokenwise_int8:8->53"
print(f"设置环境变量: FD_XPU_MOE_FFN_QUANT_TYPE_MAP={os.environ['FD_XPU_MOE_FFN_QUANT_TYPE_MAP']}")
stop_processes()
cleanup_resources()
@@ -135,7 +140,7 @@ def test_ep4tp1_online(xpu_env):
f"{','.join([str(i) for i in metrics_ports])}",
"--args",
"--model",
f"{model_path}/ERNIE-4.5-21B-A3B-Paddle",
f"{model_path}/ERNIE-4.5-300B-A47B-Paddle",
"--engine-worker-queue-port",
f"{','.join([str(i) for i in engine_worker_queue_ports])}",
"--max-model-len",
@@ -196,6 +201,9 @@ def test_ep4tp1_online(xpu_env):
finally:
# 恢复环境变量
if "FD_XPU_MOE_FFN_QUANT_TYPE_MAP" in os.environ:
del os.environ["FD_XPU_MOE_FFN_QUANT_TYPE_MAP"]
print("删除环境变量: FD_XPU_MOE_FFN_QUANT_TYPE_MAP")
restore_env(original_env)