From 73bd4ab318df3f43a5e84c85f156937cef844a43 Mon Sep 17 00:00:00 2001
From: 周周周 <39978853+zhoutianzi666@users.noreply.github.com>
Date: Mon, 13 Apr 2026 20:24:58 +0800
Subject: [PATCH] [Feature] Add explicit hidden_size parameter support to
 FusedMoE (#7361)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[Feature] Add explicit hidden_size parameter support to FusedMoE
---
 fastdeploy/model_executor/layers/moe/moe.py               | 3 ++-
 fastdeploy/model_executor/models/deepseek_v3.py           | 1 +
 fastdeploy/model_executor/models/ernie4_5_moe.py          | 1 +
 .../model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py  | 1 +
 fastdeploy/model_executor/models/glm4_moe.py              | 1 +
 fastdeploy/model_executor/models/gpt_oss.py               | 1 +
 fastdeploy/model_executor/models/qwen3moe.py              | 1 +
 tests/layers/test_fusedmoe.py                             | 1 +
 tests/layers/test_nvfp4_fusedmoe.py                       | 1 +
 tests/layers/test_w4a8_moe.py                             | 1 +
 tests/layers/test_w4afp8_moe.py                           | 1 +
 11 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/moe/moe.py b/fastdeploy/model_executor/layers/moe/moe.py
index 4a2e2819a9..120a4ecc30 100644
--- a/fastdeploy/model_executor/layers/moe/moe.py
+++ b/fastdeploy/model_executor/layers/moe/moe.py
@@ -153,6 +153,7 @@ class FusedMoE(nn.Layer):
     def __init__(
         self,
         fd_config,
+        hidden_size: int = -1,
         reduce_results: bool = True,
         renormalize: bool = False,
         moe_intermediate_size: int = -1,
@@ -204,7 +205,7 @@ class FusedMoE(nn.Layer):
             self.tp_size == 1 and self.ep_size > 1
         ), "MoE only support parallelism on TP or EP dimension."
 
-        self.hidden_size = fd_config.model_config.hidden_size
+        self.hidden_size = hidden_size
 
         self.num_experts = num_experts
         self.num_local_experts = self.num_experts // self.ep_size
diff --git a/fastdeploy/model_executor/models/deepseek_v3.py b/fastdeploy/model_executor/models/deepseek_v3.py
index 4ada12da7e..a975144f73 100644
--- a/fastdeploy/model_executor/models/deepseek_v3.py
+++ b/fastdeploy/model_executor/models/deepseek_v3.py
@@ -167,6 +167,7 @@ class DeepSeekV3MoE(nn.Layer):
 
         self.experts = FusedMoE(
             fd_config=fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             reduce_results=False,
             renormalize=self.norm_topk_prob,
             moe_intermediate_size=fd_config.model_config.moe_intermediate_size,
diff --git a/fastdeploy/model_executor/models/ernie4_5_moe.py b/fastdeploy/model_executor/models/ernie4_5_moe.py
index 4cc4306de5..eb0aeae110 100644
--- a/fastdeploy/model_executor/models/ernie4_5_moe.py
+++ b/fastdeploy/model_executor/models/ernie4_5_moe.py
@@ -210,6 +210,7 @@ class Ernie4_5_MoE(nn.Layer):
 
         self.experts = FusedMoE(
             fd_config=fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             moe_intermediate_size=fd_config.model_config.moe_intermediate_size,
             num_experts=fd_config.model_config.moe_num_experts,
             top_k=fd_config.model_config.moe_k,
diff --git a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
index f4d70108e4..ad26a386bd 100644
--- a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
+++ b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
@@ -148,6 +148,7 @@ class Ernie4_5_VLMoeBlock(nn.Layer):
         )
         self.experts = FusedMoE(
             fd_config=fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             reduce_results=False,
             moe_intermediate_size=moe_intermediate_size,
             num_experts=num_experts,
diff --git a/fastdeploy/model_executor/models/glm4_moe.py b/fastdeploy/model_executor/models/glm4_moe.py
index 0e8ebd234a..947df7c038 100644
--- a/fastdeploy/model_executor/models/glm4_moe.py
+++ b/fastdeploy/model_executor/models/glm4_moe.py
@@ -169,6 +169,7 @@ class Glm4Moe(nn.Layer):
 
         self.experts = FusedMoE(
             fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             reduce_results=not self.merge_ffn_tp,
             renormalize=self.norm_topk_prob,
             moe_intermediate_size=fd_config.model_config.moe_intermediate_size,
diff --git a/fastdeploy/model_executor/models/gpt_oss.py b/fastdeploy/model_executor/models/gpt_oss.py
index a6cf231ed2..0ef1b451c0 100644
--- a/fastdeploy/model_executor/models/gpt_oss.py
+++ b/fastdeploy/model_executor/models/gpt_oss.py
@@ -114,6 +114,7 @@ class GptOssMoe(nn.Layer):
 
         self.experts = FusedMoE(
             fd_config=fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             moe_intermediate_size=fd_config.model_config.intermediate_size,
             num_experts=num_local_experts,
             top_k=fd_config.model_config.num_experts_per_tok,
diff --git a/fastdeploy/model_executor/models/qwen3moe.py b/fastdeploy/model_executor/models/qwen3moe.py
index 6c443d68bc..27f0fd82d3 100644
--- a/fastdeploy/model_executor/models/qwen3moe.py
+++ b/fastdeploy/model_executor/models/qwen3moe.py
@@ -63,6 +63,7 @@ class Qwen3MoeBlock(nn.Layer):
         }
         self.experts = FusedMoE(
             fd_config,
+            hidden_size=fd_config.model_config.hidden_size,
             moe_intermediate_size=fd_config.model_config.moe_intermediate_size,
             num_experts=fd_config.model_config.num_experts,
             top_k=fd_config.model_config.num_experts_per_tok,
diff --git a/tests/layers/test_fusedmoe.py b/tests/layers/test_fusedmoe.py
index d97363fe75..5c8e74e0f6 100644
--- a/tests/layers/test_fusedmoe.py
+++ b/tests/layers/test_fusedmoe.py
@@ -509,6 +509,7 @@ class FuseMoEWrapper(paddle.nn.Layer):
 
         self.fused_moe = FusedMoE(
             fd_config=self.fd_config,
+            hidden_size=self.fd_config.model_config.hidden_size,
             moe_intermediate_size=self.fd_config.model_config.moe_intermediate_size,
             num_experts=self.fd_config.model_config.moe_num_experts,
             top_k=self.fd_config.model_config.moe_k,
diff --git a/tests/layers/test_nvfp4_fusedmoe.py b/tests/layers/test_nvfp4_fusedmoe.py
index ee2b8841df..10def582ef 100644
--- a/tests/layers/test_nvfp4_fusedmoe.py
+++ b/tests/layers/test_nvfp4_fusedmoe.py
@@ -516,6 +516,7 @@ class FuseMoEWrapper(paddle.nn.Layer):
 
         self.fused_moe = FusedMoE(
             fd_config=self.fd_config,
+            hidden_size=self.fd_config.model_config.hidden_size,
             moe_intermediate_size=self.fd_config.model_config.moe_intermediate_size,
             num_experts=self.fd_config.model_config.moe_num_experts,
             top_k=self.fd_config.model_config.moe_k,
diff --git a/tests/layers/test_w4a8_moe.py b/tests/layers/test_w4a8_moe.py
index 9584702223..21a68addb2 100644
--- a/tests/layers/test_w4a8_moe.py
+++ b/tests/layers/test_w4a8_moe.py
@@ -110,6 +110,7 @@ class FuseMoEWrapper(paddle.nn.Layer):
 
         self.fused_moe = FusedMoE(
             fd_config=self.fd_config,
+            hidden_size=self.fd_config.model_config.hidden_size,
             moe_intermediate_size=self.fd_config.model_config.moe_intermediate_size,
             num_experts=self.fd_config.model_config.moe_num_experts,
             top_k=self.fd_config.model_config.moe_k,
diff --git a/tests/layers/test_w4afp8_moe.py b/tests/layers/test_w4afp8_moe.py
index f21834354c..831f54fba4 100644
--- a/tests/layers/test_w4afp8_moe.py
+++ b/tests/layers/test_w4afp8_moe.py
@@ -96,6 +96,7 @@ class FuseMoEWrapper(paddle.nn.Layer):
 
         self.fused_moe = FusedMoE(
             fd_config=self.fd_config,
+            hidden_size=self.fd_config.model_config.hidden_size,
             moe_intermediate_size=self.fd_config.model_config.moe_intermediate_size,
             num_experts=self.fd_config.model_config.moe_num_experts,
             top_k=self.fd_config.model_config.moe_k,
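-- 
Usage sketch (illustrative, not part of the patch). With this change the
layer no longer reads fd_config.model_config.hidden_size itself, and the
default of -1 is only a placeholder, so callers are expected to pass the
width explicitly, as every call site above now does. The snippet assumes a
populated FastDeploy fd_config object is already in scope:

    from fastdeploy.model_executor.layers.moe.moe import FusedMoE

    # hidden_size is now an explicit argument: a block whose width differs
    # from the model-wide text hidden size (e.g. a vision branch) can pass
    # its own value instead of the global one.
    experts = FusedMoE(
        fd_config=fd_config,
        hidden_size=fd_config.model_config.hidden_size,
        moe_intermediate_size=fd_config.model_config.moe_intermediate_size,
        num_experts=fd_config.model_config.moe_num_experts,
        top_k=fd_config.model_config.moe_k,
    )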