Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2026-04-23 00:17:25 +08:00)
[TSP] Support qwen3 moe tsp + cudagraph (#4871)
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
* support qwen3_moe tsp mode
* fix
* fix
* update
* update
* update
* fix
* support external_rmsnorm
* update
* fix
This commit is contained in:
@@ -124,9 +124,7 @@ class Qwen2_5_VLModel(nn.Layer):
|
||||
residual,
|
||||
)
|
||||
|
||||
hidden_states = hidden_states + residual
|
||||
|
||||
out = self.norm(hidden_states)
|
||||
out = self.norm(hidden_states, residual)[0]
|
||||
|
||||
return out
|
||||
|
||||
@@ -262,21 +260,6 @@ class Qwen2_5_VLForConditionalGeneration(ModelForCasualLM):
|
||||
|
||||
return logits
|
||||
|
||||
def empty_input_forward(self):
|
||||
"""
|
||||
empty_input_forward
|
||||
"""
|
||||
fake_hidden_states = paddle.empty(
|
||||
shape=[0, self.fd_config.model_config.hidden_size],
|
||||
dtype=paddle.get_default_dtype(),
|
||||
)
|
||||
for i in range(
|
||||
self.fd_config.model_config.moe_layer_start_index,
|
||||
self.fd_config.model_config.num_hidden_layers,
|
||||
):
|
||||
self.ernie.layers[i].mlp.text_fused_moe(fake_hidden_states)
|
||||
self.ernie.layers[i].mlp.image_fused_moe(fake_hidden_states)
|
||||
|
||||
def get_input_embeddings(
|
||||
self,
|
||||
ids_remove_padding: paddle.Tensor,
|
||||
|
||||
Reference in New Issue
Block a user