mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[New][RL] Support Rollout Routing Replay (#5405)
* [RL] Support Rollout Routing Replay
* add routing indices cache
* fix config bug and moe forward bug
* R3 Support GLM
* support eb4.5
* fix merge bug
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* add routing replay ci
* support glm topk
* support other top_k
* fix ci bug
* pre-commit
* only support chatcmpl
* Revert "Revert "[RL] Support Rollout Routing Replay (#5321)" (#5402)"
This reverts commit c45e064f3d.
* Fix XPU and NPU bug
---------
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yuanle Liu <yuanlehome@163.com>
This commit is contained in:
@@ -161,7 +161,7 @@ class Glm4Moe(nn.Layer):
             reduce_results=False,
         )

-    def forward(self, x, forward_meta):
+    def forward(self, x, forward_meta: ForwardMeta = None):
         shared_experts_out = self.shared_experts(x)
         out = self.experts(x, self.gate, forward_meta)
         out = out + shared_experts_out
||||
@@ -306,10 +306,7 @@ class Glm4MoeDecoderLayer(nn.Layer):
         # Fully Connected
         hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)

-        hidden_states = self.mlp(
-            hidden_states,
-            forward_meta,
-        )
+        hidden_states = self.mlp(hidden_states, forward_meta)

         return hidden_states, residual
Reference in New Issue
Block a user