mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[RL][CI] Support Async R3 And Add Accuracy Test (#5937)
* add bs1 r3 test case * async put * r3 test case 1.0 * success run eb5 * refine test case * pre-commit * add eb45 & glm testcase * format code * add p2pstore requirements * support only last turn * R3 use worker log * refine code & fix ci bug * refine error message * fix empty input bug * Success set acc ci of eb45 and glm45 * refine code * fix bug
This commit is contained in:
@@ -644,14 +644,16 @@ class FusedMoE(nn.Layer):
|
||||
"""
|
||||
topk_ids_hookfunc = None
|
||||
if self.enable_routing_replay:
|
||||
if forward_meta is not None: # forward_meta is None when execute empty_input_forward
|
||||
# forward_meta is None when executing empty_input_forward; routing_replay_table is None when executing an MTP layer.
|
||||
if forward_meta is not None and forward_meta.routing_replay_table is not None:
|
||||
moe_layer_idx = self.layer_idx - self.fd_config.model_config.moe_layer_start_index
|
||||
topk_ids_hookfunc = partial(
|
||||
save_routing_to_buffer,
|
||||
routing_replay_table=forward_meta.routing_replay_table,
|
||||
batch_id_per_token=forward_meta.batch_id_per_token,
|
||||
seq_lens_decoder=forward_meta.seq_lens_decoder,
|
||||
cu_seqlens_q=forward_meta.cu_seqlens_q,
|
||||
layer_idx=self.layer_idx,
|
||||
layer_idx=moe_layer_idx,
|
||||
tp_size=self.fd_config.parallel_config.tensor_parallel_size,
|
||||
ep_size=self.fd_config.parallel_config.expert_parallel_size,
|
||||
tp_group=self.fd_config.parallel_config.tp_group,
|
||||
|
||||
Reference in New Issue
Block a user