mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix] Fix attention mask bug in D-Node of PD-split mode (#5245)
This commit is contained in:
@@ -515,6 +515,12 @@ class MTPProposer(Proposer):
|
||||
self.model_inputs["attn_mask_offsets_decoder"][idx : idx + 1] = (
|
||||
inputs["attention_mask_offset"][prefill_end_index - 1] + 1
|
||||
)
|
||||
if (
|
||||
self.fd_config.scheduler_config.splitwise_role == "decode"
|
||||
): # In PD, we continue to decode after P generates first token
|
||||
self.model_inputs["seq_lens_encoder"][idx : idx + 1] = 0
|
||||
# P-D split needs to roll back one step
|
||||
self.model_inputs["mask_rollback"][idx : idx + 1] = 1
|
||||
|
||||
# has_prefill_task = True
|
||||
elif request.task_type.value == RequestType.DECODE.value: # decode task
|
||||
|
||||
Reference in New Issue
Block a user