Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2026-04-23 00:17:25 +08:00)
[Speculate Decoding] Fix reasoning_phase_token_constraint call args in SpeculativeSampler (#7402)
This commit is contained in:
@@ -996,7 +996,8 @@ class SpeculativeSampler(nn.Layer):
[Diff hunk; the added/removed (+/-) markers and original indentation were not preserved in this copy, and the hunk ends before the call's closing parenthesis.]

    if self.enf_gen_phase_tag:
        reasoning_phase_token_constraint(
            logits,
            sampling_metadata.pre_token_ids,
            token_ids_all,
            prompt_lens,
            share_inputs["stop_flags"],
            share_inputs["seq_lens_this_time"],
            share_inputs["seq_lens_encoder"],
Reference in New Issue
Block a user