From f7a2418ce20c7006b89029e4d40379acc61b0b38 Mon Sep 17 00:00:00 2001 From: lonelygsh <80582973+lonelygsh@users.noreply.github.com> Date: Wed, 15 Apr 2026 12:45:23 +0800 Subject: [PATCH] [Speculate Decoding] Fix reasoning_phase_token_constraint call args in SpeculativeSampler (#7402) --- fastdeploy/model_executor/layers/sample/sampler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fastdeploy/model_executor/layers/sample/sampler.py b/fastdeploy/model_executor/layers/sample/sampler.py index 08a33c1109..c08395c964 100644 --- a/fastdeploy/model_executor/layers/sample/sampler.py +++ b/fastdeploy/model_executor/layers/sample/sampler.py @@ -996,7 +996,8 @@ class SpeculativeSampler(nn.Layer): if self.enf_gen_phase_tag: reasoning_phase_token_constraint( logits, - sampling_metadata.pre_token_ids, + token_ids_all, + prompt_lens, share_inputs["stop_flags"], share_inputs["seq_lens_this_time"], share_inputs["seq_lens_encoder"],