Fix performance drop when speculative decoding is disabled (#6866)

This commit is contained in:
huicongyao
2026-03-17 13:06:36 +08:00
committed by GitHub
parent fe8d58a094
commit eab429d05e
+1 -1
View File
@@ -202,7 +202,7 @@ class GPUModelRunner(ModelRunnerBase):
4 if not self.speculative_decoding else (self.speculative_config.num_speculative_tokens + 1) * 4
)
self.infer_seed_increment = paddle.full(
shape=[self.scheduler_config.max_num_seqs, 1], fill_value=self.increment_value, dtype="int64", device="cpu"
shape=[self.scheduler_config.max_num_seqs, 1], fill_value=self.increment_value, dtype="int64"
)
self.restore_chunked_prefill_request = dict()