mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
Fix performance drop when speculative decoding is disabled (#6866)
This commit is contained in:
@@ -202,7 +202,7 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
4 if not self.speculative_decoding else (self.speculative_config.num_speculative_tokens + 1) * 4
|
||||
)
|
||||
self.infer_seed_increment = paddle.full(
|
||||
- shape=[self.scheduler_config.max_num_seqs, 1], fill_value=self.increment_value, dtype="int64", device="cpu"
|
||||
+ shape=[self.scheduler_config.max_num_seqs, 1], fill_value=self.increment_value, dtype="int64"
|
||||
)
|
||||
|
||||
self.restore_chunked_prefill_request = dict()
|
||||
|
||||
Reference in New Issue
Block a user