mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
fix fetch request bug (#5712)
This commit is contained in:
@@ -1518,7 +1518,10 @@ class FDConfig:
 self.long_prefill_token_threshold = int(self.model_config.max_model_len * 0.04)

 self.cache_config.postprocess(self.scheduler_config.max_num_batched_tokens, self.scheduler_config.max_num_seqs)
-self.cache_config.max_block_num_per_seq = int(self.model_config.max_model_len // self.cache_config.block_size)
+self.cache_config.max_block_num_per_seq = int(
+    (self.model_config.max_model_len + self.cache_config.block_size - 1) // self.cache_config.block_size
+    + self.cache_config.enc_dec_block_num
+)
 if self.model_config is not None and self.model_config.enable_mm and not envs.ENABLE_V1_KVCACHE_SCHEDULER:
     self.cache_config.enable_prefix_caching = False

Reference in New Issue
Block a user