mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix] fix async download bug (#5349)
* fix async download bug
* update log
* Revert "update log"
This reverts commit 5816e602f4.
* update code
* fix mtp bug
This commit is contained in:
@@ -203,9 +203,7 @@ class MTPProposer(Proposer):
         if kv_cache_quant_type == "block_wise_fp8":
             kv_cache_scale_shape = [key_cache_shape[0], key_cache_shape[1], key_cache_shape[2]]
         local_rank = self.local_rank % self.parallel_config.tensor_parallel_size
-        if not profile and (
-            self.cache_config.enable_prefix_caching or self.scheduler_config.splitwise_role != "mixed"
-        ):
+        if not profile and self.scheduler_config.splitwise_role != "mixed":
             cache_kvs_list = []
             for i in range(
                 self.num_main_model_layers,
Reference in New Issue
Block a user