mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-22 16:07:51 +08:00
[BugFix][XPU] Fix kv_cache management bug (#7420)
This commit is contained in:
@@ -1259,7 +1259,11 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
# Check if the XPU runner needs to create the kv cache
|
||||
# 1. During profiling, it creates its own kv cache.
|
||||
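# 2. Otherwise the XPU runner creates the kv cache tensor unless CPU cache blocks (num_cpu_blocks > 0), a kvcache storage backend, or p/d disaggregation (splitwise_role != "mixed") is configured.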
|
||||
create_cache_tensor = profile or self.scheduler_config.splitwise_role == "mixed"
|
||||
create_cache_tensor = profile or not (
|
||||
self.fd_config.cache_config.num_cpu_blocks > 0
|
||||
or self.fd_config.cache_config.kvcache_storage_backend
|
||||
or self.fd_config.scheduler_config.splitwise_role != "mixed"
|
||||
)
|
||||
if not create_cache_tensor:
|
||||
logger.info(f"Waiting for cache managers to create kv cache.. {cache_ready_signal.value}")
|
||||
while cache_ready_signal.value[local_rank] != 1:
|
||||
|
||||
Reference in New Issue
Block a user