From e9527208d91ba56e9f1812fd04a1eff516fe7f83 Mon Sep 17 00:00:00 2001
From: ddchenhao66 <165133255+ddchenhao66@users.noreply.github.com>
Date: Thu, 16 Apr 2026 15:45:45 +0800
Subject: [PATCH] [BugFix][XPU] Fix kv_cache management bug (#7420)

---
 fastdeploy/worker/xpu_model_runner.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/worker/xpu_model_runner.py b/fastdeploy/worker/xpu_model_runner.py
index 0e3e6b4a3e..c6dacb44f8 100644
--- a/fastdeploy/worker/xpu_model_runner.py
+++ b/fastdeploy/worker/xpu_model_runner.py
@@ -1259,7 +1259,11 @@ class XPUModelRunner(ModelRunnerBase):
         # Check if gpu runner needs to create kv cache
         # 1. During profiling, it creates its own kv cache.
         # 2. GPU runner creates kv cache tensor unless p/d disaggregation is enabled.
-        create_cache_tensor = profile or self.scheduler_config.splitwise_role == "mixed"
+        create_cache_tensor = profile or not (
+            self.fd_config.cache_config.num_cpu_blocks > 0
+            or self.fd_config.cache_config.kvcache_storage_backend
+            or self.fd_config.scheduler_config.splitwise_role != "mixed"
+        )
         if not create_cache_tensor:
             logger.info(f"Waiting for cache managers to create kv cache.. {cache_ready_signal.value}")
             while cache_ready_signal.value[local_rank] != 1: