Unify server-side and model-side Config (Part1) (#3018)

* move cache config
* fix mtp
2026-04-23 00:17:25 +08:00 · 2025-07-28 10:51:52 +08:00
parent 8f426c1690
commit 6ccc10ad47
23 changed files with 243 additions and 289 deletions
@@ -104,7 +104,7 @@ class XpuWorker(WorkerBase):
        self.model_runner.prepare_profile()
        self.model_runner.profile_run()

-        total_available_memory = int(total_memory * self.parallel_config.gpu_memory_utilization)
+        total_available_memory = int(total_memory * self.cache_config.gpu_memory_utilization)
        used_memory = xpu_get_used_global_memory(self.local_rank)
        available_kv_cache_memory = total_available_memory - used_memory
        model_block_memory_used = self.cal_theortical_kvcache()