Unify server-side and model-side Config (Part1) (#3018)

* move cache config

* fix mtp
This commit is contained in:
YuanRisheng
2025-07-28 10:51:52 +08:00
committed by GitHub
parent 8f426c1690
commit 6ccc10ad47
23 changed files with 243 additions and 289 deletions
+1 -1
View File
@@ -104,7 +104,7 @@ class XpuWorker(WorkerBase):
self.model_runner.prepare_profile()
self.model_runner.profile_run()
-total_available_memory = int(total_memory * self.parallel_config.gpu_memory_utilization)
+total_available_memory = int(total_memory * self.cache_config.gpu_memory_utilization)
used_memory = xpu_get_used_global_memory(self.local_rank)
available_kv_cache_memory = total_available_memory - used_memory
model_block_memory_used = self.cal_theortical_kvcache()