[Cherry-Pick] [BugFix] fix num_cpu_blocks computation (#6438) (#6473)

* [BugFix] fix num_cpu_blocks computation

* [fix] fix syntax and log

* [fix] pre-commit

* [fix] use getattr

* [fix] ci test
This commit is contained in:
Yonghua Li
2026-02-13 15:30:13 +08:00
committed by GitHub
parent 574d15f4d6
commit 4092d39fca
8 changed files with 162 additions and 57 deletions
@@ -124,8 +124,9 @@ class PrefixCacheManager:
self.cache_status_lock = Lock()
logger.info(
- f"num_gpu_blocks_server_owned {self.num_gpu_blocks} num_cpu_blocks "
- + f"{self.num_cpu_blocks}, bytes_per_layer_per_block {self.cache_config.bytes_per_layer_per_block}"
+ f"Prefix cache manager is initialized with {self.num_gpu_blocks} gpu blocks "
+ f"and {self.num_cpu_blocks} cpu blocks, bytes_per_token_per_layer for each rank: "
+ f"{self.cache_config.bytes_per_token_per_layer / self.config.parallel_config.tensor_parallel_size}"
)
main_process_metrics.max_gpu_block_num.set(self.num_gpu_blocks)