[BugFix] fix num_cpu_blocks computation (#6438)

* [BugFix] fix num_cpu_blocks computation

* [fix] fix syntax and log

* [fix] pre-commit

* [fix] use getattr

* [fix] ci test
This commit is contained in:
Yonghua Li
2026-02-13 11:05:14 +08:00
committed by GitHub
parent 52edf5e9b3
commit e2332a1112
9 changed files with 162 additions and 63 deletions
+2 -1
View File
@@ -35,7 +35,8 @@ def make_prefix_cache_manager(max_num_seqs, enable_mm=False, num_gpu_blocks_over
model_cfg.print = print
model_cfg.architectures = ["test_model"]
model_cfg.mm_max_tokens_per_item = None
-cache_cfg.bytes_per_layer_per_block = 1
+cache_cfg.bytes_per_token_per_layer = 1
parallel_cfg = ParallelConfig(args)
scheduler_cfg = SchedulerConfig(args)
graph_opt_cfg = engine_args.create_graph_optimization_config()
+3 -3
View File
@@ -81,7 +81,7 @@ def _build_manager(
model_cfg.max_model_len = max_model_len
model_cfg.architectures = architectures or ["test_model"]
model_cfg.mm_max_tokens_per_item = None
-cache_cfg.bytes_per_layer_per_block = 1
+cache_cfg.bytes_per_token_per_layer = 1
cache_cfg.kv_cache_ratio = 1.0
parallel_cfg = ParallelConfig(args)
scheduler_cfg = SchedulerConfig(args)
@@ -142,7 +142,7 @@ class TestResourceManagerV1(unittest.TestCase):
model_cfg.max_model_len = 3200
model_cfg.architectures = ["test_model"]
model_cfg.mm_max_tokens_per_item = None
-cache_cfg.bytes_per_layer_per_block = 1
+cache_cfg.bytes_per_token_per_layer = 1
cache_cfg.kv_cache_ratio = 1.0
parallel_cfg = ParallelConfig(args)
scheduler_cfg = SchedulerConfig(args)
@@ -304,7 +304,7 @@ class TestRevertChunkedMMInput(unittest.TestCase):
model_cfg.max_model_len = 3200
model_cfg.architectures = ["test_model"]
model_cfg.mm_max_tokens_per_item = None
-cache_cfg.bytes_per_layer_per_block = 1
+cache_cfg.bytes_per_token_per_layer = 1
cache_cfg.kv_cache_ratio = 1.0
cache_cfg.block_size = 64
parallel_cfg = ParallelConfig(args)