[BugFix] Refine the preparation of cpu and storage cache (#5777)

* Refine the preparation of cpu and storage cache

* fix error

* fix error

* up

* fix

* update docs

* fix unittest

* remove debug info
This commit is contained in:
jc
2026-01-05 10:13:30 +08:00
committed by GitHub
parent 95257c1dbd
commit e911ac2ce7
10 changed files with 156 additions and 149 deletions
@@ -376,7 +376,7 @@ class PrefixCacheManagerTest(unittest.TestCase):
self.assertEqual(common, [])
self.assertEqual(matched_tokens, 0)
self.assertEqual(hit_info["gpu_cache_blocks"], 0)
self.assertEqual(hit_info["gpu_match_token_num"], 0)
manager.metrics.reset_metrics.assert_called_once()
def test_get_required_block_num_rounds_up(self):
@@ -809,7 +809,7 @@ class PrefixCacheManagerTest(unittest.TestCase):
manager = _create_manager(num_gpu_blocks=2)
req_id = "update-req"
last_node = BlockNode(1, [], 0, 1, 0, 2, 0, 0, parent=manager.radix_tree_root)
manager.cache_info[req_id] = (last_node, 0)
manager.req_to_radix_tree_info[req_id] = (last_node, 0)
manager.leaf_req_map[last_node].add(req_id)
new_leaf = BlockNode(2, [], 0, 1, 0, 2, 1, 0, parent=last_node)
@@ -819,7 +819,7 @@ class PrefixCacheManagerTest(unittest.TestCase):
self.assertIs(manager.req_leaf_map[req_id], new_leaf)
self.assertIn(req_id, manager.leaf_req_map[new_leaf])
self.assertEqual(task.cached_block_num, 2)
self.assertEqual(task.num_cached_blocks, 2)
def test_is_chunked_mm_input_detects_overlap(self):
manager = _create_manager()
@@ -1044,8 +1044,8 @@ class PrefixCacheManagerTest(unittest.TestCase):
self.assertEqual(common_blocks[0], 0)
self.assertGreaterEqual(matched_tokens, 4)
mock_prepare_cpu.assert_called()
self.assertEqual(hit_info["gpu_cache_blocks"], 1)
self.assertEqual(hit_info["cpu_cache_blocks"], 1)
self.assertEqual(hit_info["gpu_match_token_num"], block_size)
self.assertEqual(hit_info["cpu_match_token_num"], block_size)
def test_release_block_ids_cleans_request_state(self):
manager = _create_manager(num_gpu_blocks=4)