Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2026-04-23 00:17:25 +08:00)
[BugFix] Refine the preparation of cpu and storage cache (#5777)
* Refine the preparation of cpu and storage cache
* fix error
* fix error
* up
* fix
* up docs
* fix unittest
* remove debug info
This commit is contained in:
@@ -376,7 +376,7 @@ class PrefixCacheManagerTest(unittest.TestCase):
|
||||
|
||||
self.assertEqual(common, [])
|
||||
self.assertEqual(matched_tokens, 0)
|
||||
self.assertEqual(hit_info["gpu_cache_blocks"], 0)
|
||||
self.assertEqual(hit_info["gpu_match_token_num"], 0)
|
||||
manager.metrics.reset_metrics.assert_called_once()
|
||||
|
||||
def test_get_required_block_num_rounds_up(self):
|
||||
@@ -809,7 +809,7 @@ class PrefixCacheManagerTest(unittest.TestCase):
|
||||
manager = _create_manager(num_gpu_blocks=2)
|
||||
req_id = "update-req"
|
||||
last_node = BlockNode(1, [], 0, 1, 0, 2, 0, 0, parent=manager.radix_tree_root)
|
||||
manager.cache_info[req_id] = (last_node, 0)
|
||||
manager.req_to_radix_tree_info[req_id] = (last_node, 0)
|
||||
manager.leaf_req_map[last_node].add(req_id)
|
||||
|
||||
new_leaf = BlockNode(2, [], 0, 1, 0, 2, 1, 0, parent=last_node)
|
||||
@@ -819,7 +819,7 @@ class PrefixCacheManagerTest(unittest.TestCase):
|
||||
|
||||
self.assertIs(manager.req_leaf_map[req_id], new_leaf)
|
||||
self.assertIn(req_id, manager.leaf_req_map[new_leaf])
|
||||
self.assertEqual(task.cached_block_num, 2)
|
||||
self.assertEqual(task.num_cached_blocks, 2)
|
||||
|
||||
def test_is_chunked_mm_input_detects_overlap(self):
|
||||
manager = _create_manager()
|
||||
@@ -1044,8 +1044,8 @@ class PrefixCacheManagerTest(unittest.TestCase):
|
||||
self.assertEqual(common_blocks[0], 0)
|
||||
self.assertGreaterEqual(matched_tokens, 4)
|
||||
mock_prepare_cpu.assert_called()
|
||||
self.assertEqual(hit_info["gpu_cache_blocks"], 1)
|
||||
self.assertEqual(hit_info["cpu_cache_blocks"], 1)
|
||||
self.assertEqual(hit_info["gpu_match_token_num"], block_size)
|
||||
self.assertEqual(hit_info["cpu_match_token_num"], block_size)
|
||||
|
||||
def test_release_block_ids_cleans_request_state(self):
|
||||
manager = _create_manager(num_gpu_blocks=4)
|
||||
|
||||
Reference in New Issue
Block a user