mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix] Add safety checks in recycle_gpu_blocks to prevent block allocation errors (#6531)
* [BugFix] Add safety checks in recycle_gpu_blocks to prevent block allocation errors - Check prefix tree status before recycling GPU blocks - Validate gpu_block_ids is a list - Add overflow check to prevent free block count exceeding total blocks Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * [BugFix] Fix AttributeError in recycle_gpu_blocks when prefix_tree_status_signal not initialized - Add hasattr check before accessing prefix_tree_status_signal - The signal is only initialized in launch_cache_messager, not in __init__ - Fixes CI test failure in test_prefix_cache_manager.py Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * [BugFix] Reset prefix cache when model weights are updating - Call self.reset() before setting status to NORMAL in UPDATING state - Ensure cache consistency when model weights change - Consistent with CLEARING state handling Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -498,6 +498,24 @@ class PrefixCacheManager:
|
||||
"""
|
||||
Recycle GPU blocks back into the free block list.
|
||||
"""
|
||||
if (
|
||||
hasattr(self, "prefix_tree_status_signal")
|
||||
and self.prefix_tree_status_signal.value[0] != PrefixTreeStatus.NORMAL
|
||||
):
|
||||
# Prefix Tree Clearing, skip recycle gpu blocks
|
||||
logger.warning("Prefix tree is not normal, skip recycle gpu blocks")
|
||||
return
|
||||
if not isinstance(gpu_block_ids, list):
|
||||
gpu_block_ids = [gpu_block_ids]
|
||||
if len(self.gpu_free_block_list) + len(gpu_block_ids) > self.num_gpu_blocks:
|
||||
# Block allocation/recycling bookkeeping is inconsistent here, so the recycled
# block IDs cannot be trusted; discard them rather than overflow the free list.
|
||||
logger.error(
|
||||
f"The number of free gpu blocks {len(self.gpu_free_block_list)} plus the number of recycled "
|
||||
f"gpu blocks {len(gpu_block_ids)} exceeds the total number of gpu blocks {self.num_gpu_blocks} \n"
|
||||
f"this indicates a block allocation and deallocation error, recycled blocks will be discarded {gpu_block_ids}"
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(
|
||||
f"req_id:{req_id} recycle_gpu_blocks: {gpu_block_ids}, len(self.gpu_free_block_list) {len(self.gpu_free_block_list)}"
|
||||
)
|
||||
@@ -2143,6 +2161,7 @@ class PrefixCacheManager:
|
||||
prefix_tree_status_signal.value[0] = PrefixTreeStatus.CLEARED
|
||||
logger.info("Prefix cache tree is cleared.")
|
||||
if prefix_tree_status_signal.value[0] == PrefixTreeStatus.UPDATING:
|
||||
self.reset()
|
||||
prefix_tree_status_signal.value[0] = PrefixTreeStatus.NORMAL
|
||||
logger.info("Prefix cache tree is updated.")
|
||||
time.sleep(0.01)
|
||||
|
||||
Reference in New Issue
Block a user