mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Support report token index by attention store (#6285)
* [Feature] Support report token index by attention store
* fix format
This commit is contained in:
@@ -632,6 +632,15 @@ class CacheTransferManager:
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to read cache for task {task.task_id}, error: {e}")
|
||||
valid_gpu_block_ids = []
|
||||
finally:
|
||||
try:
|
||||
if (self.rank == 0) and self.storage_backend_type == "attention_store":
|
||||
self.storage_backend.flush_token_index(task.task_id, task.token_ids, 0, True)
|
||||
logger.info(f"Report cache index in HBM to cache storage for task {task.task_id}")
|
||||
except Exception as e:
|
||||
logger.info(
|
||||
f"Failed to report cache index in HBM to cache storage for task {task.task_id}, error: {e}"
|
||||
)
|
||||
|
||||
result = (CacheStatus.STORAGE2GPU, task.task_id, task.keys, valid_gpu_block_ids)
|
||||
self.cache_task_queue.swap_storage_to_gpu_barrier.wait()
|
||||
@@ -770,6 +779,15 @@ class CacheTransferManager:
|
||||
except Exception as e:
|
||||
logger.error(f"Error in write back storage task: {e}")
|
||||
gpu_block_ids = []
|
||||
finally:
|
||||
try:
|
||||
if (self.rank == 0) and self.storage_backend_type == "attention_store":
|
||||
self.storage_backend.flush_token_index(task.task_id, task.token_ids, 0, False)
|
||||
logger.info(f"Report cache index out HBM to cache storage for task {task.task_id}")
|
||||
except Exception as e:
|
||||
logger.info(
|
||||
f"Failed to report cache index out HBM to cache storage for task {task.task_id}, error: {e}"
|
||||
)
|
||||
|
||||
result = (CacheStatus.GPU2STORAGE, task.task_id, task.keys, gpu_block_ids)
|
||||
self.cache_task_queue.swap_to_storage_barrier.wait()
|
||||
|
||||
Reference in New Issue
Block a user