[Feature] Support reporting token index via attention store (#6285)

* [Feature] Support reporting token index via attention store

* fix format
This commit is contained in:
chenjian
2026-02-02 10:41:11 +08:00
committed by GitHub
parent afee0b9c5e
commit af1b1d2d56
3 changed files with 51 additions and 1 deletions
@@ -632,6 +632,15 @@ class CacheTransferManager:
except Exception as e:
logger.error(f"Failed to read cache for task {task.task_id}, error: {e}")
valid_gpu_block_ids = []
finally:
try:
if (self.rank == 0) and self.storage_backend_type == "attention_store":
self.storage_backend.flush_token_index(task.task_id, task.token_ids, 0, True)
logger.info(f"Report cache index in HBM to cache storage for task {task.task_id}")
except Exception as e:
logger.info(
f"Failed to report cache index in HBM to cache storage for task {task.task_id}, error: {e}"
)
result = (CacheStatus.STORAGE2GPU, task.task_id, task.keys, valid_gpu_block_ids)
self.cache_task_queue.swap_storage_to_gpu_barrier.wait()
@@ -770,6 +779,15 @@ class CacheTransferManager:
except Exception as e:
logger.error(f"Error in write back storage task: {e}")
gpu_block_ids = []
finally:
try:
if (self.rank == 0) and self.storage_backend_type == "attention_store":
self.storage_backend.flush_token_index(task.task_id, task.token_ids, 0, False)
logger.info(f"Report cache index out HBM to cache storage for task {task.task_id}")
except Exception as e:
logger.info(
f"Failed to report cache index out HBM to cache storage for task {task.task_id}, error: {e}"
)
result = (CacheStatus.GPU2STORAGE, task.task_id, task.keys, gpu_block_ids)
self.cache_task_queue.swap_to_storage_barrier.wait()