mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Support report token index by attention store (#6285)
* [Feature] Support report token index by attention store
* fix format
This commit is contained in:
@@ -632,6 +632,15 @@ class CacheTransferManager:
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to read cache for task {task.task_id}, error: {e}")
|
||||
valid_gpu_block_ids = []
|
||||
finally:
|
||||
try:
|
||||
if (self.rank == 0) and self.storage_backend_type == "attention_store":
|
||||
self.storage_backend.flush_token_index(task.task_id, task.token_ids, 0, True)
|
||||
logger.info(f"Report cache index in HBM to cache storage for task {task.task_id}")
|
||||
except Exception as e:
|
||||
logger.info(
|
||||
f"Failed to report cache index in HBM to cache storage for task {task.task_id}, error: {e}"
|
||||
)
|
||||
|
||||
result = (CacheStatus.STORAGE2GPU, task.task_id, task.keys, valid_gpu_block_ids)
|
||||
self.cache_task_queue.swap_storage_to_gpu_barrier.wait()
|
||||
@@ -770,6 +779,15 @@ class CacheTransferManager:
|
||||
except Exception as e:
|
||||
logger.error(f"Error in write back storage task: {e}")
|
||||
gpu_block_ids = []
|
||||
finally:
|
||||
try:
|
||||
if (self.rank == 0) and self.storage_backend_type == "attention_store":
|
||||
self.storage_backend.flush_token_index(task.task_id, task.token_ids, 0, False)
|
||||
logger.info(f"Report cache index out HBM to cache storage for task {task.task_id}")
|
||||
except Exception as e:
|
||||
logger.info(
|
||||
f"Failed to report cache index out HBM to cache storage for task {task.task_id}, error: {e}"
|
||||
)
|
||||
|
||||
result = (CacheStatus.GPU2STORAGE, task.task_id, task.keys, gpu_block_ids)
|
||||
self.cache_task_queue.swap_to_storage_barrier.wait()
|
||||
|
||||
Reference in New Issue
Block a user