Write the cache of preempted req to storage (#7113)

This commit is contained in:
jc
2026-04-01 13:16:12 +08:00
committed by GitHub
parent 6d0d404a9b
commit bd48640b4b
5 changed files with 16 additions and 7 deletions
@@ -796,7 +796,7 @@ class CacheTransferManager:
try:
valid_gpu_block_ids = self._run_read_storage(
task.task_id,
-task.token_ids[: match_block_num * self.block_size],
+task.token_ids[: match_block_num * self.block_size] if task.token_ids else None,
task.start_read_block_idx,
k_cache_keys,
v_cache_keys,