mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[PD Disaggregation] Write the cache of preempted req to storage and refine PD Disaggregation (#7107)
* Write the cache of preempted req to storage
* up
* fix
This commit is contained in:
@@ -796,7 +796,7 @@ class CacheTransferManager:
|
||||
try:
|
||||
valid_gpu_block_ids = self._run_read_storage(
|
||||
task.task_id,
|
||||
- task.token_ids[: match_block_num * self.block_size],
|
||||
+ task.token_ids[: match_block_num * self.block_size] if task.token_ids else None,
|
||||
task.start_read_block_idx,
|
||||
k_cache_keys,
|
||||
v_cache_keys,
|
||||
|
||||
Reference in New Issue
Block a user