mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[RL] [KVCache] Let cache transfer managers update the key prefix after a weight update, and add unit tests (#7083)

* [test] add a few unit tests
* [feat] update key prefix when model weights are updated
* [test] try to fix test_worker_process
This commit is contained in:
@@ -1443,10 +1443,16 @@ class EngineService:
         # pause cache transfer
         if self.cfg.cache_config.num_cpu_blocks > 0 or self.cfg.cache_config.kvcache_storage_backend:
             self.llm_logger.info("Start to pause cache transfer.")
-            pause_transfer_request = ControlRequest(request_id="pause_transfer", method="pause")
+            pause_transfer_request = ControlRequest(
+                request_id=f"{control_request.request_id}_pause_transfer", method="pause"
+            )
             self.cache_task_queue.put_transfer_task((CacheStatus.CTRL, pause_transfer_request))
             # Wait for cache_transfer responses
-            asyncio.run(self._wait_for_control_responses("pause_transfer", 60, executors=["cache_transfer"]))
+            asyncio.run(
+                self._wait_for_control_responses(
+                    f"{pause_transfer_request.request_id}", 60, executors=["cache_transfer"]
+                )
+            )
             self.llm_logger.info("Successfully paused cache transfer.")

         self.resource_manager.cache_manager.reset()
@@ -1473,10 +1479,14 @@ class EngineService:
         # resume cache transfer
         if self.cfg.cache_config.num_cpu_blocks > 0 or self.cfg.cache_config.kvcache_storage_backend:
             self.llm_logger.info("Start to resume cache transfer.")
-            resume_transfer_request = ControlRequest(request_id="resume_transfer", method="resume")
+            resume_transfer_request = ControlRequest(
+                request_id=f"{control_request.request_id}_resume_transfer", method="resume"
+            )
             self.cache_task_queue.put_transfer_task((CacheStatus.CTRL, resume_transfer_request))
             # Wait for cache_transfer responses
-            asyncio.run(self._wait_for_control_responses("resume_transfer", 60, executors=["cache_transfer"]))
+            asyncio.run(
+                self._wait_for_control_responses(resume_transfer_request.request_id, 60, executors=["cache_transfer"])
+            )
             self.llm_logger.info("Successfully resumed cache transfer.")

         self.llm_logger.info("Successfully resumed request generation.")
@@ -1531,6 +1541,19 @@ class EngineService:
         if new_version is not None:
             self.cfg.model_config.version = new_version

+        if self.cfg.cache_config.num_cpu_blocks > 0 or self.cfg.cache_config.kvcache_storage_backend:
+            self.llm_logger.info("Start to update cache-transfer metadata after weight update.")
+            update_cache_request = ControlRequest(
+                request_id=f"{control_request.request_id}_update_weights",
+                method="update_weights",
+                args=copy.deepcopy(control_request.args),
+            )
+            self.cache_task_queue.put_transfer_task((CacheStatus.CTRL, update_cache_request))
+            asyncio.run(
+                self._wait_for_control_responses(update_cache_request.request_id, 60, executors=["cache_transfer"])
+            )
+            self.llm_logger.info("Successfully updated cache-transfer metadata after weight update.")
+
         return responses

     def _control_abort_requests(self, control_req: ControlRequest):
Reference in New Issue
Block a user