[Feature] support v1 update/clear api for RL (#6761)

* [Feature] support v1 update/clear api for RL

* [fix] fix execute_model and add sleep/wakeup api

* [fix] fix mtp and key_prefix

* [chore] move _update_key_prefix to resume method

* [fix] make the interface safe to call multiple times

* [fix] fix some tiny bugs

* [chore] make small changes in response to PR review

* [docs] add docs for weight update

* [test] add some tests and update docs

* [style] fix code style check

* [test] fix ci

* [fix] fix stale control responses when a control method times out

* [chore] remove unused code

* [chore] fix code style

* [chore] optimize tags and key_prefix

* [test] fix ci

* [chore] fix code style

* [test] fix ci

* [fix] fix ep control

* [fix] fix ep control for engine cache queue
This commit is contained in:
Yonghua Li
2026-03-25 19:18:46 +08:00
committed by GitHub
parent 48cfb608aa
commit a7f52c300d
26 changed files with 1857 additions and 392 deletions
@@ -217,7 +217,7 @@ class PrefixCacheManager:
is_server=False,
num_client=tensor_parallel_size,
client_id=0,
local_data_parallel_id=self.local_data_parallel_id,
local_data_parallel_id=0,
)
current_dir_path = os.path.split(os.path.abspath(__file__))[0]
@@ -293,7 +293,7 @@ class PrefixCacheManager:
else:
storage_arg_str = " "
if self.cache_config.swap_space or self.cache_config.kvcache_storage_backend:
if self.cache_config.num_cpu_blocks > 0 or self.cache_config.kvcache_storage_backend:
for i in range(tensor_parallel_size):
launch_cmd = (
"FLAGS_allocator_strategy=auto_growth "
@@ -314,7 +314,6 @@ class PrefixCacheManager:
+ f" --pod_ip {pod_ip}"
+ f" --engine_worker_queue_port {engine_worker_queue_port}"
+ f" --num_cpu_blocks {cache_config.num_cpu_blocks}"
+ f" --ipc_suffix {ipc_suffix}"
+ f" --protocol {cache_config.cache_transfer_protocol}"
+ f" --local_data_parallel_id {self.local_data_parallel_id}"
+ f" --rdma_port {cache_config.local_rdma_comm_ports[i] if cache_config.local_rdma_comm_ports is not None else '0'}"
@@ -353,9 +352,8 @@ class PrefixCacheManager:
# Start additional threads
if cache_config.kvcache_storage_backend or self.num_cpu_blocks > 0:
logger.info("Enable hierarchical cache.")
threading.Thread(target=self.recv_data_transfer_result, daemon=True).start()
if cache_config.enable_prefix_caching:
if cache_config.enable_prefix_caching and not envs.FD_ENABLE_V1_UPDATE_WEIGHTS:
threading.Thread(target=self.clear_prefix_cache, daemon=True).start()
all_cache_processes = cache_messager_processes + cache_manager_processes