[PD Disaggregation] support DP via v1 router and decouple DP and EP (#5197)

* [fix] support DP via v1 router and decouple DP and EP
* [fix] fix scripts
* [fix] reset model path
* [fix] dp use get_output_ep, fix router port type, update scripts
* [merge] merge with latest code
* [chore] remove some debug log
* [fix] fix code style check
* [fix] fix test_multi_api_server for log_dir name
* [chore] reduce logs
* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-04-23 00:17:25 +08:00 · 2025-12-04 15:38:43 +08:00
parent 5cd17fd662
commit f4119d51b4
15 changed files with 394 additions and 146 deletions
@@ -91,6 +91,7 @@ class Router:
        self.prefill_servers = []
        self.decode_servers = []
        self.lock = asyncio.Lock()  # async-safe lock
+        logger.info("Router started at http://{}:{}".format(self.host, self.port))

    async def register_instance(self, instance_info_dict: dict):
        """Register an instance asynchronously"""
@@ -172,6 +173,8 @@ class Router:
    async def handle_splitwise_request(self, request_data: dict, endpoint_name: str):
        logger.debug(f"Received request: {request_data}")
        prefill_server, decode_server = await self.select_pd()
+        logger.debug(f"Selected prefill server: {prefill_server}")
+        logger.debug(f"Selected decode server: {decode_server}")

        if prefill_server.tp_size != decode_server.tp_size and decode_server.tp_size != 1:
            raise HTTPException(
@@ -371,4 +374,4 @@ def launch_router(router_args: RouterArgs):
        app.state.router = Router(app.state.router_args)
        asyncio.create_task(app.state.router.monitor_instance_health(interval_secs=5))

-    uvicorn.run(app, host=router_args.host, port=router_args.port)
+    uvicorn.run(app, host=router_args.host, port=int(router_args.port))