[PD Disaggregation] [Refine] Refine splitwise deployment (#5151)

* Refine splitwise deployment

* up
This commit is contained in:
Juncai
2025-11-21 15:30:24 +08:00
committed by GitHub
parent 2d1dade5e2
commit f9b0545a7f
15 changed files with 371 additions and 492 deletions
+4
View File
@@ -13,3 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from .router import RouterArgs, launch_router
__all__ = ["RouterArgs", "launch_router"]
+5 -28
View File
@@ -14,41 +14,18 @@
# limitations under the License.
"""
import argparse
from fastdeploy.router.router import start_router
from fastdeploy.router.router import RouterArgs, launch_router
from fastdeploy.utils import FlexibleArgumentParser
from fastdeploy.utils import router_logger as logger
def main() -> None:
    """Parse the router command-line options and launch the router.

    The accepted options are the ones registered by
    ``RouterArgs.add_cli_args`` (host, port, splitwise flag and request
    timeout), so this entry point does not declare any option itself.

    Raises:
        Exception: any error raised while launching the router is logged
            and then re-raised unchanged.
    """
    # RouterArgs registers every CLI option; the hand-built argparse parser
    # that used to precede this was overwritten before use and is removed.
    parser = RouterArgs.add_cli_args(FlexibleArgumentParser())
    args = parser.parse_args()

    try:
        # launch_router is the only launcher; the stale start_router call
        # (the name this change renames) is dropped.
        launch_router(args)
    except Exception as e:
        logger.error(f"Error starting router: {e}")
        # Bare raise keeps the original traceback intact.
        raise
+52 -1
View File
@@ -6,6 +6,7 @@ This module references the router implementation of sglang and vllm.
import asyncio
import random
from dataclasses import dataclass
from itertools import chain
from uuid import uuid4
@@ -19,11 +20,60 @@ from fastdeploy.router.utils import (
InstanceRole,
check_service_health_async,
)
from fastdeploy.utils import FlexibleArgumentParser
from fastdeploy.utils import router_logger as logger
app = FastAPI()
@dataclass
class RouterArgs:
    """Configuration values for the router server and its CLI options."""

    # Host address to bind the router server.
    host: str = "0.0.0.0"
    # Port to bind the router server (kept as a string).
    port: str = "9000"
    # Whether the router uses splitwise deployment.
    splitwise: bool = False
    # Request timeout in seconds.
    request_timeout_secs: int = 1800

    @staticmethod
    def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
        """Register the router options on ``parser`` and return it.

        Every option defaults to the class-level default of the matching
        field, so the dataclass stays the single source of default values.

        Args:
            parser: the parser that receives the ``--host``, ``--port``,
                ``--splitwise`` and ``--request-timeout-secs`` options.

        Returns:
            The same ``parser`` object, after the options were added.
        """
        # (flag, add_argument keyword arguments), in registration order.
        option_specs = (
            (
                "--host",
                {
                    "type": str,
                    "default": RouterArgs.host,
                    "help": "Host address to bind the router server.",
                },
            ),
            (
                "--port",
                {
                    "type": str,
                    "default": RouterArgs.port,
                    "help": "Port number to bind the router server",
                },
            ),
            (
                "--splitwise",
                {
                    "action": "store_true",
                    "default": RouterArgs.splitwise,
                    "help": "Router uses splitwise deployment",
                },
            ),
            (
                "--request-timeout-secs",
                {
                    "type": int,
                    "default": RouterArgs.request_timeout_secs,
                    "help": "Request timeout in seconds",
                },
            ),
        )
        for flag, spec in option_specs:
            parser.add_argument(flag, **spec)
        return parser
class Router:
"""
Router class that handles requests from client and
@@ -306,8 +356,9 @@ async def health_generate():
return Response(status_code=200)
def start_router(router_args):
def launch_router(router_args: RouterArgs):
app.state.router_args = router_args
print(f"Starting router with args: {router_args}")
@app.on_event("startup")
async def startup_event():