mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[PD Disaggregation] [Refine] Refine splitwise deployment (#5151)
* Refine splitwise deployment * up
This commit is contained in:
@@ -13,3 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
from .router import RouterArgs, launch_router
|
||||
|
||||
__all__ = ["RouterArgs", "launch_router"]
|
||||
|
||||
@@ -14,41 +14,18 @@
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
from fastdeploy.router.router import start_router
|
||||
from fastdeploy.router.router import RouterArgs, launch_router
|
||||
from fastdeploy.utils import FlexibleArgumentParser
|
||||
from fastdeploy.utils import router_logger as logger
|
||||
|
||||
|
||||
def main() -> None:
    """
    Command-line entry point for the router.

    Creates a FlexibleArgumentParser, lets RouterArgs.add_cli_args register
    the router options on it, parses the command line and passes the parsed
    arguments to launch_router.

    Raises:
        Exception: any error raised by launch_router is logged through the
            router logger and then re-raised to the caller.
    """
    # The router options (--host, --port, --splitwise, --request-timeout-secs)
    # are declared once on RouterArgs; the hand-built argparse parser that
    # duplicated them was overwritten before use and has been dropped.
    parser = FlexibleArgumentParser()
    parser = RouterArgs.add_cli_args(parser)
    args = parser.parse_args()

    try:
        # Only the current entry point is invoked; the stale start_router
        # call that preceded it has been removed.
        launch_router(args)
    except Exception as e:
        logger.error(f"Error starting router: {e}")
        # Bare raise re-raises the active exception with its traceback intact.
        raise
|
||||
|
||||
@@ -6,6 +6,7 @@ This module references the router implementation of sglang and vllm.
|
||||
|
||||
import asyncio
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
from itertools import chain
|
||||
from uuid import uuid4
|
||||
|
||||
@@ -19,11 +20,60 @@ from fastdeploy.router.utils import (
|
||||
InstanceRole,
|
||||
check_service_health_async,
|
||||
)
|
||||
from fastdeploy.utils import FlexibleArgumentParser
|
||||
from fastdeploy.utils import router_logger as logger
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
@dataclass
class RouterArgs:
    # Settings for the router server. Each field has a matching command-line
    # option registered by add_cli_args, and the field's default value is
    # reused as that option's default.

    # Host address to bind the router server.
    host: str = "0.0.0.0"

    # Port to bind the router server (stored as a string).
    port: str = "9000"

    # Router uses splitwise deployment.
    splitwise: bool = False

    # Request timeout in seconds.
    request_timeout_secs: int = 1800

    @staticmethod
    def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
        """
        Register the router command-line options on ``parser``.

        Args:
            parser: the parser that receives the options.

        Returns:
            The same parser object, after --host, --port, --splitwise and
            --request-timeout-secs have been added in that order.
        """
        # One (flag, keyword-arguments) entry per option, in registration order.
        option_table = (
            (
                "--host",
                {
                    "type": str,
                    "default": RouterArgs.host,
                    "help": "Host address to bind the router server.",
                },
            ),
            (
                "--port",
                {
                    "type": str,
                    "default": RouterArgs.port,
                    "help": "Port number to bind the router server",
                },
            ),
            (
                "--splitwise",
                {
                    "action": "store_true",
                    "default": RouterArgs.splitwise,
                    "help": "Router uses splitwise deployment",
                },
            ),
            (
                "--request-timeout-secs",
                {
                    "type": int,
                    "default": RouterArgs.request_timeout_secs,
                    "help": "Request timeout in seconds",
                },
            ),
        )
        for flag, settings in option_table:
            parser.add_argument(flag, **settings)
        return parser
|
||||
|
||||
|
||||
class Router:
|
||||
"""
|
||||
Router class that handles requests from client and
|
||||
@@ -306,8 +356,9 @@ async def health_generate():
|
||||
return Response(status_code=200)
|
||||
|
||||
|
||||
def start_router(router_args):
|
||||
def launch_router(router_args: RouterArgs):
|
||||
app.state.router_args = router_args
|
||||
print(f"Starting router with args: {router_args}")
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
|
||||
Reference in New Issue
Block a user