mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
PD deployment support without router (#7412)
This commit is contained in:
@@ -2010,13 +2010,13 @@ class FDConfig:
|
||||
and self.router_config
|
||||
and self.router_config.router
|
||||
):
|
||||
# For RL scenario: version.yaml will be required for models in future releases.
|
||||
# For RL scenario, version.yaml is required for models
|
||||
# Temporarily require the router to be enabled.
|
||||
self.model_config.read_model_version()
|
||||
|
||||
self.read_from_config()
|
||||
self.postprocess()
|
||||
self.init_cache_info()
|
||||
self.init_pd_info()
|
||||
if test_mode:
|
||||
return
|
||||
self.check()
|
||||
@@ -2371,18 +2371,17 @@ class FDConfig:
|
||||
logger.info("{:<20}:{:<6}{}".format(k, "", v))
|
||||
logger.info("=============================================================")
|
||||
|
||||
def init_cache_info(self):
|
||||
def init_pd_info(self):
|
||||
"""
|
||||
initialize cache info
|
||||
initialize info for pd deployment
|
||||
"""
|
||||
# TODO: group the splitwise params
|
||||
# There are two methods for splitwise deployment:
|
||||
# 1. v0 splitwise_scheduler or dp_scheduler
|
||||
# 2. v1 local_scheduler + router
|
||||
# 2. v1 local_scheduler + router (optional)
|
||||
self.splitwise_version = None
|
||||
if self.scheduler_config.name in ("splitwise", "dp"):
|
||||
self.splitwise_version = "v0"
|
||||
elif self.scheduler_config.name == "local" and self.router_config and self.router_config.router:
|
||||
elif self.scheduler_config.name == "local":
|
||||
self.splitwise_version = "v1"
|
||||
|
||||
# the information for registering this server to router or splitwise_scheduler
|
||||
|
||||
@@ -600,10 +600,15 @@ class EngineArgs:
|
||||
raise NotImplementedError("Only ENABLE_V1_KVCACHE_SCHEDULER=1 support max_logprobs=-1")
|
||||
|
||||
if self.splitwise_role != "mixed":
|
||||
if self.scheduler_name == "local" and self.router is None:
|
||||
if self.scheduler_name == "splitwise":
|
||||
raise ValueError(
|
||||
f"When using {self.splitwise_role} role and the {self.scheduler_name} "
|
||||
f"scheduler, please provide --router argument."
|
||||
"Setting scheduler_name as splitwise is not supported in pd deployment, "
|
||||
"please use router as scheduler."
|
||||
)
|
||||
if self.scheduler_name == "local" and self.router is None:
|
||||
console_logger.warning(
|
||||
f"Running {self.splitwise_role} role with {self.scheduler_name} "
|
||||
f"scheduler without --router. Router registration and request routing will be disabled."
|
||||
)
|
||||
|
||||
if not (
|
||||
|
||||
@@ -109,7 +109,7 @@ class ExpertService:
|
||||
if envs.FD_ENABLE_RETURN_TEXT:
|
||||
self.engine.create_data_processor()
|
||||
if self.cfg.scheduler_config.name == "dp":
|
||||
self.cfg.init_cache_info()
|
||||
self.cfg.init_pd_info()
|
||||
self.engine.scheduler.start(local_data_parallel_id)
|
||||
|
||||
if ipc_signal_suffix is not None:
|
||||
@@ -122,7 +122,7 @@ class ExpertService:
|
||||
self.llm_logger.info(f"start expert service {local_data_parallel_id}")
|
||||
|
||||
if self.cfg.scheduler_config.name == "splitwise":
|
||||
self.cfg.init_cache_info()
|
||||
self.cfg.init_pd_info()
|
||||
role = self.cfg.scheduler_config.splitwise_role
|
||||
host_ip = self.cfg.host_ip
|
||||
self.engine.scheduler.start(role, host_ip, self.cfg.register_info)
|
||||
|
||||
Reference in New Issue
Block a user