mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Support pd ep deployment with yiyan adapter (#4029)
* [Feature] Support mixed deployment with yiyan adapter in release2.2 * fix metrics * add unit test * add unit test * add unit test * Support pd ep deployment with yiyan adapter * Support pd ep deployment with yiyan adapter * refactor cache messager * support scheduler v1 in PD * suppport pd v1 + chunk prefill * suppport pd v1 + chunk prefill * add eplb * support eplb * support eplb * support eplb * support v1 * fix * fix * fix bug * remove eplb support * support prefix cache in P * fix bug * fix bug * support one stop in V1 * fix bug * fix ci * fix ci * fix * fix * fix * fix * fix --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
@@ -18,6 +18,7 @@ import redis
|
||||
|
||||
from fastdeploy.utils import llm_logger
|
||||
|
||||
from .dp_scheduler import DPScheduler
|
||||
from .global_scheduler import GlobalScheduler
|
||||
from .local_scheduler import LocalScheduler
|
||||
from .splitwise_scheduler import SplitWiseScheduler, SplitWiseSchedulerConfig
|
||||
@@ -89,6 +90,54 @@ class LocalSchedulerConfig:
|
||||
llm_logger.info("=============================================================")
|
||||
|
||||
|
||||
class DPLocalSchedulerConfig(LocalSchedulerConfig):
|
||||
"""
|
||||
Configuration class for DPLocalScheduler.
|
||||
Attributes:
|
||||
max_size: Maximum number of concurrent requests (-1 for unlimited)
|
||||
ttl: Time-to-live in seconds for request expiration
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
max_size: int = -1,
|
||||
ttl: int = 900,
|
||||
max_model_len: int = 8192,
|
||||
enable_chunked_prefill: bool = False,
|
||||
max_num_partial_prefills: int = 1,
|
||||
max_long_partial_prefills: int = 1,
|
||||
long_prefill_token_threshold: int = 0,
|
||||
splitwise_role: str = "prefill",
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Initialize LocalScheduler configuration.
|
||||
Args:
|
||||
max_size: Maximum concurrent requests (-1 for unlimited, 0 for disabled)
|
||||
ttl: Time-to-live in seconds for request expiration (default 900s)
|
||||
max_model_len: Maximum model context length in tokens
|
||||
enable_chunked_prefill: Whether to enable chunked prefill processing
|
||||
max_num_partial_prefills: Max partial prefill operations allowed
|
||||
max_long_partial_prefills: Max long-running partial prefill ops
|
||||
long_prefill_token_threshold: Token count threshold for long prefill
|
||||
**kwargs: Additional unused arguments (for forward compatibility)
|
||||
Note:
|
||||
- If long_prefill_token_threshold is 0, it's auto-calculated as 4% of max_model_len
|
||||
- See LocalScheduler class for implementation details
|
||||
"""
|
||||
self.max_size = max_size
|
||||
self.ttl = ttl
|
||||
|
||||
self.max_model_len = max_model_len
|
||||
self.enable_chunked_prefill = enable_chunked_prefill
|
||||
self.max_num_partial_prefills = max_num_partial_prefills
|
||||
self.max_long_partial_prefills = max_long_partial_prefills
|
||||
self.long_prefill_token_threshold = long_prefill_token_threshold
|
||||
if self.long_prefill_token_threshold == 0:
|
||||
self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
|
||||
self.splitwise_role = splitwise_role
|
||||
|
||||
|
||||
class GlobalSchedulerConfig:
|
||||
"""
|
||||
Configuration class for GlobalScheduler (Redis-based).
|
||||
@@ -235,6 +284,9 @@ class SchedulerConfig:
|
||||
if self.name == "splitwise":
|
||||
self.config = SplitWiseSchedulerConfig(**args)
|
||||
|
||||
if self.name == "dp":
|
||||
self.config = DPLocalSchedulerConfig(**args)
|
||||
|
||||
def check(self):
|
||||
"""
|
||||
Validate the configuration.
|
||||
@@ -242,7 +294,7 @@ class SchedulerConfig:
|
||||
Raises:
|
||||
Exception: If invalid scheduler type is specified
|
||||
"""
|
||||
if self.name not in ["local", "global", "splitwise"]:
|
||||
if self.name not in ["local", "global", "splitwise", "dp"]:
|
||||
raise Exception(f"Unknown scheduler type {self.name}")
|
||||
|
||||
self.config.check()
|
||||
@@ -280,6 +332,17 @@ class SchedulerConfig:
|
||||
if self.name == "splitwise":
|
||||
return SplitWiseScheduler(self.config)
|
||||
|
||||
if self.name == "dp":
|
||||
return DPScheduler(
|
||||
max_size=self.config.max_size,
|
||||
ttl=self.config.ttl,
|
||||
enable_chunked_prefill=self.config.enable_chunked_prefill,
|
||||
max_num_partial_prefills=self.config.max_num_partial_prefills,
|
||||
max_long_partial_prefills=self.config.max_long_partial_prefills,
|
||||
long_prefill_token_threshold=self.config.long_prefill_token_threshold,
|
||||
splitwise_role=self.config.splitwise_role,
|
||||
)
|
||||
|
||||
return LocalScheduler(
|
||||
max_size=self.config.max_size,
|
||||
ttl=self.config.ttl,
|
||||
|
||||
Reference in New Issue
Block a user