[Feature] [PD] add simple router and refine splitwise deployment (#4709)

* add simple router and refine splitwise deployment

* fix
This commit is contained in:
Juncai
2025-11-06 14:56:02 +08:00
committed by GitHub
parent 831266da7a
commit 08ca0f6aea
39 changed files with 2397 additions and 171 deletions
+7 -4
View File
@@ -16,7 +16,9 @@
import redis
from fastdeploy.utils import llm_logger
from fastdeploy.utils import get_logger, llm_logger
config_logger = get_logger("config", "config.log")
from .dp_scheduler import DPScheduler
from .global_scheduler import GlobalScheduler
@@ -84,10 +86,10 @@ class LocalSchedulerConfig:
"""
Print the current configuration to logs.
"""
llm_logger.info("LocalScheduler Configuration Information :")
config_logger.info("LocalScheduler Configuration Information :")
for k, v in self.__dict__.items():
llm_logger.info("{:<20}:{:<6}{}".format(k, "", v))
llm_logger.info("=============================================================")
config_logger.info("{:<20}:{:<6}{}".format(k, "", v))
config_logger.info("=============================================================")
class DPLocalSchedulerConfig(LocalSchedulerConfig):
@@ -312,6 +314,7 @@ class SchedulerConfig:
Returns:
Initialized scheduler instance (LocalScheduler or GlobalScheduler)
"""
llm_logger.info("Scheduler Type: %s" % self.name)
if self.name == "global":
return GlobalScheduler(
+19
View File
@@ -195,6 +195,20 @@ class LocalScheduler:
results += [(request_id, "duplicated request_id") for request_id in duplicated_ids]
return results
def has_request(self, request_id: str) -> bool:
    """
    Check whether a request with the given ID is currently held by the scheduler.

    Args:
        request_id: ID of the request to look up.

    Returns:
        True if ``request_id`` is present in ``self.requests``, False otherwise.
    """
    # The membership test runs while holding ``self.mutex``.
    with self.mutex:
        return request_id in self.requests
def calc_required_blocks(self, token_num, block_size):
"""
Calculate the number of blocks needed for a given number of tokens.
@@ -292,6 +306,7 @@ class LocalScheduler:
Args:
results: List of RequestOutput objects containing results
"""
scheduler_logger.debug(f"put results: {results}")
responses: List[ScheduledResponse] = [ScheduledResponse(result) for result in results]
finished_responses = [response.request_id for response in responses if response.finished]
@@ -354,4 +369,8 @@ class LocalScheduler:
if finished:
self._recycle(request_id)
scheduler_logger.info(f"Scheduler has pulled a finished response: {[request_id]}")
if results:
scheduler_logger.debug(f"get responses, {results}")
return results