mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] [PD] add simple router and refine splitwise deployment (#4709)
* add simple router and refine splitwise deployment * fix
This commit is contained in:
@@ -16,7 +16,9 @@
|
||||
|
||||
import redis
|
||||
|
||||
from fastdeploy.utils import llm_logger
|
||||
from fastdeploy.utils import get_logger, llm_logger
|
||||
|
||||
config_logger = get_logger("config", "config.log")
|
||||
|
||||
from .dp_scheduler import DPScheduler
|
||||
from .global_scheduler import GlobalScheduler
|
||||
@@ -84,10 +86,10 @@ class LocalSchedulerConfig:
|
||||
"""
|
||||
Print the current configuration to logs.
|
||||
"""
|
||||
llm_logger.info("LocalScheduler Configuration Information :")
|
||||
config_logger.info("LocalScheduler Configuration Information :")
|
||||
for k, v in self.__dict__.items():
|
||||
llm_logger.info("{:<20}:{:<6}{}".format(k, "", v))
|
||||
llm_logger.info("=============================================================")
|
||||
config_logger.info("{:<20}:{:<6}{}".format(k, "", v))
|
||||
config_logger.info("=============================================================")
|
||||
|
||||
|
||||
class DPLocalSchedulerConfig(LocalSchedulerConfig):
|
||||
@@ -312,6 +314,7 @@ class SchedulerConfig:
|
||||
Returns:
|
||||
Initialized scheduler instance (LocalScheduler or GlobalScheduler)
|
||||
"""
|
||||
llm_logger.info("Scheduler Type: %s" % self.name)
|
||||
|
||||
if self.name == "global":
|
||||
return GlobalScheduler(
|
||||
|
||||
@@ -195,6 +195,20 @@ class LocalScheduler:
|
||||
results += [(request_id, "duplicated request_id") for request_id in duplicated_ids]
|
||||
return results
|
||||
|
||||
def has_request(self, request_id: str) -> bool:
|
||||
"""
|
||||
Check whether a specific request ID is currently held in the scheduler's request table.
|
||||
|
||||
Args:
|
||||
request_id: The request ID to look up.
|
||||
None is not special-cased; the value is simply tested for membership in self.requests.
|
||||
|
||||
Returns:
|
||||
True if request_id is present in self.requests, False otherwise.
|
||||
"""
|
||||
with self.mutex:
|
||||
return request_id in self.requests
|
||||
|
||||
def calc_required_blocks(self, token_num, block_size):
|
||||
"""
|
||||
Calculate the number of blocks needed for a given number of tokens.
|
||||
@@ -292,6 +306,7 @@ class LocalScheduler:
|
||||
Args:
|
||||
results: List of RequestOutput objects containing results
|
||||
"""
|
||||
scheduler_logger.debug(f"put results: {results}")
|
||||
responses: List[ScheduledResponse] = [ScheduledResponse(result) for result in results]
|
||||
|
||||
finished_responses = [response.request_id for response in responses if response.finished]
|
||||
@@ -354,4 +369,8 @@ class LocalScheduler:
|
||||
if finished:
|
||||
self._recycle(request_id)
|
||||
scheduler_logger.info(f"Scheduler has pulled a finished response: {[request_id]}")
|
||||
|
||||
if results:
|
||||
scheduler_logger.debug(f"get responses, {results}")
|
||||
|
||||
return results
|
||||
|
||||
Reference in New Issue
Block a user