mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
This reverts commit 90db0bdd0d.
This commit is contained in:
@@ -313,15 +313,6 @@ class EngineService:
|
||||
create=True,
|
||||
)
|
||||
|
||||
engine_forward_signal_data = np.zeros([1], dtype=np.int32)
|
||||
self.engine_forward_signal = IPCSignal(
|
||||
name="engine_forward_signal",
|
||||
array=engine_forward_signal_data,
|
||||
dtype=np.int32,
|
||||
suffix=current_suffix,
|
||||
create=True,
|
||||
)
|
||||
|
||||
# worker_live_signal 用于engine感知各worker进程是否存活,记录每个step 时间
|
||||
worker_healthy_live_recorded_time_array = np.zeros(
|
||||
shape=[min(self.cfg.worker_num_per_node, self.cfg.parallel_config.tensor_parallel_size)], dtype=np.int32
|
||||
@@ -984,23 +975,26 @@ class EngineService:
|
||||
with self._pause_cond:
|
||||
self._pause_cond.wait_for(lambda: not self.is_paused)
|
||||
try:
|
||||
if not is_fetching:
|
||||
# Check if the thread pool is still available to avoid submitting tasks to a shutdown thread pool.
|
||||
try:
|
||||
is_fetching = True
|
||||
get_request_pool.submit(_fetch_request)
|
||||
except RuntimeError as e:
|
||||
if "shutdown" in str(e):
|
||||
self.llm_logger.info("Thread pool shutdown detected, exiting scheduler loop")
|
||||
break
|
||||
else:
|
||||
raise
|
||||
# Continue preprocessing incoming requests and accumulating them in the queue when forward pass not finished.
|
||||
# Once the forward pass finishes, these accumulated requests can be scheduled in larger,
|
||||
# more efficient batches.
|
||||
if not (self.engine_worker_queue.num_tasks() == 0 and self.engine_forward_signal.value[0] == 0):
|
||||
if self.engine_worker_queue.exist_tasks():
|
||||
time.sleep(0.001)
|
||||
continue
|
||||
if self.cfg.scheduler_config.splitwise_role != "mixed":
|
||||
if not is_fetching:
|
||||
is_fetching = True
|
||||
get_request_pool.submit(_fetch_request)
|
||||
|
||||
else:
|
||||
if len(self.resource_manager.waiting) == 0 and (not is_fetching):
|
||||
# Check if the thread pool is still available to avoid submitting tasks to a shutdown thread pool.
|
||||
try:
|
||||
is_fetching = True
|
||||
get_request_pool.submit(_fetch_request)
|
||||
except RuntimeError as e:
|
||||
if "shutdown" in str(e):
|
||||
self.llm_logger.info("Thread pool shutdown detected, exiting scheduler loop")
|
||||
break
|
||||
else:
|
||||
raise
|
||||
|
||||
# 2. Schedule requests
|
||||
tasks, error_tasks = self.resource_manager.schedule()
|
||||
@@ -1050,13 +1044,6 @@ class EngineService:
|
||||
else:
|
||||
task.metrics.inference_start_time = time.time()
|
||||
self.engine_worker_queue.put_tasks((tasks, self.resource_manager.real_bsz))
|
||||
else:
|
||||
# When there are no actual tasks to schedule, send an empty task batch to EP workers.
|
||||
# This helps EP workers barrier for syncing tasks not hang.
|
||||
if self.cfg.parallel_config.enable_expert_parallel:
|
||||
self.engine_worker_queue.put_tasks(
|
||||
([], self.resource_manager.real_bsz)
|
||||
) # Empty (as idle tasks for ep)
|
||||
|
||||
# 4. Response error tasks
|
||||
if error_tasks:
|
||||
|
||||
Reference in New Issue
Block a user