Revert "[Optimize] Optimize ttft for ep (#6098)" (#6402)

This reverts commit 90db0bdd0d.
2026-04-23 00:17:25 +08:00 · 2026-02-09 19:01:23 +08:00
parent d60daca4a8
commit 35c24f3f71
10 changed files with 142 additions and 118 deletions
@@ -313,15 +313,6 @@ class EngineService:
            create=True,
        )

-        engine_forward_signal_data = np.zeros([1], dtype=np.int32)
-        self.engine_forward_signal = IPCSignal(
-            name="engine_forward_signal",
-            array=engine_forward_signal_data,
-            dtype=np.int32,
-            suffix=current_suffix,
-            create=True,
-        )
-
        # worker_live_signal 用于engine感知各worker进程是否存活，记录每个step 时间
        worker_healthy_live_recorded_time_array = np.zeros(
            shape=[min(self.cfg.worker_num_per_node, self.cfg.parallel_config.tensor_parallel_size)], dtype=np.int32
@@ -984,23 +975,26 @@ class EngineService:
            with self._pause_cond:
                self._pause_cond.wait_for(lambda: not self.is_paused)
            try:
-                if not is_fetching:
-                    # Check if the thread pool is still available to avoid submitting tasks to a shutdown thread pool.
-                    try:
-                        is_fetching = True
-                        get_request_pool.submit(_fetch_request)
-                    except RuntimeError as e:
-                        if "shutdown" in str(e):
-                            self.llm_logger.info("Thread pool shutdown detected, exiting scheduler loop")
-                            break
-                        else:
-                            raise
-                # Continue preprocessing incoming requests and accumulating them in the queue when forward pass not finished.
-                # Once the forward pass finishes, these accumulated requests can be scheduled in larger,
-                # more efficient batches.
-                if not (self.engine_worker_queue.num_tasks() == 0 and self.engine_forward_signal.value[0] == 0):
+                if self.engine_worker_queue.exist_tasks():
                    time.sleep(0.001)
                    continue
+                if self.cfg.scheduler_config.splitwise_role != "mixed":
+                    if not is_fetching:
+                        is_fetching = True
+                        get_request_pool.submit(_fetch_request)
+
+                else:
+                    if len(self.resource_manager.waiting) == 0 and (not is_fetching):
+                        # Check if the thread pool is still available to avoid submitting tasks to a shutdown thread pool.
+                        try:
+                            is_fetching = True
+                            get_request_pool.submit(_fetch_request)
+                        except RuntimeError as e:
+                            if "shutdown" in str(e):
+                                self.llm_logger.info("Thread pool shutdown detected, exiting scheduler loop")
+                                break
+                            else:
+                                raise

                # 2. Schedule requests
                tasks, error_tasks = self.resource_manager.schedule()
@@ -1050,13 +1044,6 @@ class EngineService:
                            else:
                                task.metrics.inference_start_time = time.time()
                    self.engine_worker_queue.put_tasks((tasks, self.resource_manager.real_bsz))
-                else:
-                    # When there are no actual tasks to schedule, send an empty task batch to EP workers.
-                    # This helps EP workers barrier for syncing tasks not hang.
-                    if self.cfg.parallel_config.enable_expert_parallel:
-                        self.engine_worker_queue.put_tasks(
-                            ([], self.resource_manager.real_bsz)
-                        )  # Empty (as idle tasks for ep)

                # 4. Response error tasks
                if error_tasks: