Revert "[Optimize] Optimize ttft for ep (#6098)" (#6402)

This reverts commit 90db0bdd0d.
2026-04-22 16:07:51 +08:00 · 2026-02-09 19:01:23 +08:00
parent d60daca4a8
commit 35c24f3f71
10 changed files with 142 additions and 118 deletions
@@ -147,6 +147,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # Whether to let decode cache requests so that resources can be preallocated
    "FD_ENABLE_CACHE_TASK": lambda: os.getenv("FD_ENABLE_CACHE_TASK", "0"),

+    # Batched token timeout in EP
+    "FD_EP_BATCHED_TOKEN_TIMEOUT": lambda: float(os.getenv("FD_EP_BATCHED_TOKEN_TIMEOUT", "0.1")),
+
    # Maximum number of pre-fetch requests in PD
    "FD_EP_MAX_PREFETCH_TASK_NUM": lambda: int(os.getenv("FD_EP_MAX_PREFETCH_TASK_NUM", "8")),

@@ -147,6 +147,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # 是否启用 decode 缓存请求以预分配资源
    "FD_ENABLE_CACHE_TASK": lambda: os.getenv("FD_ENABLE_CACHE_TASK", "0"),

+    # EP 中批处理 token 的超时时间
+    "FD_EP_BATCHED_TOKEN_TIMEOUT": lambda: float(os.getenv("FD_EP_BATCHED_TOKEN_TIMEOUT", "0.1")),
+
    # PD 中最大预取请求数量
    "FD_EP_MAX_PREFETCH_TASK_NUM": lambda: int(os.getenv("FD_EP_MAX_PREFETCH_TASK_NUM", "8")),