[Optimize] Optimize ttft for ep (#6098)

* optimize ttft * fix * fix * fix ci * fix ci * fix * fix bug * fix * add comments * fix ci * fix
2026-04-23 00:17:25 +08:00 · 2026-02-04 15:03:29 +08:00
parent 6e96bd0bd2
commit 90db0bdd0d
10 changed files with 118 additions and 142 deletions
@@ -121,8 +121,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "FD_ZMQ_CONTROL_CMD_SERVER_PORTS": lambda: os.getenv("FD_ZMQ_CONTROL_CMD_SERVER_PORTS", "8202"),
    # Whether to enable the decode caches requests for preallocating resource
    "FD_ENABLE_CACHE_TASK": lambda: os.getenv("FD_ENABLE_CACHE_TASK", "0"),
-    # Batched token timeout in EP
-    "FD_EP_BATCHED_TOKEN_TIMEOUT": lambda: float(os.getenv("FD_EP_BATCHED_TOKEN_TIMEOUT", "0.1")),
    # Max pre-fetch requests number in PD
    "FD_EP_MAX_PREFETCH_TASK_NUM": lambda: int(os.getenv("FD_EP_MAX_PREFETCH_TASK_NUM", "8")),
    # Enable or disable model caching.