[PD Disaggregation] Update usage of pd disaggregation and data parallel (#5742)

* Update usage of pd disaggregation
* up
* up
* up
* up
* up
* up
* up
* up
* up
* up dp docs
* up
* up
* up
* fix unittest
2026-04-23 00:17:25 +08:00 · 2026-01-05 17:51:29 +08:00
parent 690d4bcdb0
commit 8d384f9fd8
15 changed files with 441 additions and 385 deletions
@@ -140,8 +140,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "ENCODE_FEATURE_BOS_SK": lambda: os.getenv("ENCODE_FEATURE_BOS_SK"),
    # The ENDPOINT of bos storing the features while multi_modal infer
    "ENCODE_FEATURE_ENDPOINT": lambda: os.getenv("ENCODE_FEATURE_ENDPOINT"),
-    # Enable offline perf test mode for PD disaggregation
-    "FD_OFFLINE_PERF_TEST_FOR_PD": lambda: int(os.getenv("FD_OFFLINE_PERF_TEST_FOR_PD", "0")),
+    # Whether the Prefill instance continuously requests Decode resources in PD disaggregation
+    "PREFILL_CONTINUOUS_REQUEST_DECODE_RESOURCES": lambda: int(
+        os.getenv("PREFILL_CONTINUOUS_REQUEST_DECODE_RESOURCES", "1")
+    ),
    "FD_ENABLE_E2W_TENSOR_CONVERT": lambda: int(os.getenv("FD_ENABLE_E2W_TENSOR_CONVERT", "0")),
    "FD_ENGINE_TASK_QUEUE_WITH_SHM": lambda: int(os.getenv("FD_ENGINE_TASK_QUEUE_WITH_SHM", "0")),
    "FD_FILL_BITMASK_BATCH": lambda: int(os.getenv("FD_FILL_BITMASK_BATCH", "4")),