[PD Disaggregation] Update usage of pd disaggregation and data parallel (#5742)

* Update usage of pd disaggregation

* up

* up

* up

* up

* up

* up

* up

* up

* up

* up dp docs

* up

* up

* up

* fix unittest
This commit is contained in:
jc
2026-01-05 17:51:29 +08:00
committed by GitHub
parent 690d4bcdb0
commit 8d384f9fd8
15 changed files with 441 additions and 385 deletions
+4 -2
View File
@@ -140,8 +140,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"ENCODE_FEATURE_BOS_SK": lambda: os.getenv("ENCODE_FEATURE_BOS_SK"),
# The endpoint of the BOS storage that holds the features during multimodal inference
"ENCODE_FEATURE_ENDPOINT": lambda: os.getenv("ENCODE_FEATURE_ENDPOINT"),
# Enable offline perf test mode for PD disaggregation
"FD_OFFLINE_PERF_TEST_FOR_PD": lambda: int(os.getenv("FD_OFFLINE_PERF_TEST_FOR_PD", "0")),
# Whether the Prefill instance continuously requests Decode resources in PD disaggregation
"PREFILL_CONTINUOUS_REQUEST_DECODE_RESOURCES": lambda: int(
os.getenv("PREFILL_CONTINUOUS_REQUEST_DECODE_RESOURCES", "1")
),
"FD_ENABLE_E2W_TENSOR_CONVERT": lambda: int(os.getenv("FD_ENABLE_E2W_TENSOR_CONVERT", "0")),
"FD_ENGINE_TASK_QUEUE_WITH_SHM": lambda: int(os.getenv("FD_ENGINE_TASK_QUEUE_WITH_SHM", "0")),
"FD_FILL_BITMASK_BATCH": lambda: int(os.getenv("FD_FILL_BITMASK_BATCH", "4")),