mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[PD Disaggregation] Write the cache of preempted req to storage and refine PD Disaggregation (#7107)
* Write the cache of preempted req to storage * up * fix
This commit is contained in:
@@ -252,6 +252,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
# When v1 is enabled, the legacy /clear_load_weight and /update_model_weight
|
||||
# will adopt this new communication pattern.
|
||||
"FD_ENABLE_V1_UPDATE_WEIGHTS": lambda: bool(int(os.getenv("FD_ENABLE_V1_UPDATE_WEIGHTS", "0"))),
|
||||
# Whether to save the output-token cache of preempted requests to storage (enabled by default).
|
||||
"FD_SAVE_OUTPUT_CACHE_FOR_PREEMPTED_REQUEST": lambda: bool(
|
||||
int(os.getenv("FD_SAVE_OUTPUT_CACHE_FOR_PREEMPTED_REQUEST", "1"))
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user