mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-22 16:07:51 +08:00
[Feature] Update Counter Release (#6943)
This commit is contained in:
@@ -192,7 +192,7 @@ server:
|
||||
scheduler:
|
||||
policy: "power_of_two" # Scheduling policy (optional): random, power_of_two, round_robin, process_tokens, request_num, cache_aware, remote_cache_aware, fd_metrics_score, fd_remote_metrics_score
|
||||
prefill-policy: "cache_aware" # Prefill scheduling policy in PD mode
|
||||
- decode-policy: "fd_metrics_score" # Decode scheduling policy in PD mode
|
||||
+ decode-policy: "request_num" # Decode scheduling policy in PD mode
|
||||
eviction-interval-secs: 60 # Cache eviction interval for CacheAware scheduling
|
||||
balance-abs-threshold: 1 # Absolute threshold for CacheAware balancing
|
||||
balance-rel-threshold: 0.2 # Relative threshold for CacheAware balancing
|
||||
|
||||
@@ -192,7 +192,7 @@ server:
|
||||
scheduler:
|
||||
policy: "power_of_two" # 调度策略(可选): random, power_of_two, round_robin, process_tokens, request_num, cache_aware, remote_cache_aware, fd_metrics_score, fd_remote_metrics_score; 默认: request_num
|
||||
prefill-policy: "cache_aware" # pd分离模式下prefill节点调度策略; 默认: process_tokens
|
||||
- decode-policy: "fd_metrics_score" # pd分离模式下decode节点调度策略; 默认: request_num
|
||||
+ decode-policy: "request_num" # pd分离模式下decode节点调度策略; 默认: request_num
|
||||
eviction-interval-secs: 60 # cache-aware策略清理过期cache的间隔时间
|
||||
balance-abs-threshold: 1 # cache-aware策略绝对阈值
|
||||
balance-rel-threshold: 0.2 # cache-aware策略相对阈值
|
||||
|
||||
Reference in New Issue
Block a user