[Feature] Update Counter Release (#6943)

This commit is contained in:
mouxin
2026-03-20 10:51:37 +08:00
committed by GitHub
parent f4a79d4c00
commit 96b0ecea6b
7 changed files with 251 additions and 50 deletions
+1 -1
View File
@@ -192,7 +192,7 @@ server:
scheduler:
policy: "power_of_two" # Scheduling policy (optional): random, power_of_two, round_robin, process_tokens, request_num, cache_aware, remote_cache_aware, fd_metrics_score, fd_remote_metrics_score
prefill-policy: "cache_aware" # Prefill scheduling policy in PD mode
-decode-policy: "fd_metrics_score" # Decode scheduling policy in PD mode
+decode-policy: "request_num" # Decode scheduling policy in PD mode
eviction-interval-secs: 60 # Cache eviction interval for CacheAware scheduling
balance-abs-threshold: 1 # Absolute threshold for CacheAware balancing
balance-rel-threshold: 0.2 # Relative threshold for CacheAware balancing
+1 -1
View File
@@ -192,7 +192,7 @@ server:
scheduler:
policy: "power_of_two" # 调度策略(可选): random, power_of_two, round_robin, process_tokens, request_num, cache_aware, remote_cache_aware, fd_metrics_score, fd_remote_metrics_score; 默认: request_num
prefill-policy: "cache_aware" # pd分离模式下prefill节点调度策略; 默认: process_tokens
-decode-policy: "fd_metrics_score" # pd分离模式下decode节点调度策略; 默认: request_num
+decode-policy: "request_num" # pd分离模式下decode节点调度策略; 默认: request_num
eviction-interval-secs: 60 # cache-aware策略清理过期cache的间隔时间
balance-abs-threshold: 1 # cache-aware策略绝对阈值
balance-rel-threshold: 0.2 # cache-aware策略相对阈值