[Feature] Config eviction_duration (#7125)

* [Feature] Config eviction_duration

* [Feature] Config eviction_duration

* [Feature] Config eviction_duration

* [Feature] Config eviction_duration

---------

Co-authored-by: mouxin <mouxin@baidu.com>
This commit is contained in:
mouxin
2026-04-01 16:46:21 +08:00
committed by GitHub
parent c29e86fc9d
commit 6cae9b1f50
8 changed files with 427 additions and 5 deletions
+1
View File
@@ -195,6 +195,7 @@ scheduler:
prefill-policy: "cache_aware" # Prefill scheduling policy in PD mode; default: process_tokens
decode-policy: "request_num" # Decode scheduling policy in PD mode; default: request_num
eviction-interval-secs: 60 # Interval between cache eviction runs for CacheAware scheduling (seconds)
eviction-duration-mins: 30 # Eviction duration for cache-aware radix tree nodes (minutes); default: 30
balance-abs-threshold: 1 # Absolute threshold for CacheAware balancing
balance-rel-threshold: 0.2 # Relative threshold for CacheAware balancing
hit-ratio-weight: 1.0 # Cache hit ratio weight
+1
View File
@@ -195,6 +195,7 @@ scheduler:
prefill-policy: "cache_aware" # pd分离模式下prefill节点调度策略; 默认: process_tokens
decode-policy: "request_num" # pd分离模式下decode节点调度策略; 默认: request_num
eviction-interval-secs: 60 # cache-aware策略清理过期cache的间隔时间(秒)
eviction-duration-mins: 30 # cache-aware策略radix tree节点驱逐时间(分钟); 默认: 30
balance-abs-threshold: 1 # cache-aware策略绝对阈值
balance-rel-threshold: 0.2 # cache-aware策略相对阈值
hit-ratio-weight: 1.0 # cache-aware策略命中率权重