mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Config eviction_duration (#7125)
* [Feature] Config eviction_duration * [Feature] Config eviction_duration * [Feature] Config eviction_duration * [Feature] Config eviction_duration --------- Co-authored-by: mouxin <mouxin@baidu.com>
This commit is contained in:
@@ -195,6 +195,7 @@ scheduler:
|
||||
prefill-policy: "cache_aware" # Prefill node scheduling policy in PD (prefill/decode disaggregated) mode; default: process_tokens
|
||||
decode-policy: "request_num" # Decode node scheduling policy in PD (prefill/decode disaggregated) mode; default: request_num
|
||||
eviction-interval-secs: 60 # Interval (seconds) between cache eviction runs for CacheAware scheduling
|
||||
eviction-duration-mins: 30 # Eviction duration for cache-aware radix tree nodes (minutes); default: 30
|
||||
balance-abs-threshold: 1 # Absolute threshold for CacheAware balancing
|
||||
balance-rel-threshold: 0.2 # Relative threshold for CacheAware balancing
|
||||
hit-ratio-weight: 1.0 # Cache hit ratio weight
|
||||
|
||||
@@ -195,6 +195,7 @@ scheduler:
|
||||
prefill-policy: "cache_aware" # pd分离模式下prefill节点调度策略; 默认: process_tokens
|
||||
decode-policy: "request_num" # pd分离模式下decode节点调度策略; 默认: request_num
|
||||
eviction-interval-secs: 60 # cache-aware策略清理过期cache的间隔时间(秒)
|
||||
eviction-duration-mins: 30 # cache-aware策略radix tree节点驱逐时间(分钟); 默认: 30
|
||||
balance-abs-threshold: 1 # cache-aware策略绝对阈值
|
||||
balance-rel-threshold: 0.2 # cache-aware策略相对阈值
|
||||
hit-ratio-weight: 1.0 # cache-aware策略命中率权重
|
||||
|
||||
Reference in New Issue
Block a user