# Mirror of https://github.com/PaddlePaddle/FastDeploy.git
# Synced 2026-04-23 17:11:21 +08:00
# Mirror page listing: 31 lines, 820 B, YAML
# Server settings: listen port and host, run mode, and deployment mode.
# NOTE(review): host "0.0.0.0" together with mode "debug" looks like a
# development default - confirm before using this file in production.
server:
  port: "8080"
  host: "0.0.0.0"
  mode: "debug"  # debug, release, test
  splitwise: true  # true means pd mode, false means mixed mode

# Scheduler settings: an overall policy, separate prefill and decode
# policies, and the numeric thresholds and weights that accompany them.
# NOTE(review): the units and exact meaning of the thresholds and weights are
# not stated in this file - confirm against the component that loads it.
scheduler:
  policy: "power_of_two"
  prefill-policy: "cache_aware"
  decode-policy: "request_num"
  eviction-interval-secs: 60
  balance-abs-threshold: 1
  balance-rel-threshold: 0.2
  hit-ratio-weight: 1.0
  load-balance-weight: 0.05
  cache-block-size: 4
  waiting-weight: 10
  # interval in seconds for periodic stats logging
  # (running requests, cache hit rate)
  stats-interval-secs: 5

# Manager settings: health-check thresholds, timing and endpoint, plus the
# path of the registration file.
# NOTE(review): keys ending in "-secs" are presumably expressed in seconds,
# and register-path is a relative path, presumably resolved against the
# process working directory - confirm both against the consumer.
manager:
  health-failure-threshold: 3
  health-success-threshold: 2
  health-check-timeout-secs: 5
  health-check-interval-secs: 5
  health-check-endpoint: "/health"
  register-path: "config/register.yaml"

# Logging settings: verbosity level and output destination.
# NOTE(review): no log file path is configured here; where "file" output is
# written must be determined by the consumer - confirm.
log:
  level: "info"  # debug, info, warn, error
  output: "file"  # stdout, file