[Feature][Docs] Adjust prefill release & expose load metrics (#6884)

This commit is contained in:
mouxin
2026-03-17 15:23:13 +08:00
committed by GitHub
parent daaf498213
commit b61731bb96
20 changed files with 576 additions and 59 deletions
@@ -14,9 +14,8 @@ scheduler:
hit-ratio-weight: 1.0
load-balance-weight: 0.05
cache-block-size: 4
tokenizer-url: "http://0.0.0.0:8098" # optional tokenizer service endpoint
tokenizer-timeout-secs: 2
waiting-weight: 10
stats-interval-secs: 5 # interval in seconds for periodic stats logging (running requests, cache hit rate)
manager:
health-failure-threshold: 3