[BugFix] Refine the preparation of cpu and storage cache (#5777)

* Refine the preparation of cpu and storage cache

* fix error

* fix error

* up

* fix

* update docs

* fix unit test

* remove debug info
This commit is contained in:
jc
2026-01-05 10:13:30 +08:00
committed by GitHub
parent 95257c1dbd
commit e911ac2ce7
10 changed files with 156 additions and 149 deletions
+2 -1
View File
@@ -115,6 +115,7 @@ class Request:
# model specific token ids: end of sentence token ids
self.eos_token_ids = eos_token_ids
self.num_cached_tokens = 0
self.num_cached_blocks = 0
self.disable_chat_template = disable_chat_template
self.disaggregate_info = disaggregate_info
@@ -528,7 +529,7 @@ class RequestMetrics:
gpu_cache_token_num: Optional[int] = 0
cpu_cache_token_num: Optional[int] = 0
storage_cache_token_num: Optional[int] = 0
gpu_cpu_cache_prepare_time: Optional[float] = None
cpu_cache_prepare_time: Optional[float] = None
storage_cache_prepare_time: Optional[float] = None
def __post_init__(self):