mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix] Refine the preparation of cpu and storage cache (#5777)
* Refine the preparation of cpu and storage cache
* fix error
* fix error
* up
* fix
* up docs
* fix unittest
* remove debug info
This commit is contained in:
@@ -115,6 +115,7 @@ class Request:
|
||||
# model specific token ids: end of sentence token ids
|
||||
self.eos_token_ids = eos_token_ids
|
||||
self.num_cached_tokens = 0
|
||||
self.num_cached_blocks = 0
|
||||
self.disable_chat_template = disable_chat_template
|
||||
self.disaggregate_info = disaggregate_info
|
||||
|
||||
@@ -528,7 +529,7 @@ class RequestMetrics:
|
||||
gpu_cache_token_num: Optional[int] = 0
|
||||
cpu_cache_token_num: Optional[int] = 0
|
||||
storage_cache_token_num: Optional[int] = 0
|
||||
gpu_cpu_cache_prepare_time: Optional[float] = None
|
||||
cpu_cache_prepare_time: Optional[float] = None
|
||||
storage_cache_prepare_time: Optional[float] = None
|
||||
|
||||
def __post_init__(self):
|
||||
|
||||
Reference in New Issue
Block a user