mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Metrics] Update time_to_first_token to include tokenization & queue time, and remove redundant metrics (#4993)
* [update] update time_to_first_token to include queue time, and remove first_token_latency and infer_latency
* [doc] update docs
* [ci] fix test
* [chore] delete redundant code

---------

Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com>
This commit is contained in:
@@ -1820,6 +1820,7 @@ class PrefixCacheManager:
|
||||
# reset metrics
|
||||
self.metrics.reset_metrics()
|
||||
main_process_metrics.free_gpu_block_num.set(len(self.gpu_free_block_list))
|
||||
main_process_metrics.available_gpu_block_num.set(len(self.gpu_free_block_list))
|
||||
main_process_metrics.available_gpu_resource.set(self.available_gpu_resource)
|
||||
|
||||
def clear_prefix_cache(self):
|
||||
|
||||
Reference in New Issue
Block a user