mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] console print metrics add env (#6413)
This commit is contained in:
@@ -957,7 +957,11 @@ class ResourceManagerV1(ResourceManager):
|
||||
if self.current_reserve_output_block_num == 0:
|
||||
self.can_relax_prefill_strategy = True
|
||||
|
||||
if hasattr(self, "scheduler_metrics_logger") and self.scheduler_metrics_logger is not None:
|
||||
if (
|
||||
hasattr(self, "scheduler_metrics_logger")
|
||||
and self.scheduler_metrics_logger is not None
|
||||
and envs.FD_CONSOLE_SCHEDULER_METRICS
|
||||
):
|
||||
total_blocks = self.total_block_number()
|
||||
free_blocks = self.available_block_num()
|
||||
used_blocks = max(total_blocks - free_blocks, 0)
|
||||
@@ -979,12 +983,29 @@ class ResourceManagerV1(ResourceManager):
|
||||
token_usage=token_usage,
|
||||
)
|
||||
if has_decode:
|
||||
has_prefill = len(prefill_reqs) > 0
|
||||
graph_opt_cfg = self.config.graph_opt_config
|
||||
use_cudagraph_cfg = bool(getattr(graph_opt_cfg, "use_cudagraph", False))
|
||||
graph_opt_level = int(getattr(graph_opt_cfg, "graph_opt_level", 0) or 0)
|
||||
full_cuda_graph = bool(getattr(graph_opt_cfg, "full_cuda_graph", True))
|
||||
cudagraph_only_prefill = bool(getattr(graph_opt_cfg, "cudagraph_only_prefill", False))
|
||||
use_decode_cudagraph = (
|
||||
has_decode
|
||||
and use_cudagraph_cfg
|
||||
and (
|
||||
# Reference PR https://github.com/PaddlePaddle/FastDeploy/pull/6196
|
||||
# Static split graph mode: Prefill+Mixed and Decode can use CUDAGraph.
|
||||
(graph_opt_level > 0 and not full_cuda_graph)
|
||||
# Dynamic / static-full modes: decode-only can use CUDAGraph.
|
||||
or (not has_prefill and not cudagraph_only_prefill)
|
||||
)
|
||||
)
|
||||
self.scheduler_metrics_logger.log_decode_batch(
|
||||
running_cnt=running_cnt,
|
||||
queue_cnt=queue_cnt,
|
||||
tokens_used=tokens_used,
|
||||
token_usage=token_usage,
|
||||
use_cudagraph=self.config.graph_opt_config.use_cudagraph,
|
||||
use_cudagraph=use_decode_cudagraph,
|
||||
)
|
||||
|
||||
self.update_metrics()
|
||||
|
||||
Reference in New Issue
Block a user