[Feature] console print metrics add env (#6413)

This commit is contained in:
CSWYF3634076
2026-02-10 09:37:11 +08:00
committed by GitHub
parent b84056fdaa
commit 335ab70b1c
4 changed files with 66 additions and 2 deletions
+23 -2
View File
@@ -957,7 +957,11 @@ class ResourceManagerV1(ResourceManager):
if self.current_reserve_output_block_num == 0:
self.can_relax_prefill_strategy = True
if hasattr(self, "scheduler_metrics_logger") and self.scheduler_metrics_logger is not None:
if (
hasattr(self, "scheduler_metrics_logger")
and self.scheduler_metrics_logger is not None
and envs.FD_CONSOLE_SCHEDULER_METRICS
):
total_blocks = self.total_block_number()
free_blocks = self.available_block_num()
used_blocks = max(total_blocks - free_blocks, 0)
@@ -979,12 +983,29 @@ class ResourceManagerV1(ResourceManager):
token_usage=token_usage,
)
if has_decode:
has_prefill = len(prefill_reqs) > 0
graph_opt_cfg = self.config.graph_opt_config
use_cudagraph_cfg = bool(getattr(graph_opt_cfg, "use_cudagraph", False))
graph_opt_level = int(getattr(graph_opt_cfg, "graph_opt_level", 0) or 0)
full_cuda_graph = bool(getattr(graph_opt_cfg, "full_cuda_graph", True))
cudagraph_only_prefill = bool(getattr(graph_opt_cfg, "cudagraph_only_prefill", False))
use_decode_cudagraph = (
has_decode
and use_cudagraph_cfg
and (
# Reference PR https://github.com/PaddlePaddle/FastDeploy/pull/6196
# Static split graph mode: Prefill+Mixed and Decode can use CUDAGraph.
(graph_opt_level > 0 and not full_cuda_graph)
# Dynamic / static-full modes: decode-only can use CUDAGraph.
or (not has_prefill and not cudagraph_only_prefill)
)
)
self.scheduler_metrics_logger.log_decode_batch(
running_cnt=running_cnt,
queue_cnt=queue_cnt,
tokens_used=tokens_used,
token_usage=token_usage,
use_cudagraph=self.config.graph_opt_config.use_cudagraph,
use_cudagraph=use_decode_cudagraph,
)
self.update_metrics()