[Feature] console print metrics add env (#6413)

2026-04-23 00:17:25 +08:00 · 2026-02-10 09:37:11 +08:00
parent b84056fdaa
commit 335ab70b1c
4 changed files with 66 additions and 2 deletions
@@ -957,7 +957,11 @@ class ResourceManagerV1(ResourceManager):
                if self.current_reserve_output_block_num == 0:
                    self.can_relax_prefill_strategy = True

-            if hasattr(self, "scheduler_metrics_logger") and self.scheduler_metrics_logger is not None:
+            if (
+                hasattr(self, "scheduler_metrics_logger")
+                and self.scheduler_metrics_logger is not None
+                and envs.FD_CONSOLE_SCHEDULER_METRICS
+            ):
                total_blocks = self.total_block_number()
                free_blocks = self.available_block_num()
                used_blocks = max(total_blocks - free_blocks, 0)
@@ -979,12 +983,29 @@ class ResourceManagerV1(ResourceManager):
                    token_usage=token_usage,
                )
                if has_decode:
+                    has_prefill = len(prefill_reqs) > 0
+                    graph_opt_cfg = self.config.graph_opt_config
+                    use_cudagraph_cfg = bool(getattr(graph_opt_cfg, "use_cudagraph", False))
+                    graph_opt_level = int(getattr(graph_opt_cfg, "graph_opt_level", 0) or 0)
+                    full_cuda_graph = bool(getattr(graph_opt_cfg, "full_cuda_graph", True))
+                    cudagraph_only_prefill = bool(getattr(graph_opt_cfg, "cudagraph_only_prefill", False))
+                    use_decode_cudagraph = (
+                        has_decode
+                        and use_cudagraph_cfg
+                        and (
+                            # Reference PR https://github.com/PaddlePaddle/FastDeploy/pull/6196
+                            # Static split graph mode: Prefill+Mixed and Decode can use CUDAGraph.
+                            (graph_opt_level > 0 and not full_cuda_graph)
+                            # Dynamic / static-full modes: decode-only can use CUDAGraph.
+                            or (not has_prefill and not cudagraph_only_prefill)
+                        )
+                    )
                    self.scheduler_metrics_logger.log_decode_batch(
                        running_cnt=running_cnt,
                        queue_cnt=queue_cnt,
                        tokens_used=tokens_used,
                        token_usage=token_usage,
-                        use_cudagraph=self.config.graph_opt_config.use_cudagraph,
+                        use_cudagraph=use_decode_cudagraph,
                    )

            self.update_metrics()