[Graph Optimization] remove static_op_get_block_shape_and_split_kv_block from cudagraph (#6081)

* rm static_op_get_block_shape_and_split_kv_block from cudagraph
* update max_capture_shape
* fallback: zeros -> empty to avoid coverage check
* check graph_opt_config exists
* add max_capture_shape_dy2st && full_cuda_graph: false -> true in 28B vl test
* add use_cudagraph flag to control step_use_cudagraph
2026-04-23 17:11:21 +08:00 · 2026-01-20 14:05:18 +08:00
parent 45ebb2efb4
commit dda27e50f5
5 changed files with 23 additions and 8 deletions
@@ -125,10 +125,6 @@ class GraphOptBackend:
                backend,
            ).__get__(self.runnable.__self__)

-        self.cudagraph_switch_threshold = (
-            1024 if self.fd_config.graph_opt_config.graph_opt_level > 0 else self.max_captre_size
-        )
-
    def __call__(self, **kwargs):
        if not self.fd_config.graph_opt_config.use_cudagraph:
            return self.runnable(**kwargs)
@@ -143,7 +139,7 @@ class GraphOptBackend:
            # only count the actual load.
            self._debug_count_total_step += 1

-        if (not kwargs["forward_meta"].step_use_cudagraph) or (real_shape > self.cudagraph_switch_threshold):
+        if (not kwargs["forward_meta"].step_use_cudagraph) or (real_shape > self.max_captre_size):
            return self.dy_runnable(**kwargs)
        else:
            self._debug_count_cudagraph_replay += 1