[Graph Optimization] remove static_op_get_block_shape_and_split_kv_block from cudagraph (#6081)

* remove static_op_get_block_shape_and_split_kv_block from cudagraph

* update max_capture_shape

* fallback: zeros -> empty to avoid coverage check

* check that graph_opt_config exists

* add max_capture_shape_dy2st and change full_cuda_graph from false to true in the 28B VL test

* add use_cudagraph flag to control step_use_cudagraph
This commit is contained in:
Ryan
2026-01-20 14:05:18 +08:00
committed by GitHub
parent 45ebb2efb4
commit dda27e50f5
5 changed files with 23 additions and 8 deletions
@@ -125,10 +125,6 @@ class GraphOptBackend:
backend,
).__get__(self.runnable.__self__)
self.cudagraph_switch_threshold = (
1024 if self.fd_config.graph_opt_config.graph_opt_level > 0 else self.max_captre_size
)
def __call__(self, **kwargs):
if not self.fd_config.graph_opt_config.use_cudagraph:
return self.runnable(**kwargs)
@@ -143,7 +139,7 @@ class GraphOptBackend:
# only count the actual load.
self._debug_count_total_step += 1
if (not kwargs["forward_meta"].step_use_cudagraph) or (real_shape > self.cudagraph_switch_threshold):
if (not kwargs["forward_meta"].step_use_cudagraph) or (real_shape > self.max_captre_size):
return self.dy_runnable(**kwargs)
else:
self._debug_count_cudagraph_replay += 1