mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Graph Optimization] remove static_op_get_block_shape_and_split_kv_block from cudagraph (#6081)
* remove static_op_get_block_shape_and_split_kv_block from cudagraph
* update max_capture_shape
* fallback: zeros -> empty to avoid coverage check
* check that graph_opt_config exists
* add max_capture_shape_dy2st, and change full_cuda_graph: false -> true in the 28B VL test
* add use_cudagraph flag to control step_use_cudagraph
This commit is contained in:
@@ -125,10 +125,6 @@ class GraphOptBackend:
|
||||
backend,
|
||||
).__get__(self.runnable.__self__)
|
||||
|
||||
self.cudagraph_switch_threshold = (
|
||||
1024 if self.fd_config.graph_opt_config.graph_opt_level > 0 else self.max_captre_size
|
||||
)
|
||||
|
||||
def __call__(self, **kwargs):
|
||||
if not self.fd_config.graph_opt_config.use_cudagraph:
|
||||
return self.runnable(**kwargs)
|
||||
@@ -143,7 +139,7 @@ class GraphOptBackend:
|
||||
# only count the actual load.
|
||||
self._debug_count_total_step += 1
|
||||
|
||||
if (not kwargs["forward_meta"].step_use_cudagraph) or (real_shape > self.cudagraph_switch_threshold):
|
||||
if (not kwargs["forward_meta"].step_use_cudagraph) or (real_shape > self.max_captre_size):
|
||||
return self.dy_runnable(**kwargs)
|
||||
else:
|
||||
self._debug_count_cudagraph_replay += 1
|
||||
|
||||
Reference in New Issue
Block a user