Mirror of https://github.com/PaddlePaddle/FastDeploy.git — synced 2026-04-23 00:17:25 +08:00
[Graph Optimization] remove static_op_get_block_shape_and_split_kv_block from cudagraph (#6081)
* rm static_op_get_block_shape_and_split_kv_block from cudagraph
* update max_capture_shape
* fallback: zeros -> empty to avoid coverage check
* check graph_opt_config exists
* add max_capture_shape_dy2st && full_cuda_graph: false -> true in 28B vl test
* add use_cudagraph flag to control step_use_cudagraph
This commit is contained in:
@@ -172,7 +172,10 @@ class AppendAttentionBackend(AttentionBackend):
             list(
                 set(
                     paddle.get_flags(flag)[flag].split(",")
-                    + ["custom_op.static_op_append_attention_with_output_"]
+                    + [
+                        "custom_op.static_op_append_attention_with_output_",
+                        "custom_op.static_op_get_block_shape_and_split_kv_block",
+                    ]
                 )
             )
         )
@@ -350,7 +353,7 @@ class AppendAttentionBackend(AttentionBackend):
         else:
             raise NotImplementedError("Only supported attr of quant_max_bound in ['127', '448'].")
     else:
-        res = paddle.empty([token_nums, q_num_heads * head_dims], dtype=D_type)
+        res = paddle.zeros([token_nums, q_num_heads * head_dims], dtype=D_type)

     res = append_attention_with_output(
         qkv,
||||
Reference in New Issue
Block a user