mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-22 16:07:51 +08:00
[Graph Optimization] Add full_cuda_graph to control subgraph split (#6027)
This commit is contained in:
@@ -164,6 +164,20 @@ class AppendAttentionBackend(AttentionBackend):
|
||||
|
||||
self.rank, self.device_id = init_rank_and_device_id(fd_config)
|
||||
self.use_output = not fd_config.graph_opt_config.full_cuda_graph
|
||||
if self.use_output:
|
||||
flag = "FLAGS_cuda_graph_blacklist"
|
||||
paddle.set_flags(
|
||||
{
|
||||
flag: ",".join(
|
||||
list(
|
||||
set(
|
||||
paddle.get_flags(flag)[flag].split(",")
|
||||
+ ["custom_op.static_op_append_attention_with_output_"]
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
)
|
||||
self.fd_config = fd_config
|
||||
|
||||
def init_attention_metadata(self, forward_meta: ForwardMeta):
|
||||
|
||||
@@ -6,3 +6,4 @@ graph_optimization_config:
|
||||
graph_opt_level: 2
|
||||
sot_warmup_sizes: [2,16,32,64]
|
||||
use_cudagraph: True
|
||||
full_cuda_graph: False
|
||||
|
||||
@@ -6,3 +6,4 @@ graph_optimization_config:
|
||||
graph_opt_level: 2
|
||||
sot_warmup_sizes: [2,16,32,64]
|
||||
use_cudagraph: True
|
||||
full_cuda_graph: False
|
||||
|
||||
@@ -6,3 +6,4 @@ graph_optimization_config:
|
||||
graph_opt_level: 1
|
||||
sot_warmup_sizes: [2,16,32,64]
|
||||
use_cudagraph: True
|
||||
full_cuda_graph: False
|
||||
|
||||
@@ -6,3 +6,4 @@ graph_optimization_config:
|
||||
graph_opt_level: 1
|
||||
sot_warmup_sizes: [2,16,32,64]
|
||||
use_cudagraph: True
|
||||
full_cuda_graph: False
|
||||
|
||||
@@ -89,7 +89,9 @@ class TestStaticGraphCUDAGraphSplit(unittest.TestCase):
|
||||
def test(self):
|
||||
"""Run test case"""
|
||||
# Set FastDeploy config
|
||||
graph_opt_config = GraphOptimizationConfig({"use_cudagraph": True, "graph_opt_level": 1})
|
||||
graph_opt_config = GraphOptimizationConfig(
|
||||
{"use_cudagraph": True, "graph_opt_level": 1, "full_cuda_graph": False}
|
||||
)
|
||||
scheduler_config = SchedulerConfig({"max_num_seqs": 1})
|
||||
graph_opt_config._set_cudagraph_sizes(max_capture_size=scheduler_config.max_num_seqs)
|
||||
graph_opt_config.init_with_cudagrpah_size(max_capture_size=scheduler_config.max_num_seqs)
|
||||
|
||||
Reference in New Issue
Block a user