mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[XPU] Support CudaGraph(add block attn cuda_graph support) (#6116)
* add block attn cuda_graph support
This commit is contained in:
@@ -203,6 +203,14 @@ class XPUAttentionBackend(AttentionBackend):
|
||||
forward_meta.decoder_context_len_cache_cpu,
|
||||
forward_meta.decoder_batch_map_cpu,
|
||||
forward_meta.prefix_len_cpu,
|
||||
forward_meta.encoder_seq_lod,
|
||||
forward_meta.decoder_seq_lod,
|
||||
forward_meta.encoder_kv_lod,
|
||||
forward_meta.encoder_batch_map,
|
||||
forward_meta.decoder_context_len,
|
||||
forward_meta.decoder_context_len_cache,
|
||||
forward_meta.decoder_batch_map,
|
||||
forward_meta.prefix_len,
|
||||
cache_k_scale,
|
||||
cache_v_scale,
|
||||
cache_k_out_scale,
|
||||
|
||||
Reference in New Issue
Block a user