[XPU][Graph Optimization] XPU Support CUDAGraph (#6152)

* support cuda graph
This commit is contained in:
yinwei
2026-01-22 14:41:56 +08:00
committed by GitHub
parent 82057cb71f
commit 1e3c35496c
6 changed files with 143 additions and 16 deletions
@@ -105,6 +105,7 @@ def xpu_pre_process(
seq_lens_decoder: Optional[paddle.Tensor] = None,
is_profiling: bool = False,
forward_meta=None,
use_cudagraph=False,
) -> XPUForwardMeta:
""" """
max_len = input_ids.shape[1]
@@ -152,7 +153,6 @@ def xpu_pre_process(
cu_seqlens_k,
) = get_padding_offset(input_ids, cum_offsets_now, token_num, seq_lens_this_time)
share_inputs["ids_remove_padding"] = None # set this after adjust batch
share_inputs["cum_offsets"] = cum_offsets
share_inputs["batch_id_per_token"] = batch_id_per_token
share_inputs["cu_seqlens_q"] = cu_seqlens_q
@@ -221,11 +221,18 @@ def xpu_pre_process(
adjusted_input = adjusted_input.squeeze(1)
share_inputs["ids_remove_padding"] = adjusted_input
share_inputs["ids_remove_padding"].copy_(adjusted_input, False)
xpu_forward_meta.ids_remove_padding = adjusted_input
# Set forward_meta.is_profiling to True to skip init_kv_signal_per_query for attention backends
xpu_forward_meta.is_profiling = is_profiling
return xpu_forward_meta
if use_cudagraph:
if forward_meta is None:
return xpu_forward_meta
else:
forward_meta.copy_from(xpu_forward_meta)
return forward_meta
else:
return xpu_forward_meta
def xpu_process_output(