[Attention] Remove cum_offsets from the attention kernels and use cu_seqlens_q instead (#2870)
Deploy GitHub Pages / deploy (push) Has been cancelled

[Attention] Remove cum_offsets from the attention kernels and use cu_seqlens_q instead (#2870)
This commit is contained in:
周周周
2025-07-16 20:10:57 +08:00
committed by GitHub
parent 42b80182e0
commit aa76085d1f
47 changed files with 237 additions and 260 deletions
@@ -41,7 +41,7 @@ void CascadeAppendAttentionC16Kernel(
const paddle::Tensor& seq_lens_kv,
const paddle::Tensor& seq_lens_encoder,
const paddle::Tensor& padding_offsets,
const paddle::Tensor& cum_offsets,
const paddle::Tensor& cu_seqlens_q,
const paddle::Tensor& block_table,
const paddle::Tensor& batch_ids,
const paddle::Tensor& tile_ids_per_batch,
@@ -86,7 +86,7 @@ void CascadeAppendAttentionC8Kernel(
const paddle::Tensor& seq_lens_kv,
const paddle::Tensor& seq_lens_encoder,
const paddle::Tensor& padding_offsets,
const paddle::Tensor& cum_offsets,
const paddle::Tensor& cu_seqlens_q,
const paddle::Tensor& block_table,
const paddle::Tensor& batch_ids,
const paddle::Tensor& tile_ids_per_batch,
@@ -131,7 +131,7 @@ void CascadeAppendAttentionC4Kernel(
const paddle::Tensor& seq_lens_kv,
const paddle::Tensor& seq_lens_encoder,
const paddle::Tensor& padding_offsets,
const paddle::Tensor& cum_offsets,
const paddle::Tensor& cu_seqlens_q,
const paddle::Tensor& block_table,
const paddle::Tensor& batch_ids,
const paddle::Tensor& tile_ids_per_batch,
@@ -176,7 +176,7 @@ void CascadeAppendAttentionKernel(
const paddle::Tensor& seq_lens_kv,
const paddle::Tensor& seq_lens_encoder,
const paddle::Tensor& padding_offsets,
const paddle::Tensor& cum_offsets,
const paddle::Tensor& cu_seqlens_q,
const paddle::Tensor& block_table,
const paddle::Tensor& batch_ids,
const paddle::Tensor& tile_ids_per_batch,
@@ -212,7 +212,7 @@ void CascadeAppendAttentionKernel(
seq_lens_kv,
seq_lens_encoder,
padding_offsets,
cum_offsets,
cu_seqlens_q,
block_table,
batch_ids,
tile_ids_per_batch,
@@ -247,7 +247,7 @@ void CascadeAppendAttentionKernel(
seq_lens_kv,
seq_lens_encoder,
padding_offsets,
cum_offsets,
cu_seqlens_q,
block_table,
batch_ids,
tile_ids_per_batch,
@@ -282,7 +282,7 @@ void CascadeAppendAttentionKernel(
seq_lens_kv,
seq_lens_encoder,
padding_offsets,
cum_offsets,
cu_seqlens_q,
block_table,
batch_ids,
tile_ids_per_batch,
@@ -317,7 +317,7 @@ void CascadeAppendAttentionKernel(
seq_lens_kv,
seq_lens_encoder,
padding_offsets,
cum_offsets,
cu_seqlens_q,
block_table,
batch_ids,
tile_ids_per_batch,