From 5416da8c6e6645031ffb6a34fe86ba1bff19eb9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E5=91=A8=E5=91=A8?=
 <39978853+zhoutianzi666@users.noreply.github.com>
Date: Mon, 23 Mar 2026 14:22:03 +0800
Subject: [PATCH] Remove batch-size shape asserts from append_attention (#6970)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: liuruian <liuruian@baidu.com>
---
 .../model_executor/layers/attention/ops/append_attention.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/fastdeploy/model_executor/layers/attention/ops/append_attention.py b/fastdeploy/model_executor/layers/attention/ops/append_attention.py
index f32022e1ed..8b36ffa85b 100644
--- a/fastdeploy/model_executor/layers/attention/ops/append_attention.py
+++ b/fastdeploy/model_executor/layers/attention/ops/append_attention.py
@@ -90,12 +90,6 @@ def append_attention(
     append_attention
     """
     if current_platform.is_cuda():
-        bsz = seq_lens_encoder.shape[0]
-        assert seq_lens_encoder.shape == [bsz]
-        assert seq_lens_decoder.shape == [bsz]
-        assert seq_lens_this_time.shape == [bsz]
-        assert cu_seqlens_q.shape == [bsz + 1]
-        assert block_tables.shape[0] == bsz
 
         if sliding_window > 0 and head_wise_full_hidden > 0:
             out_swa = append_attention_gpu(