From 5416da8c6e6645031ffb6a34fe86ba1bff19eb9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=91=A8=E5=91=A8?= <39978853+zhoutianzi666@users.noreply.github.com> Date: Mon, 23 Mar 2026 14:22:03 +0800 Subject: [PATCH] remove assert (#6970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: “liuruian” --- .../model_executor/layers/attention/ops/append_attention.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fastdeploy/model_executor/layers/attention/ops/append_attention.py b/fastdeploy/model_executor/layers/attention/ops/append_attention.py index f32022e1ed..8b36ffa85b 100644 --- a/fastdeploy/model_executor/layers/attention/ops/append_attention.py +++ b/fastdeploy/model_executor/layers/attention/ops/append_attention.py @@ -90,12 +90,6 @@ def append_attention( append_attention """ if current_platform.is_cuda(): - bsz = seq_lens_encoder.shape[0] - assert seq_lens_encoder.shape == [bsz] - assert seq_lens_decoder.shape == [bsz] - assert seq_lens_this_time.shape == [bsz] - assert cu_seqlens_q.shape == [bsz + 1] - assert block_tables.shape[0] == bsz if sliding_window > 0 and head_wise_full_hidden > 0: out_swa = append_attention_gpu(