mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-24 01:29:57 +08:00
[Other] Adjust GPUModelRunner to enhance compatibility (#6851)
This commit is contained in:
@@ -326,7 +326,6 @@ class AppendAttentionBackend(AttentionBackend):
|
||||
cache_v_scales = getattr(layer, "cache_v_scale", None)
|
||||
|
||||
if layer.layer_id == 0:
|
||||
# print(forward_meta.seq_lens_this_time)
|
||||
get_block_shape_and_split_kv_block(
|
||||
forward_meta.seq_lens_encoder,
|
||||
forward_meta.seq_lens_decoder,
|
||||
|
||||
Reference in New Issue
Block a user