mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Others] Remove useless code (#5404)
This commit is contained in:
@@ -206,20 +206,9 @@ class AppendAttentionBackend(AttentionBackend):
|
||||
Calculate kv cache shape
|
||||
"""
|
||||
key_cache_shape = [max_num_blocks, self.kv_num_heads, self.block_size, self.head_dim]
|
||||
value_cache_shape = [max_num_blocks, self.kv_num_heads, self.block_size, self.head_dim]
|
||||
if kv_cache_quant_type is not None and kv_cache_quant_type == "int4_zp":
|
||||
key_cache_shape = [
|
||||
max_num_blocks,
|
||||
self.kv_num_heads,
|
||||
self.block_size,
|
||||
self.head_dim // 2,
|
||||
]
|
||||
value_cache_shape = [
|
||||
max_num_blocks,
|
||||
self.kv_num_heads,
|
||||
self.block_size,
|
||||
self.head_dim // 2,
|
||||
]
|
||||
key_cache_shape[-1] = self.head_dim // 2
|
||||
value_cache_shape = key_cache_shape
|
||||
return key_cache_shape, value_cache_shape
|
||||
|
||||
def forward_mixed(
|
||||
|
||||
Reference in New Issue
Block a user