mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[KVCache] support unified cache backend (#4903)
* [Feature] support unified cache backend * fix * fix * fix * fix * Update metax_model_runner.py * fix * update * Update test_moba_attention_backend.py --------- Co-authored-by: ltd0924 <luotingdan@baidu.com>
This commit is contained in:
@@ -22,9 +22,8 @@ class Args:
|
||||
num_cpu_blocks = 1
|
||||
num_gpu_blocks = 1
|
||||
num_layers = 1
|
||||
head_dim = 1
|
||||
kv_num_head = 1
|
||||
bytes_per_layer_per_block = 1024
|
||||
key_cache_shape = "1,1,1,1"
|
||||
value_cache_shape = ""
|
||||
create_cache_tensor = False
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user