mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] [KVCache] support attention_store kv cache backend (#5823)
* [feat] support attention_store kv cache backend
* [fix] fix codestyle
* [chore] optimize log
* [fix] fix write storage task
* [fix] fix read storage
* [fix] fix code conflict after merge develop
* [fix] fix cache bytes and read task token ids
* [chore] add model for cache transfer manager
* [chore] add some log
* [chore] remove launched_cache_manager_signal
* [fix] fix write_back_storage_task match_block_num condition
* [fix] fix swap_cost_time
* [ci] fix ci
* Update fastdeploy/engine/sched/resource_manager_v1.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update fastdeploy/cache_manager/cache_transfer_manager.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update fastdeploy/cache_manager/transfer_factory/mooncake_store/attention_store.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---------
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -629,7 +629,6 @@ class EngineArgs:
|
||||
for port in cur_dp_ports:
|
||||
assert is_port_available("0.0.0.0", port), f"Parameter `{name}`:{port} is already in use."
|
||||
|
||||
console_logger.debug(f"post init {name}: {ports}")
|
||||
return ports
|
||||
|
||||
num_nodes = len(self.ips) if self.ips else 1
|
||||
@@ -1077,7 +1076,7 @@ class EngineArgs:
|
||||
cache_group.add_argument(
|
||||
"--kvcache-storage-backend",
|
||||
type=nullable_str,
|
||||
- choices=["mooncake"],
|
||||
+ choices=["mooncake", "attention_store"],
|
||||
default=EngineArgs.kvcache_storage_backend,
|
||||
help="The storage backend for kvcache storage. Leave empty to disable.",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user