mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Support KV Cache Storage (#5571)
* Support Mooncake Store * up * up * add op * fix conflict * fix error * up for comments * avoid thread lock * up * fix unittest * fix unittest * remove debug info * consider tp_size > 1 * add default rdma_nics * add utils * up * fix error --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
@@ -230,6 +230,14 @@ class EngineArgs:
|
||||
"""
|
||||
Port for cache queue.
|
||||
"""
|
||||
kvcache_storage_backend: str = None
|
||||
"""
|
||||
The backend used to store the KV cache. If set, KV cache storage is enabled with this backend; if None, it is disabled.
|
||||
"""
|
||||
write_policy: str = "write_through"
|
||||
"""
|
||||
The policy for writing the KV cache to storage.
|
||||
"""
|
||||
|
||||
# System configuration parameters
|
||||
use_warmup: int = 0
|
||||
@@ -557,6 +565,14 @@ class EngineArgs:
|
||||
if "PaddleOCR" in get_model_architecture(self.model, self.model_config_name):
|
||||
envs.FD_ENABLE_MAX_PREFILL = 1
|
||||
|
||||
if self.kvcache_storage_backend is not None:
|
||||
if not self.enable_prefix_caching:
|
||||
raise NotImplementedError("kvcache_storage_backend is only supported when enable_prefix_caching=True")
|
||||
if envs.ENABLE_V1_KVCACHE_SCHEDULER == 0:
|
||||
raise NotImplementedError(
|
||||
"kvcache_storage_backend is only supported when ENABLE_V1_KVCACHE_SCHEDULER=1"
|
||||
)
|
||||
|
||||
self.post_init_all_ports()
|
||||
|
||||
def post_init_all_ports(self):
|
||||
@@ -1018,6 +1034,22 @@ class EngineArgs:
|
||||
help="Static decoding blocks num.",
|
||||
)
|
||||
|
||||
cache_group.add_argument(
|
||||
"--kvcache-storage-backend",
|
||||
type=nullable_str,
|
||||
choices=["mooncake"],
|
||||
default=EngineArgs.kvcache_storage_backend,
|
||||
help="The storage backend for kvcache storage. Leave empty to disable.",
|
||||
)
|
||||
|
||||
cache_group.add_argument(
|
||||
"--write-policy",
|
||||
type=str,
|
||||
choices=["write_through"],
|
||||
default=EngineArgs.write_policy,
|
||||
help="KVCache write policy",
|
||||
)
|
||||
|
||||
# Cluster system parameters group
|
||||
system_group = parser.add_argument_group("System Configuration")
|
||||
system_group.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user