[Feature] Support KV Cache Storage (#5571)

* Support Mooncake Store

* up

* up

* add op

* fix conflict

* fix error

* up for comments

* avoid thread lock

* up

* fix unittest

* fix unittest

* remove debug info

* consider tp_size > 1

* add default rdma_nics

* add utils

* up

* fix error

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
Juncai
2025-12-25 16:30:35 +08:00
committed by GitHub
parent be3be4913a
commit 412867fd99
27 changed files with 1672 additions and 195 deletions
+32
View File
@@ -230,6 +230,14 @@ class EngineArgs:
"""
Port for cache queue.
"""
kvcache_storage_backend: str = None
"""
The storage backend for the KV cache; currently only 'mooncake' is supported. If left unset (None), KV cache storage is disabled.
"""
write_policy: str = "write_through"
"""
The policy for writing the KV cache to storage; currently only 'write_through' is supported.
"""
# System configuration parameters
use_warmup: int = 0
@@ -557,6 +565,14 @@ class EngineArgs:
if "PaddleOCR" in get_model_architecture(self.model, self.model_config_name):
envs.FD_ENABLE_MAX_PREFILL = 1
if self.kvcache_storage_backend is not None:
if not self.enable_prefix_caching:
raise NotImplementedError("kvcache_storage_backend is only supported when enable_prefix_caching=True")
if envs.ENABLE_V1_KVCACHE_SCHEDULER == 0:
raise NotImplementedError(
"kvcache_storage_backend is only supported when ENABLE_V1_KVCACHE_SCHEDULER=1"
)
self.post_init_all_ports()
def post_init_all_ports(self):
@@ -1018,6 +1034,22 @@ class EngineArgs:
help="Static decoding blocks num.",
)
cache_group.add_argument(
"--kvcache-storage-backend",
type=nullable_str,
choices=["mooncake"],
default=EngineArgs.kvcache_storage_backend,
help="The storage backend for kvcache storage. Leave empty to disable.",
)
cache_group.add_argument(
"--write-policy",
type=str,
choices=["write_through"],
default=EngineArgs.write_policy,
help="KVCache write policy",
)
# Cluster system parameters group
system_group = parser.add_argument_group("System Configuration")
system_group.add_argument(