[PD Disaggregation] Prefill and decode support cache storage (#6768)

* Prefill and decode support cache storage

* up

* up

* Update docs and refine Mooncake Store

* up
This commit is contained in:
jc
2026-03-16 14:44:49 +08:00
committed by GitHub
parent 72ff7bf4cd
commit 04fde3b227
12 changed files with 1083 additions and 66 deletions
-2
View File
@@ -558,8 +558,6 @@ class EngineArgs:
if not self.tokenizer:
self.tokenizer = self.model
if self.splitwise_role == "decode":
self.enable_prefix_caching = False
if (
not current_platform.is_cuda()
and not current_platform.is_xpu()