[Metax] adapt prefix caching & cpu swap (#5844)

Co-authored-by: root <root@lt-wks-10-0-180-15.pub.metax-tech.com>
This commit is contained in:
MingkunZhang
2025-12-31 17:02:48 +08:00
committed by GitHub
parent 193886e745
commit f732d7d2ad
4 changed files with 94 additions and 1 deletion
+6 -1
View File
@@ -531,7 +531,12 @@ class EngineArgs:
self.tokenizer = self.model
if self.splitwise_role == "decode":
self.enable_prefix_caching = False
if not current_platform.is_cuda() and not current_platform.is_xpu() and not current_platform.is_intel_hpu():
if (
not current_platform.is_cuda()
and not current_platform.is_xpu()
and not current_platform.is_intel_hpu()
and not current_platform.is_maca()
):
self.enable_prefix_caching = False
if self.enable_logprob:
if not current_platform.is_cuda() and not current_platform.is_xpu():