[Metax] adapt prefix caching & cpu swap (#5844)

Co-authored-by: root <root@lt-wks-10-0-180-15.pub.metax-tech.com>
2026-04-23 08:21:53 +08:00 · 2025-12-31 17:02:48 +08:00
parent 193886e745
commit f732d7d2ad
4 changed files with 94 additions and 1 deletions
@@ -531,7 +531,12 @@ class EngineArgs:
            self.tokenizer = self.model
        if self.splitwise_role == "decode":
            self.enable_prefix_caching = False
-        if not current_platform.is_cuda() and not current_platform.is_xpu() and not current_platform.is_intel_hpu():
+        if (
+            not current_platform.is_cuda()
+            and not current_platform.is_xpu()
+            and not current_platform.is_intel_hpu()
+            and not current_platform.is_maca()
+        ):
            self.enable_prefix_caching = False
        if self.enable_logprob:
            if not current_platform.is_cuda() and not current_platform.is_xpu():