mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix]fix iluvatar_model_runner about dsa_cache (#6733)
* fix iluvatar_model_runner
This commit is contained in:
@@ -59,6 +59,8 @@ class IluvatarModelRunner(GPUModelRunner):
|
||||
assert not self.cache_config.enable_prefix_caching, "Iluvatar does not support prefix caching"
|
||||
self.mla_cache = envs.FD_ATTENTION_BACKEND == "MLA_ATTN"
|
||||
assert not self.mla_cache, "Iluvatar does not support MLA"
|
||||
self.dsa_cache = envs.FD_ATTENTION_BACKEND == "DSA_ATTN"
|
||||
assert not self.dsa_cache, "Iluvatar does not support DSA_ATTN"
|
||||
if self.enable_mm:
|
||||
assert (
|
||||
not self.cache_config.enable_chunked_prefill
|
||||
|
||||
Reference in New Issue
Block a user