mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] support compute shared experts before combine for better overlap (#6697)
* [Feature] support compute shared experts before combine for better overlap
* fix test
* fix xpu
* fix
This commit is contained in:
@@ -106,7 +106,7 @@ class MockAttentionBackend:
|
||||
|
||||
|
||||
class MockQuantMethod:
|
||||
-    def apply(self, layer, x, gate, topk_ids_hookfunc=None):
|
||||
+    def apply(self, layer, x, gate, topk_ids_hookfunc=None, shared_experts=None):
|
||||
return x
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user