mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 08:21:53 +08:00
[Optimization] Accelerate Qwen3 QK RMSNorm via Fused Triton Kernel (#5880)
* qk rmsnorm fused * inplace * glm * fix * add qknorm layer * fix * update * fix qwen3 vl * update rl baseline * fix qwen3 vl moe * test * fix qwen vl moe rl * fix
This commit is contained in:
@@ -207,6 +207,8 @@ class Qwen3VLForConditionalGeneration(ModelForCasualLM):
|
||||
("embed_tokens.embeddings", "embed_tokens", None),
|
||||
("lm_head.linear", "lm_head", None),
|
||||
("visual", "model.visual", None),
|
||||
("qk_norm.q_norm", "q_norm", None),
|
||||
("qk_norm.k_norm", "k_norm", None),
|
||||
]
|
||||
|
||||
params_dict = dict(self.named_parameters())
|
||||
|
||||
Reference in New Issue
Block a user