[Optimization] Enable BF16 gate computation for GLM and Qwen (#6457)

* gate bf16
* add gate-fp32
* fix
* update baseline
* update
* update
* fix
2026-04-23 00:17:25 +08:00 · 2026-02-27 13:08:46 +08:00
parent edd31e8849
commit 53aaac69da
19 changed files with 95 additions and 28 deletions
@@ -214,6 +214,7 @@ class ModelConfig:
        self.pad_token_id: int = -1
        self.eos_tokens_lens: int = 2
        self.lm_head_fp32: bool = False
+        self.moe_gate_fp32: bool = False
        self.model_format = "auto"
        self.runner = "auto"
        self.convert = "auto"