[Optimization] Enable BF16 gate computation for GLM and Qwen (#6457)

* gate bf16

* add gate-fp32

* fix

* update baseline

* update

* update

* fix
This commit is contained in:
sunxin
2026-02-27 13:08:46 +08:00
committed by GitHub
parent edd31e8849
commit 53aaac69da
19 changed files with 95 additions and 28 deletions
+1
View File
@@ -214,6 +214,7 @@ class ModelConfig:
self.pad_token_id: int = -1
self.eos_tokens_lens: int = 2
self.lm_head_fp32: bool = False
self.moe_gate_fp32: bool = False
self.model_format = "auto"
self.runner = "auto"
self.convert = "auto"