[Optimization] Enable BF16 gate computation for GLM and Qwen (#6457)

* Compute the gate in BF16

* Add gate-fp32 setting (`moe_gate_fp32`)

* fix

* update baseline

* update

* update

* fix
This commit is contained in:
sunxin
2026-02-27 13:08:46 +08:00
committed by GitHub
parent edd31e8849
commit 53aaac69da
19 changed files with 95 additions and 28 deletions
+1
View File
@@ -2046,6 +2046,7 @@ class EngineService:
"disable_sequence_parallel_moe": self.cfg.parallel_config.disable_sequence_parallel_moe,
"enable_logprob": self.cfg.model_config.enable_logprob,
"lm_head_fp32": self.cfg.model_config.lm_head_fp32,
"moe_gate_fp32": self.cfg.model_config.moe_gate_fp32,
"enable_entropy": self.cfg.model_config.enable_entropy,
"enable_overlap_schedule": self.cfg.scheduler_config.enable_overlap_schedule,
}