[Optimization] Enable BF16 gate computation for GLM and Qwen (#6457)

* compute gate in bf16

* add gate-fp32 (the `--moe_gate_fp32` flag, which sets the gate dtype to FP32)

* fix

* update baseline

* update

* update

* fix
This commit is contained in:
sunxin
2026-02-27 13:08:46 +08:00
committed by GitHub
parent edd31e8849
commit 53aaac69da
19 changed files with 95 additions and 28 deletions
+6
View File
@@ -930,6 +930,12 @@ def parse_args():
help="Flag to specify dtype of lm_head as FP32",
)
parser.add_argument(
"--moe_gate_fp32",
action="store_true",
help="Flag to specify dtype of gate as FP32",
)
parser.add_argument(
"--max_encoder_cache",
type=int,