mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimization] Enable BF16 gate computation for GLM and Qwen (#6457)
* gate bf16
* add gate-fp32
* fix
* update baseline
* update
* update
* fix
This commit is contained in:
@@ -930,6 +930,12 @@ def parse_args():
|
||||
help="Flag to specify dtype of lm_head as FP32",
|
||||
)
|
||||
|
||||
# Optional CLI switch: when supplied, the MoE gate computation is forced
# to FP32 rather than the model's default (e.g. BF16) dtype.
# NOTE(review): `parser` is the argparse.ArgumentParser built earlier in
# parse_args() (outside this excerpt).
_moe_gate_fp32_opts = {
    "action": "store_true",
    "help": "Flag to specify dtype of gate as FP32",
}
parser.add_argument("--moe_gate_fp32", **_moe_gate_fp32_opts)
|
||||
|
||||
parser.add_argument(
|
||||
"--max_encoder_cache",
|
||||
type=int,
|
||||
|
||||
Reference in New Issue
Block a user