Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2026-04-23 00:17:25 +08:00)
[Optimization] Enable BF16 gate computation for GLM and Qwen (#6457)
* gate bf16
* add gate-fp32
* fix
* update baseline
* update
* update
* fix
This commit is contained in:
@@ -214,6 +214,7 @@ class ModelConfig:
self.pad_token_id: int = -1
self.eos_tokens_lens: int = 2
self.lm_head_fp32: bool = False
self.moe_gate_fp32: bool = False
self.model_format = "auto"
self.runner = "auto"
self.convert = "auto"
Reference in New Issue
Block a user