mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimization] Enable BF16 gate computation for GLM and Qwen (#6457)
* gate bf16
* add gate-fp32
* fix
* update baseline
* update
* update
* fix
This commit is contained in:
@@ -2046,6 +2046,7 @@ class EngineService:
|
||||
"disable_sequence_parallel_moe": self.cfg.parallel_config.disable_sequence_parallel_moe,
|
||||
"enable_logprob": self.cfg.model_config.enable_logprob,
|
||||
"lm_head_fp32": self.cfg.model_config.lm_head_fp32,
|
||||
"moe_gate_fp32": self.cfg.model_config.moe_gate_fp32,
|
||||
"enable_entropy": self.cfg.model_config.enable_entropy,
|
||||
"enable_overlap_schedule": self.cfg.scheduler_config.enable_overlap_schedule,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user