[XPU] glm-4.5-air (#7071)

This commit is contained in:
zhupengyang
2026-04-14 11:31:49 +08:00
committed by GitHub
parent 26c47c2afc
commit 27b00cf385
9 changed files with 32 additions and 18 deletions
@@ -136,6 +136,7 @@ def parse_quant_config(args, model_config, is_ernie, is_v1_loader):
logger.warning(f"Failed to parse quantization config normally ({e}), trying fallback")
quant_config_name = args.quantization["quantization"]
quantization_config["quantization"] = quant_config_name
model_config.quantization_config = quantization_config
# Special handling for Ernie models
if quant_config_name == "wint4" and is_ernie:
quantization_config["dense_quant_type"] = "wint8"