[FDConfig] Support CLI args for quantization params and add cudagraph validation (#7281)

* Refactor the quantization CLI parameter
2026-04-23 00:17:25 +08:00 · 2026-04-10 14:13:42 +08:00
parent 7614175e13
commit c1fb3112f8
5 changed files with 116 additions and 45 deletions
@@ -859,11 +859,14 @@ class EngineArgs:
            "--quantization",
            type=parse_quantization,
            default=EngineArgs.quantization,
-            help="Quantization name for the model, currently support "
-            "'wint8', 'wint4',"
-            "default is None. The priority of this configuration "
-            "is lower than that of the config file. "
-            "More complex quantization methods need to be configured via the config file.",
+            help="Quantization config for the model. Can be a simple method name "
+            "(e.g. 'wint8', 'wint4') or a full JSON quantization_config string "
+            '(e.g. \'{"quantization": "mix_quant", "kv_cache_quant_type": "block_wise_fp8", '
+            '"dense_quant_type": "block_wise_fp8", "moe_quant_type": "block_wise_fp8"}\'). '
+            "When a JSON config is provided, it is processed the same way as "
+            "quantization_config in the model's config.json. "
+            "If both CLI and config.json specify quantization_config, "
+            "config.json takes higher priority. Default is None.",
        )
        model_group.add_argument(
            "--graph-optimization-config",