Weight-only quant method supports QKVGate_proj (#6612)

2026-04-23 00:17:25 +08:00 · 2026-03-03 11:19:32 +08:00
parent 3cc09418f1
commit 1cae7a0d53
1 changed file with 2 additions and 0 deletions
@@ -26,6 +26,7 @@ from fastdeploy import envs
 from fastdeploy.model_executor.layers.linear import (
    MergedColumnParallelLinear,
    MergedReplicatedLinear,
+    QKVGateParallelLinear,
    QKVParallelLinear,
 )
 from fastdeploy.model_executor.utils import (
@@ -252,6 +253,7 @@ class WeightOnlyLinearMethod(QuantMethodBase):
                isinstance(layer, MergedColumnParallelLinear)
                or isinstance(layer, QKVParallelLinear)
                or isinstance(layer, MergedReplicatedLinear)
+                or isinstance(layer, QKVGateParallelLinear)
            ):
                # Only MergedReplicatedLinear uses the default outdim.
                tensor_output_dim = (self.model_format == "torch") ^ quant_attrs.get("output_dim", True)