[XPU] Support QKVGate_proj in the weight-only quant method (#6641)

2026-04-25 18:13:21 +08:00 · 2026-03-04 18:25:03 +08:00
parent 3345641f4e
commit 1256fd3806
1 changed file with 2 additions and 0 deletions
@@ -20,6 +20,7 @@ from paddle import nn
 from fastdeploy.model_executor.layers.linear import (
    MergedColumnParallelLinear,
    MergedReplicatedLinear,
+    QKVGateParallelLinear,
    QKVParallelLinear,
 )
 from fastdeploy.model_executor.layers.quantization.weight_only import (
@@ -60,6 +61,7 @@ class XPUWeightOnlyLinearMethod(WeightOnlyLinearMethod):
                isinstance(layer, MergedColumnParallelLinear)
                or isinstance(layer, QKVParallelLinear)
                or isinstance(layer, MergedReplicatedLinear)
+                or isinstance(layer, QKVGateParallelLinear)
            ):
                quant_attrs = {
                    **extra_weight_attrs,