mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
Weight-only quant method: add support for QKVGate_proj (#6612)
This commit is contained in:
@@ -26,6 +26,7 @@ from fastdeploy import envs
|
||||
from fastdeploy.model_executor.layers.linear import (
|
||||
MergedColumnParallelLinear,
|
||||
MergedReplicatedLinear,
|
||||
QKVGateParallelLinear,
|
||||
QKVParallelLinear,
|
||||
)
|
||||
from fastdeploy.model_executor.utils import (
|
||||
@@ -252,6 +253,7 @@ class WeightOnlyLinearMethod(QuantMethodBase):
|
||||
isinstance(layer, MergedColumnParallelLinear)
|
||||
or isinstance(layer, QKVParallelLinear)
|
||||
or isinstance(layer, MergedReplicatedLinear)
|
||||
or isinstance(layer, QKVGateParallelLinear)
|
||||
):
|
||||
# Only MergedReplicatedLinear uses the default outdim.
|
||||
tensor_output_dim = (self.model_format == "torch") ^ quant_attrs.get("output_dim", True)
|
||||
|
||||
Reference in New Issue
Block a user