mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] support qkv&gate linear fusion (#6455)
* [Feature] support qkv&gate linear fusion
* add test
This commit is contained in:
@@ -23,6 +23,7 @@ from fastdeploy import envs
|
||||
from fastdeploy.model_executor.layers.linear import (
|
||||
MergedColumnParallelLinear,
|
||||
MergedReplicatedLinear,
|
||||
QKVGateParallelLinear,
|
||||
QKVParallelLinear,
|
||||
)
|
||||
from fastdeploy.model_executor.layers.moe import FusedMoE
|
||||
@@ -160,6 +161,7 @@ class BlockWiseFP8LinearMethod(QuantMethodBase):
|
||||
isinstance(layer, MergedColumnParallelLinear)
|
||||
or isinstance(layer, QKVParallelLinear)
|
||||
or isinstance(layer, MergedReplicatedLinear)
|
||||
or isinstance(layer, QKVGateParallelLinear)
|
||||
):
|
||||
tensor_output_dim = (self.model_format == "torch") ^ quant_attrs.get("output_dim", True)
|
||||
quant_attrs = {
|
||||
|
||||
Reference in New Issue
Block a user