[Optimization] merge matmul and add (#6986)

* merge matmul and add

* modify format

* using paddle.nn.functional.linear

* using _C_ops.linear

* using paddle.nn.functional.linear

* add FLAGS_use_legacy_linear env var in test case

* fix format

* add assert and remove env

* modify format

* using matmul for no bias

* modify accurate baseline
This commit is contained in:
Bingoo
2026-04-03 18:02:03 +08:00
committed by GitHub
parent 1090f8b123
commit 2068656a85
2 changed files with 12 additions and 5 deletions
+10 -3
View File
@@ -82,10 +82,17 @@ class UnquantizedLinearMethod(QuantMethodBase):
layer.weight.set_value(weights)
def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor:
    """Apply this layer's unquantized linear transform to ``x``.

    Computes ``x @ layer.weight`` and, when the layer carries a bias,
    fuses the matmul and the bias add into a single
    ``paddle.nn.functional.linear`` call instead of a separate
    ``paddle.matmul`` followed by ``paddle.add``.

    Args:
        layer: The linear layer supplying ``weight``, ``with_bias``
            and (when ``with_bias`` is true) ``bias``.
        x: Input tensor; its last dimension must match
            ``layer.weight.shape[0]`` (matmul requirement).

    Returns:
        The transformed tensor ``x @ weight (+ bias)``.
    """
    if layer.with_bias:
        bias = layer.bias
        # F.linear broadcasts the bias over the output features, so it
        # must be 1-D with size equal to weight's last dimension.
        # NOTE: this is an assert (stripped under `python -O`); kept as-is
        # because callers may rely on AssertionError here.
        assert bias.dim() == 1 and bias.shape[-1] == layer.weight.shape[-1], (
            f"bias must be 1D with size equal to the last dim of weight, "
            f"but got bias.shape={bias.shape}, weight.shape[-1]={layer.weight.shape[-1]}"
        )
        # Fused matmul + bias-add in one kernel call.
        out = paddle.nn.functional.linear(x, layer.weight, bias)
    else:
        # No bias: a plain matmul avoids the overhead of the linear op.
        out = paddle.matmul(x, layer.weight)
    return out
class LinearBase(nn.Layer):