mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimization] merge matmul and add (#6986)
* merge matmul and add
* modify format
* use paddle.nn.functional.linear
* use _C_ops.linear
* use paddle.nn.functional.linear
* add FLAGS_use_legacy_linear env var in test case
* fix format
* add assert and remove env var
* modify format
* use matmul when there is no bias
* update accuracy baseline
This commit is contained in:
@@ -82,10 +82,17 @@ class UnquantizedLinearMethod(QuantMethodBase):
|
||||
layer.weight.set_value(weights)
|
||||
|
||||
def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor:
    """Apply the unquantized linear transform ``x @ weight (+ bias)``.

    Uses the fused ``paddle.nn.functional.linear`` kernel when the layer
    has a bias (single matmul+add), and a plain ``paddle.matmul`` when it
    does not.

    Args:
        layer: Layer holding ``weight`` and, when ``layer.with_bias`` is
            true, a 1-D ``bias`` matching the weight's last dimension.
        x: Input activations; last dim must match ``weight``'s first dim.

    Returns:
        paddle.Tensor: The linear output.

    Raises:
        AssertionError: If the bias is not 1-D or its size does not equal
            the last dimension of the weight.
    """
    if layer.with_bias:
        bias = layer.bias
        # Explicit check instead of a bare `assert` so the validation is
        # not stripped when Python runs with -O; raises the same
        # exception type callers may already expect.
        if not (bias.dim() == 1 and bias.shape[-1] == layer.weight.shape[-1]):
            raise AssertionError(
                f"bias must be 1D with size equal to the last dim of weight, "
                f"but got bias.shape={bias.shape}, weight.shape[-1]={layer.weight.shape[-1]}"
            )
        # Fused matmul + bias-add in one kernel.
        out = paddle.nn.functional.linear(x, layer.weight, bias)
    else:
        out = paddle.matmul(x, layer.weight)

    return out
|
||||
|
||||
|
||||
class LinearBase(nn.Layer):
|
||||
|
||||
Reference in New Issue
Block a user