[Optimization] merge matmul and add (#6986)

* merge matmul and add
* modify format
* use paddle.nn.functional.linear
* use _C_ops.linear
* use paddle.nn.functional.linear
* add FLAGS_use_legacy_linear env var in test case
* fix format
* add assert and remove env var
* modify format
* use matmul when there is no bias
* update accuracy baseline
2026-04-22 16:07:51 +08:00 · 2026-04-03 18:02:03 +08:00
parent 1090f8b123
commit 2068656a85
2 changed files with 12 additions and 5 deletions
@@ -82,10 +82,17 @@ class UnquantizedLinearMethod(QuantMethodBase):
        layer.weight.set_value(weights)

    def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor:
-        linear_out = paddle.matmul(x, layer.weight)
        if layer.with_bias:
-            linear_out = paddle.add(linear_out, layer.bias)
-        return linear_out
+            bias = layer.bias
+            assert bias.dim() == 1 and bias.shape[-1] == layer.weight.shape[-1], (
+                f"bias must be 1D with size equal to the last dim of weight, "
+                f"but got bias.shape={bias.shape}, weight.shape[-1]={layer.weight.shape[-1]}"
+            )
+            out = paddle.nn.functional.linear(x, layer.weight, bias)
+        else:
+            out = paddle.matmul(x, layer.weight)
+
+        return out


 class LinearBase(nn.Layer):
@@ -157,10 +157,10 @@ def check_routing_replay_chat_completion(openai_client, moe_layer_num: int, mode
    model_path = os.getenv("MODEL_PATH")
    if model_path:
        baseline_path = os.path.join(
-            model_path, f"R3_BaseLine_dev_uint8_0402/routing_replay_output_baseline_{model_name}"
+            model_path, f"R3_BaseLine_dev_uint8_0403/routing_replay_output_baseline_{model_name}"
        )
    else:
-        baseline_path = f"./R3_BaseLine_dev_uint8_0402/routing_replay_output_baseline_{model_name}"
+        baseline_path = f"./R3_BaseLine_dev_uint8_0403/routing_replay_output_baseline_{model_name}"
    stream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_stream")

    nonstream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_nonstream")