Sync v2.0 version of code to github repo

2026-04-23 00:17:25 +08:00 · 2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions
@@ -0,0 +1,67 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import paddle
+import paddle.distributed as dist
+from paddle.distributed import fleet
+import argparse
+import os
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--model_path",
+    default="./",
+    type=str,
+    required=True,
+    help="The directory of model.",
+)
+parser.add_argument(
+    "--output_path",
+    default="./",
+    type=str,
+    help="The directory of splited model",
+)
+parser.add_argument("--model_degree", default=4, type=int, help="Input model mp degree.")
+args = parser.parse_args()
+
+hidden_size = 1280
+kv_num_heads = 16
+head_dim = 80
+
+input_model_state_dict = paddle.load(os.path.join(args.model_path, "model_state.pdparams"))
+
+for i in range(args.model_degree):
+    static_dict = {}
+    for k, v in input_model_state_dict.items():
+        if "qkv.weight" in k:
+            static_dict[k] = input_model_state_dict[k].reshape(
+                [hidden_size, 3, kv_num_heads, head_dim]
+            ).split(args.model_degree, axis=-2)[i].reshape([hidden_size, -1])
+        elif "qkv.bias" in k:
+            static_dict[k] = input_model_state_dict[k].reshape(
+                [3, kv_num_heads, head_dim]).split(args.model_degree, axis=-2)[i].reshape([-1])
+        elif "attn.proj.weight" in k:
+            static_dict[k] = input_model_state_dict[k].split(args.model_degree, axis=-2)[i]
+        elif "fc1.weight" in k:
+            static_dict[k] = input_model_state_dict[k].split(args.model_degree, axis=-1)[i]
+        elif "fc1.bias" in k:
+            static_dict[k] = input_model_state_dict[k].split(args.model_degree, axis=-1)[i]
+        elif "fc2.weight" in k:
+            static_dict[k] = input_model_state_dict[k].split(args.model_degree, axis=-2)[i]
+        else:
+            static_dict[k] = v
+
+    paddle.save(static_dict, os.path.join(args.model_path, f"model_state_tp0{i}.pdparams"))