mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-22 16:07:51 +08:00
* fix mtp * fix --------- Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -625,7 +625,7 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
|
||||
)
|
||||
process_weights_after_loading_fn(model_sublayer_name, param)
|
||||
|
||||
if self.tie_word_embeddings:
|
||||
if getattr(self, "tie_word_embeddings", False):
|
||||
self.lm_head.load_state_dict({self.lm_head.weight_key: self.ernie.embed_tokens.embeddings.weight})
|
||||
|
||||
def compute_logits(self, hidden_states: paddle.Tensor):
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from functools import partial
|
||||
from typing import Dict, Union
|
||||
|
||||
@@ -356,7 +355,6 @@ class Ernie4_5_MTPForCausalLM(ModelForCasualLM):
|
||||
self.ori_vocab_size = fd_config.model_config.ori_vocab_size
|
||||
|
||||
self.lm_head = fd_config.speculative_config.sharing_model.lm_head
|
||||
self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings
|
||||
|
||||
@classmethod
|
||||
def name(self):
|
||||
@@ -374,11 +372,6 @@ class Ernie4_5_MTPForCausalLM(ModelForCasualLM):
|
||||
and values are NumPy arrays or PaddlePaddle tensors.
|
||||
"""
|
||||
self.ernie.load_state_dict(state_dict)
|
||||
# if self.tie_word_embeddings:
|
||||
# self.lm_head.linear.weight.set_value(
|
||||
# self.ernie.embed_tokens.embeddings.weight.transpose([1, 0]))
|
||||
# else:
|
||||
# self.lm_head.load_state_dict(state_dict)
|
||||
|
||||
@paddle.no_grad()
|
||||
def load_weights(self, weights_iterator) -> None:
|
||||
@@ -388,45 +381,22 @@ class Ernie4_5_MTPForCausalLM(ModelForCasualLM):
|
||||
Args:
|
||||
weights_iterator (Iterator): An iterator yielding (name, weight) pairs.
|
||||
"""
|
||||
|
||||
from fastdeploy.model_executor.utils import (
|
||||
default_weight_loader,
|
||||
process_weights_after_loading,
|
||||
from fastdeploy.model_executor.models.ernie4_5_moe import (
|
||||
Ernie4_5_MoeForCausalLM,
|
||||
)
|
||||
from fastdeploy.model_executor.utils import remap_weight_keys
|
||||
|
||||
all_param_mapping = [
|
||||
# (param_name, weight_name, expert_id, shard_id)
|
||||
("embed_tokens.embeddings", "embed_tokens", None, None),
|
||||
("lm_head.linear", "lm_head", None, None),
|
||||
("enorm", "mtp_emb_norm.0", None, None),
|
||||
("hnorm", "mtp_hidden_norm.0", None, None),
|
||||
("eh_proj.linear", "mtp_linear_proj.0", None, None),
|
||||
]
|
||||
|
||||
params_dict = dict(self.named_parameters())
|
||||
shard_id = None
|
||||
process_weights_after_loading_fn = process_weights_after_loading(dict(self.named_sublayers()))
|
||||
for loaded_weight_name, loaded_weight in weights_iterator:
|
||||
for param_name, weight_name, exp_id, shard_id in all_param_mapping:
|
||||
if weight_name not in loaded_weight_name:
|
||||
continue
|
||||
model_param_name = loaded_weight_name.replace(weight_name, param_name)
|
||||
param = params_dict[model_param_name]
|
||||
shard_id = shard_id
|
||||
break
|
||||
else:
|
||||
if loaded_weight_name not in params_dict.keys():
|
||||
continue
|
||||
model_param_name = loaded_weight_name
|
||||
param = params_dict[loaded_weight_name]
|
||||
|
||||
# Get weight loader from parameter and set weight
|
||||
weight_loader = getattr(param, "weight_loader", default_weight_loader(self.fd_config))
|
||||
weight_loader(param, loaded_weight)
|
||||
model_sublayer_name = re.sub(
|
||||
r"\.(up_gate_proj_weight|down_proj_weight|weight|cache_k_scale|cache_v_scale)$", "", model_param_name
|
||||
)
|
||||
process_weights_after_loading_fn(model_sublayer_name, param)
|
||||
Ernie4_5_MoeForCausalLM.load_weights(
|
||||
self,
|
||||
remap_weight_keys(
|
||||
weights_iterator,
|
||||
{
|
||||
"mtp_emb_norm.0": "enorm",
|
||||
"mtp_hidden_norm.0": "hnorm",
|
||||
"mtp_linear_proj.0": "eh_proj.linear",
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
def compute_logits(self, hidden_states: paddle.Tensor):
|
||||
"""
|
||||
|
||||
@@ -724,7 +724,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
|
||||
r"\.(up_gate_proj_weight|down_proj_weight|weight|cache_k_scale|cache_v_scale)$", "", model_param_name
|
||||
)
|
||||
process_weights_after_loading_fn(model_sublayer_name, param)
|
||||
if self.tie_word_embeddings:
|
||||
if getattr(self, "tie_word_embeddings", False):
|
||||
# because we use lazy guard and is not initialized by default
|
||||
if not self.lm_head.linear.weight._is_initialized():
|
||||
self.lm_head.linear.weight.initialize()
|
||||
|
||||
@@ -166,6 +166,13 @@ class WeightsMapper:
|
||||
return self._map_name(weight_name)
|
||||
|
||||
|
||||
def remap_weight_keys(weights_iterator, mapper: dict):
|
||||
return (
|
||||
(next((key.replace(k, v) for k, v in mapper.items() if k in key), key), value)
|
||||
for key, value in weights_iterator
|
||||
)
|
||||
|
||||
|
||||
def process_weights_before_loading(
|
||||
*, skip_prefixes: Optional[List[str]] = None, mapper: Optional[WeightsMapper] = None
|
||||
):
|
||||
|
||||
Reference in New Issue
Block a user