diff --git a/fastdeploy/model_executor/models/ernie4_5_moe.py b/fastdeploy/model_executor/models/ernie4_5_moe.py
index e771d497a5..c43bc801c7 100644
--- a/fastdeploy/model_executor/models/ernie4_5_moe.py
+++ b/fastdeploy/model_executor/models/ernie4_5_moe.py
@@ -625,7 +625,7 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
             )
             process_weights_after_loading_fn(model_sublayer_name, param)
 
-        if self.tie_word_embeddings:
+        if getattr(self, "tie_word_embeddings", False):
             self.lm_head.load_state_dict({self.lm_head.weight_key: self.ernie.embed_tokens.embeddings.weight})
 
     def compute_logits(self, hidden_states: paddle.Tensor):
diff --git a/fastdeploy/model_executor/models/ernie4_5_mtp.py b/fastdeploy/model_executor/models/ernie4_5_mtp.py
index 7944f9f08d..48c2c9b2b3 100644
--- a/fastdeploy/model_executor/models/ernie4_5_mtp.py
+++ b/fastdeploy/model_executor/models/ernie4_5_mtp.py
@@ -16,7 +16,6 @@
 
 from __future__ import annotations
 
-import re
 from functools import partial
 from typing import Dict, Union
 
@@ -356,7 +355,6 @@ class Ernie4_5_MTPForCausalLM(ModelForCasualLM):
         self.ori_vocab_size = fd_config.model_config.ori_vocab_size
 
         self.lm_head = fd_config.speculative_config.sharing_model.lm_head
-        self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings
 
     @classmethod
     def name(self):
@@ -374,11 +372,6 @@ class Ernie4_5_MTPForCausalLM(ModelForCasualLM):
         and values are NumPy arrays or PaddlePaddle tensors.
         """
         self.ernie.load_state_dict(state_dict)
-        # if self.tie_word_embeddings:
-        #     self.lm_head.linear.weight.set_value(
-        #         self.ernie.embed_tokens.embeddings.weight.transpose([1, 0]))
-        # else:
-        #     self.lm_head.load_state_dict(state_dict)
 
     @paddle.no_grad()
     def load_weights(self, weights_iterator) -> None:
@@ -388,45 +381,22 @@ class Ernie4_5_MTPForCausalLM(ModelForCasualLM):
         Args:
             weights_iterator (Iterator): An iterator yielding (name, weight) pairs.
         """
-
-        from fastdeploy.model_executor.utils import (
-            default_weight_loader,
-            process_weights_after_loading,
+        from fastdeploy.model_executor.models.ernie4_5_moe import (
+            Ernie4_5_MoeForCausalLM,
         )
+        from fastdeploy.model_executor.utils import remap_weight_keys
 
-        all_param_mapping = [
-            # (param_name, weight_name, expert_id, shard_id)
-            ("embed_tokens.embeddings", "embed_tokens", None, None),
-            ("lm_head.linear", "lm_head", None, None),
-            ("enorm", "mtp_emb_norm.0", None, None),
-            ("hnorm", "mtp_hidden_norm.0", None, None),
-            ("eh_proj.linear", "mtp_linear_proj.0", None, None),
-        ]
-
-        params_dict = dict(self.named_parameters())
-        shard_id = None
-        process_weights_after_loading_fn = process_weights_after_loading(dict(self.named_sublayers()))
-        for loaded_weight_name, loaded_weight in weights_iterator:
-            for param_name, weight_name, exp_id, shard_id in all_param_mapping:
-                if weight_name not in loaded_weight_name:
-                    continue
-                model_param_name = loaded_weight_name.replace(weight_name, param_name)
-                param = params_dict[model_param_name]
-                shard_id = shard_id
-                break
-            else:
-                if loaded_weight_name not in params_dict.keys():
-                    continue
-                model_param_name = loaded_weight_name
-                param = params_dict[loaded_weight_name]
-
-            # Get weight loader from parameter and set weight
-            weight_loader = getattr(param, "weight_loader", default_weight_loader(self.fd_config))
-            weight_loader(param, loaded_weight)
-            model_sublayer_name = re.sub(
-                r"\.(up_gate_proj_weight|down_proj_weight|weight|cache_k_scale|cache_v_scale)$", "", model_param_name
-            )
-            process_weights_after_loading_fn(model_sublayer_name, param)
+        Ernie4_5_MoeForCausalLM.load_weights(
+            self,
+            remap_weight_keys(
+                weights_iterator,
+                {
+                    "mtp_emb_norm.0": "enorm",
+                    "mtp_hidden_norm.0": "hnorm",
+                    "mtp_linear_proj.0": "eh_proj.linear",
+                },
+            ),
+        )
 
     def compute_logits(self, hidden_states: paddle.Tensor):
         """
diff --git a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
index 5b8e4e13db..5097cee9ed 100644
--- a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
+++ b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
@@ -724,7 +724,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
                 r"\.(up_gate_proj_weight|down_proj_weight|weight|cache_k_scale|cache_v_scale)$", "", model_param_name
             )
             process_weights_after_loading_fn(model_sublayer_name, param)
-        if self.tie_word_embeddings:
+        if getattr(self, "tie_word_embeddings", False):
             # because we use lazy guard and is not initialized by default
             if not self.lm_head.linear.weight._is_initialized():
                 self.lm_head.linear.weight.initialize()
diff --git a/fastdeploy/model_executor/utils.py b/fastdeploy/model_executor/utils.py
index 28278d5654..4b7dff4bea 100644
--- a/fastdeploy/model_executor/utils.py
+++ b/fastdeploy/model_executor/utils.py
@@ -166,6 +166,13 @@ class WeightsMapper:
         return self._map_name(weight_name)
 
 
+def remap_weight_keys(weights_iterator, mapper: dict):
+    return (
+        (next((key.replace(k, v) for k, v in mapper.items() if k in key), key), value)
+        for key, value in weights_iterator
+    )
+
+
 def process_weights_before_loading(
     *, skip_prefixes: Optional[List[str]] = None, mapper: Optional[WeightsMapper] = None
 ):