[Feature][MTP] Support a new speculative decoding method named hybrid MTP with ngram (#3610)

This commit is contained in:
freeliuzc
2025-08-26 14:29:22 +08:00
committed by GitHub
parent 0a0d2959b9
commit 52eda7fdb3
20 changed files with 454 additions and 571 deletions
@@ -248,6 +248,7 @@ class Ernie4_5_MTPModel(nn.Layer):
self.num_layers = fd_config.model_config.num_hidden_layers
self.embed_tokens = fd_config.speculative_config.sharing_model.ernie.embed_tokens
self.norm = fd_config.speculative_config.sharing_model.ernie.norm
self.layers = nn.LayerList(
[
@@ -318,6 +319,8 @@ class Ernie4_5_MTPModel(nn.Layer):
hidden_states = hidden_states + residual
hidden_states = self.norm(hidden_states)
return hidden_states