mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Feature][MTP]support new speculative decoding method named hybrid mtp with ngram (#3610)
This commit is contained in:
@@ -248,6 +248,7 @@ class Ernie4_5_MTPModel(nn.Layer):
|
||||
|
||||
self.num_layers = fd_config.model_config.num_hidden_layers
|
||||
self.embed_tokens = fd_config.speculative_config.sharing_model.ernie.embed_tokens
|
||||
self.norm = fd_config.speculative_config.sharing_model.ernie.norm
|
||||
|
||||
self.layers = nn.LayerList(
|
||||
[
|
||||
@@ -318,6 +319,8 @@ class Ernie4_5_MTPModel(nn.Layer):
|
||||
|
||||
hidden_states = hidden_states + residual
|
||||
|
||||
hidden_states = self.norm(hidden_states)
|
||||
|
||||
return hidden_states
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user