mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Speculative Decoding] Support suffix decoding (#6403)
* support suffix decoding
This commit is contained in:
@@ -23,3 +23,17 @@ from .mtp import MTPProposer
|
||||
# Public names exported by this package. Start with the proposers that are
# listed unconditionally and append each optional one only when its import
# actually succeeded, so `from <package> import *` never names an attribute
# that is missing from the module.
__all__ = ["Proposer", "MTPProposer"]

# The n-gram proposer is only imported on non-XPU platforms.
if not current_platform.is_xpu():
    from .ngram import NgramProposer

    __all__.append("NgramProposer")

# Suffix proposer requires arctic_inference
try:
    from .suffix import SuffixProposer
except ImportError:
    # Optional dependency missing: keep the name bound (to None) so that
    # `from <package> import SuffixProposer` still works, and record the
    # unavailability in the flag below instead of failing package import.
    _suffix_proposer_available = False
    SuffixProposer = None
else:
    _suffix_proposer_available = True
    __all__.append("SuffixProposer")
|
||||
|
||||
Reference in New Issue
Block a user