[Speculative Decoding] Support suffix decoding (#6403)

* support suffix decoding
This commit is contained in:
GoldPancake
2026-02-26 11:42:05 +08:00
committed by GitHub
parent 6d3fede240
commit 2178f2829b
18 changed files with 587 additions and 30 deletions
+14
View File
@@ -23,3 +23,17 @@ from .mtp import MTPProposer
# Public names of this package. Optional proposers are appended only after
# their import succeeds, so every entry in __all__ is bound in the module and
# `from <package> import *` cannot fail on a missing name.
__all__ = ["Proposer", "MTPProposer"]

# The n-gram proposer is not imported on XPU platforms.
if not current_platform.is_xpu():
    from .ngram import NgramProposer

    __all__.append("NgramProposer")

# Suffix proposer requires arctic_inference
try:
    from .suffix import SuffixProposer
except ImportError:
    # Import failed: leave the name bound to None and record that the
    # proposer is unavailable; it is not advertised in __all__.
    _suffix_proposer_available = False
    SuffixProposer = None
else:
    _suffix_proposer_available = True
    __all__.append("SuffixProposer")