mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
Split enable_mm (#7183)
Co-authored-by: liuruian <liuruian@MacBook-Pro.local>
This commit is contained in:
@@ -71,7 +71,7 @@ class Proposer(ABC):
|
||||
self.max_ngram_size = self.speculative_config.max_ngram_size
|
||||
self.min_ngram_size = self.speculative_config.min_ngram_size
|
||||
|
||||
- self.enable_mm = self.model_config.enable_mm
|
||||
+ self.enable_mm = self.fd_config.enable_mm_runtime
|
||||
|
||||
spec_logger.info(f"Speculate config: {self.speculative_config}")
|
||||
|
||||
|
||||
@@ -103,7 +103,7 @@ class MTPProposer(Proposer):
|
||||
self.num_main_model_layers = self.model_config.num_hidden_layers
|
||||
self.local_rank = local_rank
|
||||
self.device_id = device_id
|
||||
- self.use_attn_mask_offset = self.enable_mm and self.fd_config.deploy_modality != "text"
|
||||
+ self.use_attn_mask_offset = self.enable_mm
|
||||
|
||||
self._update_mtp_config(main_model)
|
||||
self._load_model()
|
||||
|
||||
Reference in New Issue
Block a user