mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
Split enable_mm (#7183)
Co-authored-by: liuruian <liuruian@MacBook-Pro.local>
This commit is contained in:
@@ -92,6 +92,7 @@ class MockFDConfig:
|
||||
model_config = MockModelConfig()
|
||||
cache_config = MockCacheConfig()
|
||||
speculative_config = MockSpecaulativeConfig()
|
||||
enable_mm_runtime = MockModelConfig.enable_mm
|
||||
|
||||
def get_max_chunk_tokens(self, mm_max_tokens_per_item=None):
|
||||
return 8192
|
||||
@@ -139,7 +140,7 @@ class TestChunkedMoE(unittest.TestCase):
|
||||
model_runner.model_config = mock_model_config
|
||||
model_runner.cache_config = mock_cache_config
|
||||
model_runner.attn_backends = [MockAttentionBackend()]
|
||||
model_runner.enable_mm = True
|
||||
model_runner.enable_mm = mock_fd_config.enable_mm_runtime
|
||||
model_runner.cudagraph_only_prefill = False
|
||||
model_runner.use_cudagraph = False
|
||||
model_runner.speculative_decoding = False
|
||||
|
||||
@@ -102,6 +102,7 @@ def create_mock_fd_config(
|
||||
mock_config.structured_outputs_config = Mock()
|
||||
mock_config.structured_outputs_config.reasoning_parser = None
|
||||
mock_config.tool_parser = None
|
||||
mock_config.enable_mm_runtime = enable_mm
|
||||
|
||||
return mock_config
|
||||
|
||||
@@ -181,6 +182,7 @@ class TestEngineClient(unittest.IsolatedAsyncioTestCase):
|
||||
mock_config.structured_outputs_config = Mock()
|
||||
mock_config.structured_outputs_config.reasoning_parser = None
|
||||
mock_config.node_rank = 0
|
||||
mock_config.enable_mm_runtime = mock_model_config.enable_mm
|
||||
|
||||
# Create mocks for all the external dependencies
|
||||
mock_input_processor = Mock()
|
||||
@@ -363,6 +365,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
|
||||
mock_config.structured_outputs_config = MagicMock() # Add this
|
||||
mock_config.structured_outputs_config.reasoning_parser = None
|
||||
mock_config.tool_parser = None # Add this attribute
|
||||
mock_config.enable_mm_runtime = mock_model_config.enable_mm
|
||||
|
||||
# Mock IPCSignal to avoid file system dependencies
|
||||
with patch("fastdeploy.entrypoints.engine_client.IPCSignal") as mock_ipcsignal:
|
||||
@@ -655,6 +658,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
|
||||
mock_config.structured_outputs_config = Mock()
|
||||
mock_config.structured_outputs_config.reasoning_parser = None
|
||||
mock_config.tool_parser = None
|
||||
mock_config.enable_mm_runtime = mock_config.model_config.enable_mm
|
||||
|
||||
client = EngineClient(
|
||||
pid=5678,
|
||||
@@ -1078,6 +1082,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
|
||||
|
||||
mock_config = Mock()
|
||||
mock_config.model_config = mock_model_config
|
||||
mock_config.enable_mm_runtime = mock_model_config.enable_mm
|
||||
mock_config.eplb_config = Mock()
|
||||
mock_config.eplb_config.enable_eplb = False
|
||||
|
||||
@@ -1131,6 +1136,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
|
||||
|
||||
mock_config = Mock()
|
||||
mock_config.model_config = mock_model_config
|
||||
mock_config.enable_mm_runtime = mock_model_config.enable_mm
|
||||
mock_config.eplb_config = Mock()
|
||||
mock_config.eplb_config.enable_eplb = False
|
||||
|
||||
@@ -1408,6 +1414,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
|
||||
|
||||
mock_config = Mock()
|
||||
mock_config.model_config = mock_model_config
|
||||
mock_config.enable_mm_runtime = mock_model_config.enable_mm
|
||||
mock_config.eplb_config = Mock()
|
||||
mock_config.eplb_config.enable_eplb = False
|
||||
|
||||
|
||||
@@ -92,6 +92,7 @@ class DummyFDConfig:
|
||||
"max_model_len": 2048,
|
||||
"head_dim": 128,
|
||||
"num_hidden_layers": 2,
|
||||
"enable_mm": False,
|
||||
"causal": True,
|
||||
"start_layer_index": 0,
|
||||
"rope_3d": False,
|
||||
@@ -124,6 +125,8 @@ class DummyFDConfig:
|
||||
"model_type": "main",
|
||||
},
|
||||
)()
|
||||
self.enable_mm_runtime = self.model_config.enable_mm
|
||||
self.enable_rope_3d_runtime = self.model_config.enable_mm
|
||||
|
||||
|
||||
class DummyLayer:
|
||||
|
||||
@@ -78,6 +78,7 @@ class StubConfig:
|
||||
self.cache_config = CacheConfig()
|
||||
self.parallel_config = ParallelConfig()
|
||||
self.speculative_config = SpeculativeConfig()
|
||||
self.enable_mm_runtime = self.model_config.enable_mm
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -168,6 +169,7 @@ class TestChunkedPrefillDeterminism(unittest.TestCase):
|
||||
def _create_mm_resource_manager(self):
|
||||
config = StubConfig()
|
||||
config.model_config.enable_mm = True
|
||||
config.enable_mm_runtime = config.model_config.enable_mm
|
||||
return self._create_resource_manager(config)
|
||||
|
||||
# ==================== 1. Deterministic disabled ====================
|
||||
|
||||
@@ -64,6 +64,7 @@ class MockConfig:
|
||||
scheduler_config = SchedulerConfig()
|
||||
cache_config = CacheConfig()
|
||||
parallel_config = ParallelConfig()
|
||||
enable_mm_runtime = model_config.enable_mm
|
||||
|
||||
def get_max_chunk_tokens(self, mm_max_tokens_per_item=None):
|
||||
return 8192
|
||||
|
||||
@@ -83,6 +83,7 @@ def create_mock_config():
|
||||
fd_config.parallel_config = parallel_config
|
||||
fd_config.structured_outputs_config = structured_outputs_config
|
||||
fd_config.pad_to = 8
|
||||
fd_config.enable_mm_runtime = model_config.enable_mm
|
||||
|
||||
def get_max_chunk_tokens(mm_max_tokens_per_item=None):
|
||||
return 100
|
||||
|
||||
Reference in New Issue
Block a user