Split enable_mm (#7183)

Co-authored-by: liuruian <liuruian@MacBook-Pro.local>
This commit is contained in:
K11OntheBoat
2026-04-08 11:25:41 +08:00
committed by GitHub
parent 8496ec71a6
commit bb48bcbaa2
33 changed files with 109 additions and 69 deletions
+2 -1
View File
@@ -92,6 +92,7 @@ class MockFDConfig:
model_config = MockModelConfig()
cache_config = MockCacheConfig()
speculative_config = MockSpecaulativeConfig()
enable_mm_runtime = MockModelConfig.enable_mm
def get_max_chunk_tokens(self, mm_max_tokens_per_item=None):
return 8192
@@ -139,7 +140,7 @@ class TestChunkedMoE(unittest.TestCase):
model_runner.model_config = mock_model_config
model_runner.cache_config = mock_cache_config
model_runner.attn_backends = [MockAttentionBackend()]
model_runner.enable_mm = True
model_runner.enable_mm = mock_fd_config.enable_mm_runtime
model_runner.cudagraph_only_prefill = False
model_runner.use_cudagraph = False
model_runner.speculative_decoding = False
+7
View File
@@ -102,6 +102,7 @@ def create_mock_fd_config(
mock_config.structured_outputs_config = Mock()
mock_config.structured_outputs_config.reasoning_parser = None
mock_config.tool_parser = None
mock_config.enable_mm_runtime = enable_mm
return mock_config
@@ -181,6 +182,7 @@ class TestEngineClient(unittest.IsolatedAsyncioTestCase):
mock_config.structured_outputs_config = Mock()
mock_config.structured_outputs_config.reasoning_parser = None
mock_config.node_rank = 0
mock_config.enable_mm_runtime = mock_model_config.enable_mm
# Create mocks for all the external dependencies
mock_input_processor = Mock()
@@ -363,6 +365,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
mock_config.structured_outputs_config = MagicMock() # Add this
mock_config.structured_outputs_config.reasoning_parser = None
mock_config.tool_parser = None # Add this attribute
mock_config.enable_mm_runtime = mock_model_config.enable_mm
# Mock IPCSignal to avoid file system dependencies
with patch("fastdeploy.entrypoints.engine_client.IPCSignal") as mock_ipcsignal:
@@ -655,6 +658,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
mock_config.structured_outputs_config = Mock()
mock_config.structured_outputs_config.reasoning_parser = None
mock_config.tool_parser = None
mock_config.enable_mm_runtime = mock_config.model_config.enable_mm
client = EngineClient(
pid=5678,
@@ -1078,6 +1082,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
mock_config = Mock()
mock_config.model_config = mock_model_config
mock_config.enable_mm_runtime = mock_model_config.enable_mm
mock_config.eplb_config = Mock()
mock_config.eplb_config.enable_eplb = False
@@ -1131,6 +1136,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
mock_config = Mock()
mock_config.model_config = mock_model_config
mock_config.enable_mm_runtime = mock_model_config.enable_mm
mock_config.eplb_config = Mock()
mock_config.eplb_config.enable_eplb = False
@@ -1408,6 +1414,7 @@ class TestEngineClientValidParameters(unittest.TestCase):
mock_config = Mock()
mock_config.model_config = mock_model_config
mock_config.enable_mm_runtime = mock_model_config.enable_mm
mock_config.eplb_config = Mock()
mock_config.eplb_config.enable_eplb = False
@@ -92,6 +92,7 @@ class DummyFDConfig:
"max_model_len": 2048,
"head_dim": 128,
"num_hidden_layers": 2,
"enable_mm": False,
"causal": True,
"start_layer_index": 0,
"rope_3d": False,
@@ -124,6 +125,8 @@ class DummyFDConfig:
"model_type": "main",
},
)()
self.enable_mm_runtime = self.model_config.enable_mm
self.enable_rope_3d_runtime = self.model_config.enable_mm
class DummyLayer:
@@ -78,6 +78,7 @@ class StubConfig:
self.cache_config = CacheConfig()
self.parallel_config = ParallelConfig()
self.speculative_config = SpeculativeConfig()
self.enable_mm_runtime = self.model_config.enable_mm
# ---------------------------------------------------------------------------
@@ -168,6 +169,7 @@ class TestChunkedPrefillDeterminism(unittest.TestCase):
def _create_mm_resource_manager(self):
config = StubConfig()
config.model_config.enable_mm = True
config.enable_mm_runtime = config.model_config.enable_mm
return self._create_resource_manager(config)
# ==================== 1. Deterministic disabled ====================
+1
View File
@@ -64,6 +64,7 @@ class MockConfig:
scheduler_config = SchedulerConfig()
cache_config = CacheConfig()
parallel_config = ParallelConfig()
enable_mm_runtime = model_config.enable_mm
def get_max_chunk_tokens(self, mm_max_tokens_per_item=None):
return 8192
@@ -83,6 +83,7 @@ def create_mock_config():
fd_config.parallel_config = parallel_config
fd_config.structured_outputs_config = structured_outputs_config
fd_config.pad_to = 8
fd_config.enable_mm_runtime = model_config.enable_mm
def get_max_chunk_tokens(mm_max_tokens_per_item=None):
return 100