[Optimization] Unified data processing for online and offline (#6891)

* remove process_request

* fix chat

* fix unit test

* remove process response

* fix unit test

* fix offline decode

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix sampling_params

---------

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
luukunn
2026-03-19 21:56:09 +08:00
committed by GitHub
parent c3d8db85c4
commit f4a79d4c00
19 changed files with 160 additions and 619 deletions
+20 -22
View File
@@ -21,7 +21,6 @@ from unittest.mock import MagicMock, patch
import numpy as np
from PIL import Image
from fastdeploy.engine.request import Request
from fastdeploy.input.qwen3_vl_processor import Qwen3VLProcessor
from fastdeploy.input.qwen3_vl_processor.process import sample_frames
@@ -127,9 +126,9 @@ class TestQwen3VLProcessor(unittest.TestCase):
self.patcher_parse_image.stop()
self.patcher_parse_video.stop()
def test_process_request(self):
def test_process_request_dict_with_multimodal(self):
"""
Test processing of Request object with multimodal input
Test processing of request dict with multimodal input
Validates:
1. Token ID lengths match position_ids and token_type_ids shapes
@@ -151,17 +150,16 @@ class TestQwen3VLProcessor(unittest.TestCase):
],
}
request = Request.from_dict(message)
result = self.processor.process_request(request, 1024 * 100)
result = self.processor.process_request_dict(message, 1024 * 100)
self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["position_ids"].shape[0])
self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["token_type_ids"].shape[0])
self.assertEqual(len(result["prompt_token_ids"]), result["multimodal_inputs"]["position_ids"].shape[0])
self.assertEqual(len(result["prompt_token_ids"]), result["multimodal_inputs"]["token_type_ids"].shape[0])
self.assertEqual(
result.multimodal_inputs["images"].shape[0],
sum(map(lambda x: x.prod(), result.multimodal_inputs["grid_thw"])),
result["multimodal_inputs"]["images"].shape[0],
sum(map(lambda x: x.prod(), result["multimodal_inputs"]["grid_thw"])),
)
self.assertEqual(
result.multimodal_inputs["image_type_ids"].shape[0], result.multimodal_inputs["grid_thw"][:, 0].sum()
result["multimodal_inputs"]["image_type_ids"].shape[0], result["multimodal_inputs"]["grid_thw"][:, 0].sum()
)
def test_process_request_dict(self):
@@ -224,17 +222,16 @@ class TestQwen3VLProcessor(unittest.TestCase):
},
}
request = Request.from_dict(prompt)
result = self.processor.process_request(request, 1024 * 100)
result = self.processor.process_request_dict(prompt, 1024 * 100)
self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["position_ids"].shape[0])
self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["token_type_ids"].shape[0])
self.assertEqual(len(result["prompt_token_ids"]), result["multimodal_inputs"]["position_ids"].shape[0])
self.assertEqual(len(result["prompt_token_ids"]), result["multimodal_inputs"]["token_type_ids"].shape[0])
self.assertEqual(
result.multimodal_inputs["images"].shape[0],
sum(map(lambda x: x.prod(), result.multimodal_inputs["grid_thw"])),
result["multimodal_inputs"]["images"].shape[0],
sum(map(lambda x: x.prod(), result["multimodal_inputs"]["grid_thw"])),
)
self.assertEqual(
result.multimodal_inputs["image_type_ids"].shape[0], result.multimodal_inputs["grid_thw"][:, 0].sum()
result["multimodal_inputs"]["image_type_ids"].shape[0], result["multimodal_inputs"]["grid_thw"][:, 0].sum()
)
def test_message_and_prompt(self):
@@ -276,14 +273,15 @@ class TestQwen3VLProcessor(unittest.TestCase):
"video": [{"video": b"123"}],
},
}
request2 = Request.from_dict(prompt)
result2 = self.processor.process_request(request2, 1024 * 100)
result2 = self.processor.process_request_dict(prompt, 1024 * 100)
# Verify both processing methods produce identical results
self.assertEqual(result["prompt_token_ids"], result2.prompt_token_ids)
self.assertTrue(np.equal(result["multimodal_inputs"]["grid_thw"], result2.multimodal_inputs["grid_thw"]).all())
self.assertEqual(result["prompt_token_ids"], result2["prompt_token_ids"])
self.assertTrue(
np.equal(result["multimodal_inputs"]["position_ids"], result2.multimodal_inputs["position_ids"]).all()
np.equal(result["multimodal_inputs"]["grid_thw"], result2["multimodal_inputs"]["grid_thw"]).all()
)
self.assertTrue(
np.equal(result["multimodal_inputs"]["position_ids"], result2["multimodal_inputs"]["position_ids"]).all()
)
def test_apply_chat_template(self):