[Optimization] Unified data processing for online and offline (#6891)

* remove process_request

* fix chat

* fix unit test

* remove process response

* fix unit test

* fix offline decode

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix sampling_params

---------

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
luukunn
2026-03-19 21:56:09 +08:00
committed by GitHub
parent c3d8db85c4
commit f4a79d4c00
19 changed files with 160 additions and 619 deletions
+20 -22
View File
@@ -21,7 +21,6 @@ from unittest.mock import MagicMock, patch
import numpy as np
from PIL import Image
from fastdeploy.engine.request import Request
from fastdeploy.input.qwen3_vl_processor import Qwen3VLProcessor
from fastdeploy.input.qwen3_vl_processor.process import sample_frames
@@ -127,9 +126,9 @@ class TestQwen3VLProcessor(unittest.TestCase):
self.patcher_parse_image.stop()
self.patcher_parse_video.stop()
def test_process_request(self):
def test_process_request_dict_with_multimodal(self):
"""
Test processing of Request object with multimodal input
Test processing of request dict with multimodal input
Validates:
1. Token ID lengths match position_ids and token_type_ids shapes
@@ -151,17 +150,16 @@ class TestQwen3VLProcessor(unittest.TestCase):
],
}
request = Request.from_dict(message)
result = self.processor.process_request(request, 1024 * 100)
result = self.processor.process_request_dict(message, 1024 * 100)
self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["position_ids"].shape[0])
self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["token_type_ids"].shape[0])
self.assertEqual(len(result["prompt_token_ids"]), result["multimodal_inputs"]["position_ids"].shape[0])
self.assertEqual(len(result["prompt_token_ids"]), result["multimodal_inputs"]["token_type_ids"].shape[0])
self.assertEqual(
result.multimodal_inputs["images"].shape[0],
sum(map(lambda x: x.prod(), result.multimodal_inputs["grid_thw"])),
result["multimodal_inputs"]["images"].shape[0],
sum(map(lambda x: x.prod(), result["multimodal_inputs"]["grid_thw"])),
)
self.assertEqual(
result.multimodal_inputs["image_type_ids"].shape[0], result.multimodal_inputs["grid_thw"][:, 0].sum()
result["multimodal_inputs"]["image_type_ids"].shape[0], result["multimodal_inputs"]["grid_thw"][:, 0].sum()
)
def test_process_request_dict(self):
@@ -224,17 +222,16 @@ class TestQwen3VLProcessor(unittest.TestCase):
},
}
request = Request.from_dict(prompt)
result = self.processor.process_request(request, 1024 * 100)
result = self.processor.process_request_dict(prompt, 1024 * 100)
self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["position_ids"].shape[0])
self.assertEqual(result.prompt_token_ids_len, result.multimodal_inputs["token_type_ids"].shape[0])
self.assertEqual(len(result["prompt_token_ids"]), result["multimodal_inputs"]["position_ids"].shape[0])
self.assertEqual(len(result["prompt_token_ids"]), result["multimodal_inputs"]["token_type_ids"].shape[0])
self.assertEqual(
result.multimodal_inputs["images"].shape[0],
sum(map(lambda x: x.prod(), result.multimodal_inputs["grid_thw"])),
result["multimodal_inputs"]["images"].shape[0],
sum(map(lambda x: x.prod(), result["multimodal_inputs"]["grid_thw"])),
)
self.assertEqual(
result.multimodal_inputs["image_type_ids"].shape[0], result.multimodal_inputs["grid_thw"][:, 0].sum()
result["multimodal_inputs"]["image_type_ids"].shape[0], result["multimodal_inputs"]["grid_thw"][:, 0].sum()
)
def test_message_and_prompt(self):
@@ -276,14 +273,15 @@ class TestQwen3VLProcessor(unittest.TestCase):
"video": [{"video": b"123"}],
},
}
request2 = Request.from_dict(prompt)
result2 = self.processor.process_request(request2, 1024 * 100)
result2 = self.processor.process_request_dict(prompt, 1024 * 100)
# Verify both processing methods produce identical results
self.assertEqual(result["prompt_token_ids"], result2.prompt_token_ids)
self.assertTrue(np.equal(result["multimodal_inputs"]["grid_thw"], result2.multimodal_inputs["grid_thw"]).all())
self.assertEqual(result["prompt_token_ids"], result2["prompt_token_ids"])
self.assertTrue(
np.equal(result["multimodal_inputs"]["position_ids"], result2.multimodal_inputs["position_ids"]).all()
np.equal(result["multimodal_inputs"]["grid_thw"], result2["multimodal_inputs"]["grid_thw"]).all()
)
self.assertTrue(
np.equal(result["multimodal_inputs"]["position_ids"], result2["multimodal_inputs"]["position_ids"]).all()
)
def test_apply_chat_template(self):