feat: add support for API usage with multimodal models (#4548)

* feat: add support for API usage with multimodal models

* completion_tokens now includes num_image_tokens

* remove test_request.py

* fix: paddle.device.is_compiled_with_cuda()

* fix: test_unstream_without_logprobs
This commit is contained in:
SunLei
2025-10-28 20:23:46 +08:00
committed by GitHub
parent e1ac90d787
commit 2a9ed72533
10 changed files with 256 additions and 21 deletions
@@ -388,6 +388,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
"reasoning_content": "Normal reasoning",
"tool_call": None,
"num_cached_tokens": 3,
"num_image_tokens": 2,
"raw_prediction": "raw_answer_0",
},
"finished": True,
@@ -403,6 +404,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
"tool_calls": None,
"raw_prediction": "raw_answer_0",
"num_cached_tokens": 3,
"num_image_tokens": 2,
"finish_reason": "stop",
},
},
@@ -415,6 +417,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
"reasoning_content": None,
"tool_call": None,
"num_cached_tokens": 0,
"num_image_tokens": 0,
"raw_prediction": None,
},
"finished": True,
@@ -430,6 +433,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
"tool_calls": None,
"raw_prediction": None,
"num_cached_tokens": 0,
"num_image_tokens": 0,
"finish_reason": "stop",
},
},
@@ -442,6 +446,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
mock_response_processor.enable_multimodal_content.return_value = False
completion_token_ids = [[], []]
num_cached_tokens = [0, 0]
num_image_tokens = [0, 0]
for idx, case in enumerate(test_cases):
actual_choice = await self.chat_serving._create_chat_completion_choice(
@@ -453,6 +458,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
prompt_tokens=prompt_tokens,
completion_token_ids=completion_token_ids[idx],
num_cached_tokens=num_cached_tokens,
num_image_tokens=num_image_tokens,
logprob_contents=logprob_contents,
response_processor=mock_response_processor,
)
@@ -468,6 +474,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
self.assertEqual(actual_choice.message.completion_token_ids, completion_token_ids[idx])
self.assertEqual(num_cached_tokens[expected["index"]], expected["num_cached_tokens"])
self.assertEqual(num_image_tokens[expected["index"]], expected["num_image_tokens"])
self.assertEqual(actual_choice.finish_reason, expected["finish_reason"])
assert actual_choice.logprobs is not None