mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
feat: add support for API usage with multimodal models (#4548)
* feat: add support for API usage with multimodal models
* completion_tokens contains num_image_tokens
* remove test_request.py
* fix: paddle.device.is_compiled_with_cuda()
* fix test_unstream_without_logprobs
This commit is contained in:
@@ -388,6 +388,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
|
||||
"reasoning_content": "Normal reasoning",
|
||||
"tool_call": None,
|
||||
"num_cached_tokens": 3,
|
||||
"num_image_tokens": 2,
|
||||
"raw_prediction": "raw_answer_0",
|
||||
},
|
||||
"finished": True,
|
||||
@@ -403,6 +404,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
|
||||
"tool_calls": None,
|
||||
"raw_prediction": "raw_answer_0",
|
||||
"num_cached_tokens": 3,
|
||||
"num_image_tokens": 2,
|
||||
"finish_reason": "stop",
|
||||
},
|
||||
},
|
||||
@@ -415,6 +417,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
|
||||
"reasoning_content": None,
|
||||
"tool_call": None,
|
||||
"num_cached_tokens": 0,
|
||||
"num_image_tokens": 0,
|
||||
"raw_prediction": None,
|
||||
},
|
||||
"finished": True,
|
||||
@@ -430,6 +433,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
|
||||
"tool_calls": None,
|
||||
"raw_prediction": None,
|
||||
"num_cached_tokens": 0,
|
||||
"num_image_tokens": 0,
|
||||
"finish_reason": "stop",
|
||||
},
|
||||
},
|
||||
@@ -442,6 +446,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
|
||||
mock_response_processor.enable_multimodal_content.return_value = False
|
||||
completion_token_ids = [[], []]
|
||||
num_cached_tokens = [0, 0]
|
||||
num_image_tokens = [0, 0]
|
||||
|
||||
for idx, case in enumerate(test_cases):
|
||||
actual_choice = await self.chat_serving._create_chat_completion_choice(
|
||||
@@ -453,6 +458,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_token_ids=completion_token_ids[idx],
|
||||
num_cached_tokens=num_cached_tokens,
|
||||
num_image_tokens=num_image_tokens,
|
||||
logprob_contents=logprob_contents,
|
||||
response_processor=mock_response_processor,
|
||||
)
|
||||
@@ -468,6 +474,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
|
||||
self.assertEqual(actual_choice.message.completion_token_ids, completion_token_ids[idx])
|
||||
|
||||
self.assertEqual(num_cached_tokens[expected["index"]], expected["num_cached_tokens"])
|
||||
self.assertEqual(num_image_tokens[expected["index"]], expected["num_image_tokens"])
|
||||
self.assertEqual(actual_choice.finish_reason, expected["finish_reason"])
|
||||
assert actual_choice.logprobs is not None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user