mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
6e416c62dd
* to_request_for_infer initial commit
* refactor to from_chat_completion_request
* preprocess use request initial commit
* bugfix
* processors refactored to use request
* bug fix
* refactor Request from_generic_request
* post process initial commit
* bugfix
* postprocess second commit
* bugfix
* serving_embedding initial commit
* serving_reward initial commit
* bugfix
* replace function name
* async_llm initial commit
* offline initial commit and fix bug
* bugfix
* fix async_llm
* remove add speculate_metrics into data
* fix logprobs bug
* fix echo bug
* fix bug
* fix reasoning_max_tokens
* bugfix
* bugfix and modify unittest
* bugfix and modify unit test
* bugfix
* bugfix
* bugfix
* modify unittest
* fix error when reasoning_content is none for text_processor
* remove some unnecessary logic
* revert removed logic
* implement add and set method for RequestOutput and refactor code
* modify unit test
* modify unit test
* unify process_request and process_request_obj
* remove a unit test
* unify process_response and process_response_obj
* support qwen3_vl_processor
* modify unittest and remove comments
* fix prompt_logprobs
* fix codestyle
* add v1
* v1
* fix unit test
* fix unit test
* fix pre-commit
* fix
* add process request
* add process request
* fix
* fix
* fix unit test
* fix unit test
* fix unit test
* fix unit test
* fix unit test
* remove file
* add unit test
* add unit test
* add unit test
* fix unit test
* fix unit test
* fix
* fix

---------

Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com>
Co-authored-by: luukunn <981429396@qq.com>
Co-authored-by: luukunn <83932082+luukunn@users.noreply.github.com>
Co-authored-by: Zhang Yulong <35552275+ZhangYulongg@users.noreply.github.com>
148 lines
6.0 KiB
Python
"""
|
|
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
|
|
import os
|
|
import unittest
|
|
from pathlib import Path
|
|
from unittest.mock import AsyncMock, MagicMock, mock_open, patch
|
|
|
|
from fastdeploy.engine.sampling_params import SamplingParams
|
|
from fastdeploy.entrypoints.chat_utils import load_chat_template
|
|
from fastdeploy.entrypoints.llm import LLM
|
|
from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest
|
|
from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
|
|
|
|
|
|
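
# These tests cover load_chat_template from fastdeploy.entrypoints.chat_utils and
# the chat_template plumbing through OpenAIServingChat and LLM.chat, with the
# engine replaced by mocks so no model is loaded.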


class TestLoadChatTemplate(unittest.IsolatedAsyncioTestCase):

    def setUp(self):
        """
        Set up the test environment with a shared template string and
        mocked engine and tokenizer objects.
        """
        self.input_chat_template = "unit test \n"
        self.mock_engine = MagicMock()
        self.tokenizer = MagicMock()

    def test_load_chat_template_none(self):
        # None should pass through unchanged.
        result = load_chat_template(None)
        self.assertIsNone(result)

    def test_load_chat_template_str(self):
        # A literal template string (ending in a newline) is returned as-is.
        result = load_chat_template(self.input_chat_template)
        self.assertEqual(self.input_chat_template, result)

    def test_load_chat_template_path(self):
        # A path to an existing file is read and its contents returned.
        file_path = os.path.join(os.getcwd(), "chat_template")
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(self.input_chat_template)
        try:
            result = load_chat_template(file_path)
        finally:
            os.remove(file_path)
        self.assertEqual(self.input_chat_template, result)

    def test_load_chat_template_non_str_and_path(self):
        # A bare string that is neither a template literal nor an existing path
        # is rejected with ValueError.
        with self.assertRaises(ValueError):
            load_chat_template("unit test")

    def test_path_with_literal_true(self):
        # Passing a Path object together with is_literal=True is a TypeError.
        with self.assertRaises(TypeError):
            load_chat_template(Path("./chat_template"), is_literal=True)

    def test_path_object_file_error(self):
        # I/O errors raised while reading the template file propagate to the caller.
        with patch("builtins.open", mock_open()) as mock_file:
            mock_file.side_effect = OSError("File error")
            with self.assertRaises(OSError):
                load_chat_template(Path("./chat_template"))
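
    # Contract of load_chat_template as inferred from the cases above (a summary,
    # not an authoritative spec):
    #   None                           -> None
    #   string containing "\n"         -> treated as a literal template, returned as-is
    #   string naming an existing file -> file contents
    #   any other bare string          -> ValueError
    #   Path with is_literal=True      -> TypeError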

    async def test_serving_chat(self):
        request = ChatCompletionRequest(messages=[{"role": "user", "content": "你好"}])
        self.chat_completion_handler = OpenAIServingChat(
            self.mock_engine,
            models=None,
            pid=123,
            ips=None,
            max_waiting_time=-1,
            chat_template=self.input_chat_template,
        )

        # Stub out the generator so create_chat_completion returns its inputs
        # instead of running real inference.
        async def mock_chat_completion_full_generator(
            request, request_id, model_name, prompt_token_ids, prompt_tokens, max_tokens_list
        ):
            return prompt_token_ids

        async def mock_format_and_add_data(current_req_obj):
            current_req_obj["prompt_tokens"] = "你好"
            return current_req_obj

        self.chat_completion_handler.chat_completion_full_generator = mock_chat_completion_full_generator
        self.chat_completion_handler.engine_client.format_and_add_data = mock_format_and_add_data
        self.chat_completion_handler.engine_client.semaphore = AsyncMock()
        self.chat_completion_handler.engine_client.semaphore.acquire = AsyncMock(return_value=None)
        self.chat_completion_handler.engine_client.semaphore.status = MagicMock(return_value="mock_status")
        chat_completion = await self.chat_completion_handler.create_chat_completion(request)
        # Without a per-request chat_template, the handler falls back to the
        # template passed to OpenAIServingChat at construction time.
        self.assertEqual(self.input_chat_template, chat_completion["chat_template"])
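
    # Same setup as above, but the request carries its own chat_template; the
    # per-request value should take precedence over the handler default.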

    async def test_serving_chat_custom_template(self):
        request = ChatCompletionRequest(messages=[{"role": "user", "content": "hi"}], chat_template="hello")
        self.chat_completion_handler = OpenAIServingChat(
            self.mock_engine,
            models=None,
            pid=123,
            ips=None,
            max_waiting_time=10,
            chat_template=self.input_chat_template,
        )

        async def mock_chat_completion_full_generator(
            request, request_id, model_name, prompt_token_ids, prompt_tokens, max_tokens_list
        ):
            return prompt_token_ids

        async def mock_format_and_add_data(current_req_obj):
            current_req_obj["prompt_tokens"] = "你好"
            return current_req_obj

        self.chat_completion_handler.chat_completion_full_generator = mock_chat_completion_full_generator
        self.chat_completion_handler.engine_client.format_and_add_data = mock_format_and_add_data
        self.chat_completion_handler.engine_client.semaphore = AsyncMock()
        self.chat_completion_handler.engine_client.semaphore.acquire = AsyncMock(return_value=None)
        self.chat_completion_handler.engine_client.semaphore.status = MagicMock(return_value="mock_status")
        chat_completion = await self.chat_completion_handler.create_chat_completion(request)
        self.assertEqual("hello", chat_completion["chat_template"])

    @patch("fastdeploy.entrypoints.llm.LLM.__init__")
    def test_llm(self, mock_class):
        mock_class.return_value = None
        llm = LLM()
        llm.llm_engine = MagicMock()
        llm.default_sampling_params = MagicMock()

        def mock_run_engine(req_ids, **kwargs):
            return req_ids

        def mock_add_request(**kwargs):
            return kwargs.get("chat_template")

        llm._run_engine = mock_run_engine
        llm._add_request = mock_add_request
        # _add_request is stubbed to echo chat_template, and _run_engine to echo
        # the ids it receives, so llm.chat round-trips the template back out.
        result = llm.chat(["hello"], sampling_params=SamplingParams(1), chat_template="hello")
        self.assertEqual("hello", result)
if __name__ == "__main__":
|
|
unittest.main()
|
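
# To run just this file (assuming it is saved as test_load_chat_template.py):
#     python -m unittest test_load_chat_template -v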