mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
【Fix】 remove text_after_process & raw_prediction (#4421)
* remove text_after_process & raw_prediction * remove text_after_process & raw_prediction
This commit is contained in:
@@ -71,5 +71,5 @@ curl -X POST "http://0.0.0.0:8188/v1/chat/completions" \
|
||||
|
||||
### 3. Successfully returns the result
|
||||
```json
|
||||
{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
|
||||
{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
|
||||
```
|
||||
|
||||
@@ -231,8 +231,18 @@ ChatMessage:
|
||||
role: str
|
||||
content: str
|
||||
reasoning_content: Optional[str] = None
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
ToolCall:
|
||||
id: str = None
|
||||
type: Literal["function"] = "function"
|
||||
function: FunctionCall
|
||||
FunctionCall:
|
||||
name: str
|
||||
arguments: str
|
||||
|
||||
# Fields returned for streaming responses
|
||||
ChatCompletionStreamResponse:
|
||||
@@ -254,6 +264,17 @@ DeltaMessage:
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
DeltaToolCall:
|
||||
id: Optional[str] = None
|
||||
type: Optional[Literal["function"]] = None
|
||||
index: int
|
||||
function: Optional[DeltaFunctionCall] = None
|
||||
DeltaFunctionCall:
|
||||
name: Optional[str] = None
|
||||
arguments: Optional[str] = None
|
||||
```
|
||||
|
||||
## Completion API
|
||||
@@ -384,10 +405,20 @@ CompletionResponseChoice:
|
||||
text: str
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
arrival_time: Optional[float] = None
|
||||
logprobs: Optional[int] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
ToolCall:
|
||||
id: str = None
|
||||
type: Literal["function"] = "function"
|
||||
function: FunctionCall
|
||||
FunctionCall:
|
||||
name: str
|
||||
arguments: str
|
||||
|
||||
# Fields returned for streaming responses
|
||||
CompletionStreamResponse:
|
||||
@@ -403,8 +434,18 @@ CompletionResponseStreamChoice:
|
||||
arrival_time: float = None
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
logprobs: Optional[float] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
|
||||
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
DeltaToolCall:
|
||||
id: Optional[str] = None
|
||||
type: Optional[Literal["function"]] = None
|
||||
index: int
|
||||
function: Optional[DeltaFunctionCall] = None
|
||||
DeltaFunctionCall:
|
||||
name: Optional[str] = None
|
||||
arguments: Optional[str] = None
|
||||
```
|
||||
|
||||
@@ -71,5 +71,5 @@ curl -X POST "http://0.0.0.0:8188/v1/chat/completions" \
|
||||
|
||||
### 3. 成功返回结果
|
||||
```json
|
||||
{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
|
||||
{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
|
||||
```
|
||||
|
||||
@@ -230,8 +230,18 @@ ChatMessage:
|
||||
role: str
|
||||
content: str
|
||||
reasoning_content: Optional[str] = None
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
ToolCall:
|
||||
id: str = None
|
||||
type: Literal["function"] = "function"
|
||||
function: FunctionCall
|
||||
FunctionCall:
|
||||
name: str
|
||||
arguments: str
|
||||
|
||||
# 返回流式响应的字段
|
||||
ChatCompletionStreamResponse:
|
||||
@@ -253,6 +263,17 @@ DeltaMessage:
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
DeltaToolCall:
|
||||
id: Optional[str] = None
|
||||
type: Optional[Literal["function"]] = None
|
||||
index: int
|
||||
function: Optional[DeltaFunctionCall] = None
|
||||
DeltaFunctionCall:
|
||||
name: Optional[str] = None
|
||||
arguments: Optional[str] = None
|
||||
```
|
||||
|
||||
## Completion API
|
||||
@@ -380,10 +401,20 @@ CompletionResponseChoice:
|
||||
text: str
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
arrival_time: Optional[float] = None
|
||||
logprobs: Optional[int] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
ToolCall:
|
||||
id: str = None
|
||||
type: Literal["function"] = "function"
|
||||
function: FunctionCall
|
||||
FunctionCall:
|
||||
name: str
|
||||
arguments: str
|
||||
|
||||
# 返回流式响应的字段
|
||||
CompletionStreamResponse:
|
||||
@@ -399,8 +430,18 @@ CompletionResponseStreamChoice:
|
||||
arrival_time: float = None
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
logprobs: Optional[float] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
|
||||
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
DeltaToolCall:
|
||||
id: Optional[str] = None
|
||||
type: Optional[Literal["function"]] = None
|
||||
index: int
|
||||
function: Optional[DeltaFunctionCall] = None
|
||||
DeltaFunctionCall:
|
||||
name: Optional[str] = None
|
||||
arguments: Optional[str] = None
|
||||
```
|
||||
|
||||
@@ -193,8 +193,6 @@ class ChatMessage(BaseModel):
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
text_after_process: Optional[str] = None
|
||||
raw_prediction: Optional[str] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
|
||||
@@ -255,8 +253,6 @@ class DeltaMessage(BaseModel):
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
text_after_process: Optional[str] = None
|
||||
raw_prediction: Optional[str] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
|
||||
@@ -295,8 +291,6 @@ class CompletionResponseChoice(BaseModel):
|
||||
text: str
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
text_after_process: Optional[str] = None
|
||||
raw_prediction: Optional[str] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
arrival_time: Optional[float] = None
|
||||
@@ -341,8 +335,6 @@ class CompletionResponseStreamChoice(BaseModel):
|
||||
logprobs: Optional[CompletionLogprobs] = None
|
||||
prompt_token_ids: Optional[List[int]] = None
|
||||
completion_token_ids: Optional[List[int]] = None
|
||||
text_after_process: Optional[str] = None
|
||||
raw_prediction: Optional[str] = None
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
|
||||
@@ -118,14 +118,14 @@ class OpenAIServingChat:
|
||||
else:
|
||||
request_id = f"chatcmpl-{uuid.uuid4()}"
|
||||
api_server_logger.info(f"create chat completion request: {request_id}")
|
||||
text_after_process = None
|
||||
prompt_tokens = None
|
||||
try:
|
||||
current_req_dict = request.to_dict_for_infer(request_id)
|
||||
if "chat_template" not in current_req_dict:
|
||||
current_req_dict["chat_template"] = self.chat_template
|
||||
current_req_dict["arrival_time"] = time.time()
|
||||
prompt_token_ids = await self.engine_client.format_and_add_data(current_req_dict)
|
||||
text_after_process = current_req_dict.get("text_after_process")
|
||||
prompt_tokens = current_req_dict.get("prompt_tokens")
|
||||
if isinstance(prompt_token_ids, np.ndarray):
|
||||
prompt_token_ids = prompt_token_ids.tolist()
|
||||
except ParameterError as e:
|
||||
@@ -143,12 +143,12 @@ class OpenAIServingChat:
|
||||
|
||||
if request.stream:
|
||||
return self.chat_completion_stream_generator(
|
||||
request, request_id, request.model, prompt_token_ids, text_after_process
|
||||
request, request_id, request.model, prompt_token_ids, prompt_tokens
|
||||
)
|
||||
else:
|
||||
try:
|
||||
return await self.chat_completion_full_generator(
|
||||
request, request_id, request.model, prompt_token_ids, text_after_process
|
||||
request, request_id, request.model, prompt_token_ids, prompt_tokens
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"request[{request_id}]full generator error: {str(e)}, {str(traceback.format_exc())}"
|
||||
@@ -175,7 +175,7 @@ class OpenAIServingChat:
|
||||
request_id: str,
|
||||
model_name: str,
|
||||
prompt_token_ids: list(),
|
||||
text_after_process: str,
|
||||
prompt_tokens: str,
|
||||
):
|
||||
"""
|
||||
Streaming chat completion generator.
|
||||
@@ -289,8 +289,7 @@ class OpenAIServingChat:
|
||||
|
||||
if request.return_token_ids:
|
||||
choice.delta.prompt_token_ids = list(prompt_token_ids)
|
||||
choice.delta.text_after_process = text_after_process
|
||||
choice.delta.prompt_tokens = text_after_process
|
||||
choice.delta.prompt_tokens = prompt_tokens
|
||||
chunk = ChatCompletionStreamResponse(
|
||||
id=request_id,
|
||||
object=chunk_object_type,
|
||||
@@ -368,8 +367,7 @@ class OpenAIServingChat:
|
||||
choice.delta.multimodal_content[0]["completion_token_ids"] = list(output["token_ids"])
|
||||
else:
|
||||
choice.delta.completion_token_ids = list(output["token_ids"])
|
||||
choice.delta.raw_prediction = output.get("raw_prediction")
|
||||
choice.delta.completion_tokens = output.get("raw_prediction")
|
||||
choice.delta.completion_tokens = output.get("completion_tokens")
|
||||
if include_continuous_usage:
|
||||
chunk.usage = UsageInfo(
|
||||
prompt_tokens=num_prompt_tokens,
|
||||
@@ -419,7 +417,7 @@ class OpenAIServingChat:
|
||||
request_id: str,
|
||||
model_name: str,
|
||||
prompt_token_ids: list(),
|
||||
text_after_process: str,
|
||||
prompt_tokens: str,
|
||||
):
|
||||
"""
|
||||
Full chat completion generator.
|
||||
@@ -509,10 +507,8 @@ class OpenAIServingChat:
|
||||
tool_calls=output.get("tool_call"),
|
||||
prompt_token_ids=prompt_token_ids if request.return_token_ids else None,
|
||||
completion_token_ids=completion_token_ids if request.return_token_ids else None,
|
||||
text_after_process=text_after_process if request.return_token_ids else None,
|
||||
prompt_tokens=text_after_process if request.return_token_ids else None,
|
||||
raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
|
||||
completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
|
||||
prompt_tokens=prompt_tokens if request.return_token_ids else None,
|
||||
completion_tokens=output.get("completion_tokens") if request.return_token_ids else None,
|
||||
)
|
||||
|
||||
if response_processor.enable_multimodal_content():
|
||||
|
||||
@@ -132,7 +132,7 @@ class OpenAIServingCompletion:
|
||||
num_choices = len(request_prompts)
|
||||
api_server_logger.info(f"Start preprocessing request: req_id={request_id}), num_choices={num_choices}")
|
||||
prompt_batched_token_ids = []
|
||||
text_after_process_list = []
|
||||
prompt_tokens_list = []
|
||||
try:
|
||||
if self.max_waiting_time < 0:
|
||||
await self.engine_client.semaphore.acquire()
|
||||
@@ -157,7 +157,7 @@ class OpenAIServingCompletion:
|
||||
prompt_token_ids = await self.engine_client.format_and_add_data(current_req_dict) # tokenize
|
||||
if isinstance(prompt_token_ids, np.ndarray):
|
||||
prompt_token_ids = prompt_token_ids.tolist()
|
||||
text_after_process_list.append(current_req_dict.get("text_after_process"))
|
||||
prompt_tokens_list.append(current_req_dict.get("prompt_tokens"))
|
||||
prompt_batched_token_ids.append(prompt_token_ids)
|
||||
del current_req_dict
|
||||
except ParameterError as e:
|
||||
@@ -180,7 +180,7 @@ class OpenAIServingCompletion:
|
||||
created_time=created_time,
|
||||
model_name=request.model,
|
||||
prompt_batched_token_ids=prompt_batched_token_ids,
|
||||
text_after_process_list=text_after_process_list,
|
||||
prompt_tokens_list=prompt_tokens_list,
|
||||
)
|
||||
else:
|
||||
try:
|
||||
@@ -191,7 +191,7 @@ class OpenAIServingCompletion:
|
||||
created_time=created_time,
|
||||
model_name=request.model,
|
||||
prompt_batched_token_ids=prompt_batched_token_ids,
|
||||
text_after_process_list=text_after_process_list,
|
||||
prompt_tokens_list=prompt_tokens_list,
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = (
|
||||
@@ -213,7 +213,7 @@ class OpenAIServingCompletion:
|
||||
created_time: int,
|
||||
model_name: str,
|
||||
prompt_batched_token_ids: list(),
|
||||
text_after_process_list: list(),
|
||||
prompt_tokens_list: list(),
|
||||
):
|
||||
"""
|
||||
Process the full completion request with multiple choices.
|
||||
@@ -292,7 +292,7 @@ class OpenAIServingCompletion:
|
||||
model_name=model_name,
|
||||
prompt_batched_token_ids=prompt_batched_token_ids,
|
||||
completion_batched_token_ids=completion_batched_token_ids,
|
||||
text_after_process_list=text_after_process_list,
|
||||
prompt_tokens_list=prompt_tokens_list,
|
||||
)
|
||||
api_server_logger.info(f"Completion response: {res.model_dump_json()}")
|
||||
return res
|
||||
@@ -344,7 +344,7 @@ class OpenAIServingCompletion:
|
||||
created_time: int,
|
||||
model_name: str,
|
||||
prompt_batched_token_ids: list(),
|
||||
text_after_process_list: list(),
|
||||
prompt_tokens_list: list(),
|
||||
):
|
||||
"""
|
||||
Process the stream completion request.
|
||||
@@ -408,8 +408,7 @@ class OpenAIServingCompletion:
|
||||
index=idx,
|
||||
text="",
|
||||
prompt_token_ids=list(prompt_batched_token_ids[idx]),
|
||||
text_after_process=text_after_process_list[idx],
|
||||
prompt_tokens=text_after_process_list[idx],
|
||||
prompt_tokens=prompt_tokens_list[idx],
|
||||
completion_token_ids=None,
|
||||
)
|
||||
],
|
||||
@@ -443,8 +442,7 @@ class OpenAIServingCompletion:
|
||||
prompt_token_ids=None,
|
||||
completion_token_ids=output.get("token_ids") if request.return_token_ids else None,
|
||||
tool_calls=None,
|
||||
raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
|
||||
completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
|
||||
completion_tokens=output.get("completion_tokens") if request.return_token_ids else None,
|
||||
reasoning_content="",
|
||||
arrival_time=arrival_time,
|
||||
logprobs=logprobs_res,
|
||||
@@ -522,7 +520,7 @@ class OpenAIServingCompletion:
|
||||
model_name: str,
|
||||
prompt_batched_token_ids: list(),
|
||||
completion_batched_token_ids: list(),
|
||||
text_after_process_list: list(),
|
||||
prompt_tokens_list: list(),
|
||||
) -> CompletionResponse:
|
||||
choices: List[CompletionResponseChoice] = []
|
||||
num_prompt_tokens = 0
|
||||
@@ -556,10 +554,8 @@ class OpenAIServingCompletion:
|
||||
text=output_text,
|
||||
prompt_token_ids=prompt_token_ids if request.return_token_ids else None,
|
||||
completion_token_ids=completion_token_ids if request.return_token_ids else None,
|
||||
raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
|
||||
completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
|
||||
text_after_process=text_after_process_list[idx] if request.return_token_ids else None,
|
||||
prompt_tokens=text_after_process_list[idx] if request.return_token_ids else None,
|
||||
completion_tokens=output.get("completion_tokens") if request.return_token_ids else None,
|
||||
prompt_tokens=prompt_tokens_list[idx] if request.return_token_ids else None,
|
||||
reasoning_content=output.get("reasoning_content"),
|
||||
tool_calls=output.get("tool_call"),
|
||||
logprobs=aggregated_logprobs,
|
||||
|
||||
@@ -197,7 +197,7 @@ class Ernie4_5Processor(BaseDataProcessor):
|
||||
if isinstance(prompt, list): # if prompt is a token id list
|
||||
request["prompt_token_ids"] = prompt
|
||||
else:
|
||||
request["text_after_process"] = prompt
|
||||
request["prompt_tokens"] = prompt
|
||||
tokens = self.tokenizer.tokenize(prompt)
|
||||
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
|
||||
request["prompt_token_ids"] = token_ids
|
||||
@@ -318,7 +318,7 @@ class Ernie4_5Processor(BaseDataProcessor):
|
||||
if tool_call_info.tools_called:
|
||||
response_dict["outputs"]["tool_call"] = tool_call_info.tool_calls
|
||||
response_dict["outputs"]["text"] = tool_call_info.content
|
||||
response_dict["outputs"]["raw_prediction"] = full_text
|
||||
response_dict["outputs"]["completion_tokens"] = full_text
|
||||
data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}")
|
||||
del self.decode_status[req_id]
|
||||
return response_dict
|
||||
@@ -342,7 +342,7 @@ class Ernie4_5Processor(BaseDataProcessor):
|
||||
if token_ids[-1] == self.tokenizer.eos_token_id:
|
||||
token_ids = token_ids[:-1]
|
||||
delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
|
||||
response_dict["outputs"]["raw_prediction"] = delta_text
|
||||
response_dict["outputs"]["completion_tokens"] = delta_text
|
||||
if self.reasoning_parser and (
|
||||
enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
|
||||
):
|
||||
@@ -398,7 +398,7 @@ class Ernie4_5Processor(BaseDataProcessor):
|
||||
add_special_tokens=False,
|
||||
**kwargs,
|
||||
)
|
||||
request_or_messages["text_after_process"] = spliced_message
|
||||
request_or_messages["prompt_tokens"] = spliced_message
|
||||
req_id = None
|
||||
if isinstance(request_or_messages, dict):
|
||||
req_id = request_or_messages.get("request_id", None)
|
||||
|
||||
@@ -222,7 +222,7 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor):
|
||||
self._check_mm_limits(multimodal_data)
|
||||
images = multimodal_data.get("image", None)
|
||||
videos = multimodal_data.get("video", None)
|
||||
request["text_after_process"] = request.get("prompt")
|
||||
request["prompt_tokens"] = request.get("prompt")
|
||||
outputs = self.ernie4_5_processor.text2ids(request["prompt"], images, videos)
|
||||
elif request.get("messages"):
|
||||
messages = request["messages"]
|
||||
|
||||
@@ -503,7 +503,7 @@ class DataProcessor:
|
||||
prompt_token_str = prompt_token_template.replace("<|image@placeholder|>", "").replace(
|
||||
"<|video@placeholder|>", ""
|
||||
)
|
||||
request["text_after_process"] = prompt_token_template
|
||||
request["prompt_tokens"] = prompt_token_template
|
||||
tokens = self.tokenizer.tokenize(prompt_token_str)
|
||||
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
|
||||
data_processor_logger.info(
|
||||
|
||||
@@ -495,7 +495,7 @@ class DataProcessor:
|
||||
add_generation_prompt=request.get("add_generation_prompt", True),
|
||||
)
|
||||
prompt_token_str = raw_prompt.replace(self.image_token, "").replace(self.video_token, "")
|
||||
request["text_after_process"] = raw_prompt
|
||||
request["prompt_tokens"] = raw_prompt
|
||||
|
||||
tokens = self.tokenizer.tokenize(prompt_token_str)
|
||||
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
|
||||
|
||||
@@ -403,7 +403,7 @@ class DataProcessor(BaseDataProcessor):
|
||||
delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
|
||||
if is_end:
|
||||
full_text = previous_texts + delta_text
|
||||
response_dict["outputs"]["raw_prediction"] = full_text
|
||||
response_dict["outputs"]["completion_tokens"] = full_text
|
||||
if enable_thinking and self.reasoning_parser:
|
||||
reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
|
||||
response_dict["outputs"]["text"] = text
|
||||
@@ -439,7 +439,7 @@ class DataProcessor(BaseDataProcessor):
|
||||
if token_ids[-1] in self.eos_token_ids:
|
||||
token_ids = token_ids[:-1]
|
||||
delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
|
||||
response_dict["outputs"]["raw_prediction"] = delta_text
|
||||
response_dict["outputs"]["completion_tokens"] = delta_text
|
||||
if self.reasoning_parser and (
|
||||
enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
|
||||
):
|
||||
@@ -548,7 +548,7 @@ class DataProcessor(BaseDataProcessor):
|
||||
return_tensors="pd",
|
||||
**kwargs,
|
||||
)
|
||||
request["text_after_process"] = spliced_message
|
||||
request["prompt_tokens"] = spliced_message
|
||||
req_id = None
|
||||
tokens = self.tokenizer.tokenize(spliced_message)
|
||||
if isinstance(request, dict):
|
||||
|
||||
@@ -14,10 +14,10 @@ from core import TEMPLATE, URL, build_request_payload, send_request
|
||||
COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions")
|
||||
|
||||
|
||||
def test_completion_stream_text_after_process_raw_prediction():
|
||||
def test_completion_stream_prompt_tokens_completion_tokens():
|
||||
"""
|
||||
/v1/completions接口, stream=True
|
||||
返回属性"text_after_process"和"reasoning_content"
|
||||
return "prompt_tokens"和"reasoning_content"
|
||||
"""
|
||||
data = {
|
||||
"prompt": "你是谁",
|
||||
@@ -39,55 +39,55 @@ def test_completion_stream_text_after_process_raw_prediction():
|
||||
|
||||
choice = response_data["choices"][0]
|
||||
if "prompt_token_ids" in choice and choice["prompt_token_ids"] is not None:
|
||||
text_after_process = choice["text_after_process"]
|
||||
assert data["prompt"] in text_after_process, "text_after_process取值结果不正确"
|
||||
prompt_tokens = choice["prompt_tokens"]
|
||||
assert data["prompt"] in prompt_tokens, "prompt_tokens取值结果不正确"
|
||||
else:
|
||||
raw_prediction = choice["raw_prediction"]
|
||||
completion_tokens = choice["completion_tokens"]
|
||||
reasoning_content = choice["reasoning_content"]
|
||||
text = choice["text"]
|
||||
assert reasoning_content or text in raw_prediction, "raw_prediction取值结果不正确"
|
||||
assert reasoning_content or text in completion_tokens, "completion_tokens取值结果不正确"
|
||||
if "finish_reason" in line.strip():
|
||||
break
|
||||
|
||||
|
||||
def test_completion_text_after_process_raw_predictio_return_token_ids():
|
||||
def test_completion_prompt_tokens_completion_tokens_return_token_ids():
|
||||
"""
|
||||
/v1/completions接口,非流式接口
|
||||
返回属性"text_after_process"和"reasoning_content"
|
||||
return "prompt_tokens"和"reasoning_content"
|
||||
"""
|
||||
data = {"stream": False, "prompt": "你是谁", "max_tokens": 50, "return_token_ids": True}
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
resp = send_request(COMPLETIONS_URL, payload).json()
|
||||
|
||||
text_after_process = resp["choices"][0]["text_after_process"]
|
||||
assert data["prompt"] in text_after_process, "text_after_process取值结果不正确"
|
||||
prompt_tokens = resp["choices"][0]["prompt_tokens"]
|
||||
assert data["prompt"] in prompt_tokens, "prompt_tokens取值结果不正确"
|
||||
|
||||
raw_prediction = resp["choices"][0]["raw_prediction"]
|
||||
completion_tokens = resp["choices"][0]["completion_tokens"]
|
||||
reasoning_content = resp["choices"][0]["reasoning_content"]
|
||||
text = resp["choices"][0]["text"]
|
||||
assert reasoning_content or text in raw_prediction, "raw_prediction取值结果不正确"
|
||||
assert reasoning_content or text in completion_tokens, "completion_tokens取值结果不正确"
|
||||
|
||||
|
||||
def test_completion_text_after_process_raw_prediction():
|
||||
def test_completion_prompt_tokens_completion_tokens():
|
||||
"""
|
||||
/v1/completions接口,无return_token_ids参数
|
||||
非流式接口中,无return token ids 属性"text_after_process"和"reasoning_content"值为null
|
||||
非流式接口中,无return token ids 属性"prompt_tokens"和"reasoning_content"值为null
|
||||
"""
|
||||
data = {"stream": False, "prompt": "你是谁", "max_tokens": 50}
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
resp = send_request(COMPLETIONS_URL, payload).json()
|
||||
|
||||
text_after_process = resp["choices"][0]["text_after_process"]
|
||||
assert text_after_process is None, "text_after_process取值结果不正确"
|
||||
prompt_tokens = resp["choices"][0]["prompt_tokens"]
|
||||
assert prompt_tokens is None, "prompt_tokens取值结果不正确"
|
||||
|
||||
raw_prediction = resp["choices"][0]["raw_prediction"]
|
||||
assert raw_prediction is None, "raw_prediction取值结果不正确"
|
||||
completion_tokens = resp["choices"][0]["completion_tokens"]
|
||||
assert completion_tokens is None, "completion_tokens取值结果不正确"
|
||||
|
||||
|
||||
def test_stream_text_after_process_raw_prediction():
|
||||
def test_stream_prompt_tokens_completion_tokens():
|
||||
"""
|
||||
/v1/chat/completions接口,"stream": True
|
||||
返回属性"text_after_process"和"reasoning_content"
|
||||
返回属性"prompt_tokens"和"reasoning_content"
|
||||
"""
|
||||
data = {
|
||||
"messages": [{"role": "user", "content": "你是谁"}],
|
||||
@@ -109,21 +109,21 @@ def test_stream_text_after_process_raw_prediction():
|
||||
|
||||
choice = response_data["choices"][0]
|
||||
if "prompt_token_ids" in choice["delta"] and choice["delta"]["prompt_token_ids"] is not None:
|
||||
text_after_process = choice["delta"]["text_after_process"]
|
||||
assert data["messages"][0]["content"] in text_after_process, "text_after_process取值结果不正确"
|
||||
prompt_tokens = choice["delta"]["prompt_tokens"]
|
||||
assert data["messages"][0]["content"] in prompt_tokens, "prompt_tokens取值结果不正确"
|
||||
else:
|
||||
raw_prediction = choice["delta"]["raw_prediction"]
|
||||
completion_tokens = choice["delta"]["completion_tokens"]
|
||||
reasoning_content = choice["delta"]["reasoning_content"]
|
||||
content = choice["delta"]["content"]
|
||||
assert reasoning_content or content in raw_prediction, "raw_prediction取值结果不正确"
|
||||
assert reasoning_content or content in completion_tokens, "completion_tokens取值结果不正确"
|
||||
if "finish_reason" in line.strip():
|
||||
break
|
||||
|
||||
|
||||
def test_text_after_process_raw_prediction_return_token_ids():
|
||||
def test_prompt_tokens_completion_tokens_return_token_ids():
|
||||
"""
|
||||
/v1/chat/completions接口,非流式接口
|
||||
返回属性"text_after_process"和"reasoning_content"
|
||||
返回属性"prompt_tokens"和"reasoning_content"
|
||||
"""
|
||||
data = {
|
||||
"stream": False,
|
||||
@@ -136,19 +136,19 @@ def test_text_after_process_raw_prediction_return_token_ids():
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
resp = send_request(URL, payload).json()
|
||||
|
||||
text_after_process = resp["choices"][0]["message"]["text_after_process"]
|
||||
assert data["messages"][0]["content"] in text_after_process, "text_after_process取值结果不正确"
|
||||
prompt_tokens = resp["choices"][0]["message"]["prompt_tokens"]
|
||||
assert data["messages"][0]["content"] in prompt_tokens, "prompt_tokens取值结果不正确"
|
||||
|
||||
raw_prediction = resp["choices"][0]["message"]["raw_prediction"]
|
||||
completion_tokens = resp["choices"][0]["message"]["completion_tokens"]
|
||||
reasoning_content = resp["choices"][0]["message"]["reasoning_content"]
|
||||
text = resp["choices"][0]["message"]["content"]
|
||||
assert reasoning_content or text in raw_prediction, "raw_prediction取值结果不正确"
|
||||
assert reasoning_content or text in completion_tokens, "completion_tokens取值结果不正确"
|
||||
|
||||
|
||||
def test_text_after_process_raw_prediction():
|
||||
def test_prompt_tokens_completion_tokens():
|
||||
"""
|
||||
/v1/chat/completions接口,无return_token_ids参数
|
||||
无return token ids 属性"text_after_process"和"reasoning_content"值为null
|
||||
无return token ids 属性"prompt_tokens"和"reasoning_content"值为null
|
||||
"""
|
||||
data = {
|
||||
"stream": False,
|
||||
@@ -160,8 +160,8 @@ def test_text_after_process_raw_prediction():
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
resp = send_request(URL, payload).json()
|
||||
|
||||
text_after_process = resp["choices"][0]["message"]["text_after_process"]
|
||||
assert text_after_process is None, "text_after_process取值结果不正确"
|
||||
prompt_tokens = resp["choices"][0]["message"]["prompt_tokens"]
|
||||
assert prompt_tokens is None, "prompt_tokens取值结果不正确"
|
||||
|
||||
raw_prediction = resp["choices"][0]["message"]["raw_prediction"]
|
||||
assert raw_prediction is None, "raw_prediction取值结果不正确"
|
||||
completion_tokens = resp["choices"][0]["message"]["completion_tokens"]
|
||||
assert completion_tokens is None, "completion_tokens取值结果不正确"
|
||||
|
||||
@@ -57,7 +57,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
|
||||
model_name="test_model",
|
||||
prompt_batched_token_ids=[[1, 2]],
|
||||
completion_batched_token_ids=[[3, 4, 5]],
|
||||
text_after_process_list=["test prompt"],
|
||||
prompt_tokens_list=["test prompt"],
|
||||
)
|
||||
|
||||
self.assertEqual(response.choices[0].text, "test prompt generated text")
|
||||
@@ -90,7 +90,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
|
||||
model_name="test_model",
|
||||
prompt_batched_token_ids=[[1, 2]],
|
||||
completion_batched_token_ids=[[3, 4, 5]],
|
||||
text_after_process_list=["test prompt"],
|
||||
prompt_tokens_list=["test prompt"],
|
||||
)
|
||||
self.assertEqual(response.choices[0].text, "decoded_[1, 2, 3] generated text")
|
||||
|
||||
@@ -123,7 +123,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
|
||||
model_name="test_model",
|
||||
prompt_batched_token_ids=[[1], [2]],
|
||||
completion_batched_token_ids=[[1, 2], [3, 4]],
|
||||
text_after_process_list=["prompt1", "prompt2"],
|
||||
prompt_tokens_list=["prompt1", "prompt2"],
|
||||
)
|
||||
|
||||
self.assertEqual(len(response.choices), 2)
|
||||
@@ -159,7 +159,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
|
||||
model_name="test_model",
|
||||
prompt_batched_token_ids=[[1], [2]],
|
||||
completion_batched_token_ids=[[1, 2], [3, 4]],
|
||||
text_after_process_list=["prompt1", "prompt2"],
|
||||
prompt_tokens_list=["prompt1", "prompt2"],
|
||||
)
|
||||
|
||||
self.assertEqual(len(response.choices), 2)
|
||||
|
||||
@@ -160,7 +160,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
|
||||
request_id="test-request-id",
|
||||
model_name="test-model",
|
||||
prompt_token_ids=[1, 2, 3],
|
||||
text_after_process="Hello",
|
||||
prompt_tokens="Hello",
|
||||
)
|
||||
|
||||
chunks = []
|
||||
@@ -242,7 +242,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
|
||||
model_name="test-model",
|
||||
created_time=11,
|
||||
prompt_batched_token_ids=[[1, 2, 3]],
|
||||
text_after_process_list=["Hello"],
|
||||
prompt_tokens_list=["Hello"],
|
||||
)
|
||||
|
||||
chunks = []
|
||||
|
||||
@@ -54,8 +54,8 @@ INVALID_INPUT_BATCH = """
|
||||
"""
|
||||
|
||||
BATCH_RESPONSE = """
|
||||
{"id":"fastdeploy-7fcc30e2e4334fca806c4d01ee7ac4ab","custom_id":"req-00001","response":{"status_code":200,"request_id":"fastdeploy-batch-5f4017beded84b15aa3a8b0f1fce154c","body":{"id":"chatcmpl-33b09ae5-a8f1-40ad-9110-efa2b381eac9","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"In a sunlit meadow where dreams bloom,\\nA gentle breeze carries the breeze,\\nThe leaves rustle like ancient letters,\\nAnd in the sky, a song of hope and love.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":19,"total_tokens":60,"completion_tokens":41,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
|
||||
{"id":"fastdeploy-bf549849df2145598ae1758ba260f784","custom_id":"req-00002","response":{"status_code":200,"request_id":"fastdeploy-batch-81223f12fdc345efbfe85114ced10a1d","body":{"id":"chatcmpl-9479e36c-1542-45ff-b364-1dc6d34be9e7","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"Based on the given text, here are some possible actions you can take:\\n\\n1. **Read the question**: To understand what you can do, you can read the question (id=2) and analyze its requirements or constraints.\\n2. **Identify the keywords**: Look for specific keywords or phrases that describe what you can do. For example, if the question mentions \\"coding,\\" you can focus on coding skills or platforms.\\n3. **Brainstorm ideas**: You can think creatively about different ways to perform the action. For example, you could brainstorm different methods of communication, data analysis, or problem-solving.\\n4. **Explain your action**: If you have knowledge or skills in a particular area, you can explain how you would use those skills to achieve the desired outcome.\\n5. **Ask for help**: If you need assistance, you can ask for help from a friend, teacher, or mentor.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":17,"total_tokens":211,"completion_tokens":194,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
|
||||
{"id":"fastdeploy-7fcc30e2e4334fca806c4d01ee7ac4ab","custom_id":"req-00001","response":{"status_code":200,"request_id":"fastdeploy-batch-5f4017beded84b15aa3a8b0f1fce154c","body":{"id":"chatcmpl-33b09ae5-a8f1-40ad-9110-efa2b381eac9","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"In a sunlit meadow where dreams bloom,\\nA gentle breeze carries the breeze,\\nThe leaves rustle like ancient letters,\\nAnd in the sky, a song of hope and love.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":19,"total_tokens":60,"completion_tokens":41,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
|
||||
{"id":"fastdeploy-bf549849df2145598ae1758ba260f784","custom_id":"req-00002","response":{"status_code":200,"request_id":"fastdeploy-batch-81223f12fdc345efbfe85114ced10a1d","body":{"id":"chatcmpl-9479e36c-1542-45ff-b364-1dc6d34be9e7","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"Based on the given text, here are some possible actions you can take:\\n\\n1. **Read the question**: To understand what you can do, you can read the question (id=2) and analyze its requirements or constraints.\\n2. **Identify the keywords**: Look for specific keywords or phrases that describe what you can do. For example, if the question mentions \\"coding,\\" you can focus on coding skills or platforms.\\n3. **Brainstorm ideas**: You can think creatively about different ways to perform the action. For example, you could brainstorm different methods of communication, data analysis, or problem-solving.\\n4. **Explain your action**: If you have knowledge or skills in a particular area, you can explain how you would use those skills to achieve the desired outcome.\\n5. **Ask for help**: If you need assistance, you can ask for help from a friend, teacher, or mentor.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":17,"total_tokens":211,"completion_tokens":194,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
|
||||
"""
|
||||
|
||||
|
||||
@@ -867,8 +867,6 @@ class TestFileOperations(unittest.TestCase):
|
||||
tool_calls=message_data["tool_calls"],
|
||||
prompt_token_ids=message_data["prompt_token_ids"],
|
||||
completion_token_ids=message_data["completion_token_ids"],
|
||||
text_after_process=message_data["text_after_process"],
|
||||
raw_prediction=message_data["raw_prediction"],
|
||||
prompt_tokens=message_data["prompt_tokens"],
|
||||
completion_tokens=message_data["completion_tokens"],
|
||||
)
|
||||
|
||||
@@ -155,7 +155,7 @@ class TestOpenAIServingCompletion(unittest.TestCase):
|
||||
model_name=model_name,
|
||||
prompt_batched_token_ids=prompt_batched_token_ids,
|
||||
completion_batched_token_ids=completion_batched_token_ids,
|
||||
text_after_process_list=["1", "1"],
|
||||
prompt_tokens_list=["1", "1"],
|
||||
)
|
||||
|
||||
assert completion_response.id == request_id
|
||||
|
||||
@@ -61,7 +61,7 @@ class TestErnie4_5ProcessorProcessResponseDictStreaming(unittest.TestCase):
|
||||
result = self.processor.process_response_dict_streaming(response_dict, **kwargs)
|
||||
|
||||
# 验证结果
|
||||
self.assertEqual(result["outputs"]["raw_prediction"], "delta_text")
|
||||
self.assertEqual(result["outputs"]["completion_tokens"], "delta_text")
|
||||
|
||||
def test_process_request_dict(self):
|
||||
request_dict = {
|
||||
|
||||
@@ -276,7 +276,7 @@ class TestQwenVLProcessor(unittest.TestCase):
|
||||
# Create equivalent request in prompt format
|
||||
prompt = {
|
||||
"request_id": "12345",
|
||||
"prompt": request["text_after_process"],
|
||||
"prompt": request["prompt_tokens"],
|
||||
"multimodal_data": {
|
||||
"image": [mock_pil_image(480, 640)],
|
||||
"video": [{"video": b"123"}],
|
||||
@@ -300,7 +300,7 @@ class TestQwenVLProcessor(unittest.TestCase):
|
||||
|
||||
This test verifies that:
|
||||
- The processor correctly handles multimodal messages (image, video, text)
|
||||
- The text_after_process field matches the output from direct tokenizer application
|
||||
- The prompt_tokens field matches the output from direct tokenizer application
|
||||
- The chat template application preserves the message structure and content
|
||||
|
||||
Test Steps:
|
||||
@@ -345,7 +345,7 @@ class TestQwenVLProcessor(unittest.TestCase):
|
||||
|
||||
# Process request through the processor
|
||||
self.processor.process_request_dict(request, 1024 * 100)
|
||||
prompt2 = request["text_after_process"]
|
||||
prompt2 = request["prompt_tokens"]
|
||||
|
||||
# Verify both methods produce identical prompt strings
|
||||
self.assertEqual(prompt, prompt2)
|
||||
|
||||
@@ -62,7 +62,7 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
|
||||
)
|
||||
|
||||
async def mock_chat_completion_full_generator(
|
||||
request, request_id, model_name, prompt_token_ids, text_after_process
|
||||
request, request_id, model_name, prompt_token_ids, prompt_tokens
|
||||
):
|
||||
return prompt_token_ids
|
||||
|
||||
@@ -89,7 +89,7 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
|
||||
)
|
||||
|
||||
async def mock_chat_completion_full_generator(
|
||||
request, request_id, model_name, prompt_token_ids, text_after_process
|
||||
request, request_id, model_name, prompt_token_ids, prompt_tokens
|
||||
):
|
||||
return prompt_token_ids
|
||||
|
||||
|
||||
Reference in New Issue
Block a user