【Fix】 remove text_after_process & raw_prediction (#4421)

* remove text_after_process & raw_prediction

* remove text_after_process & raw_prediction
This commit is contained in:
LiqinruiG
2025-10-16 19:00:18 +08:00
committed by GitHub
parent 8f77adc381
commit 4251ac5e95
20 changed files with 169 additions and 105 deletions
+1 -1
View File
@@ -71,5 +71,5 @@ curl -X POST "http://0.0.0.0:8188/v1/chat/completions" \
### 3. Successfully returns the result
```json
{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
```
+42 -1
View File
@@ -231,8 +231,18 @@ ChatMessage:
role: str
content: str
reasoning_content: Optional[str] = None
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
ToolCall:
id: str = None
type: Literal["function"] = "function"
function: FunctionCall
FunctionCall:
name: str
arguments: str
# Fields returned for streaming responses
ChatCompletionStreamResponse:
@@ -254,6 +264,17 @@ DeltaMessage:
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
reasoning_content: Optional[str] = None
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
DeltaToolCall:
id: Optional[str] = None
type: Optional[Literal["function"]] = None
index: int
function: Optional[DeltaFunctionCall] = None
DeltaFunctionCall:
name: Optional[str] = None
arguments: Optional[str] = None
```
## Completion API
@@ -384,10 +405,20 @@ CompletionResponseChoice:
text: str
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
arrival_time: Optional[float] = None
logprobs: Optional[int] = None
reasoning_content: Optional[str] = None
finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
ToolCall:
id: str = None
type: Literal["function"] = "function"
function: FunctionCall
FunctionCall:
name: str
arguments: str
# Fields returned for streaming responses
CompletionStreamResponse
@@ -403,8 +434,18 @@ CompletionResponseStreamChoice:
arrival_time: float = None
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
logprobs: Optional[float] = None
reasoning_content: Optional[str] = None
finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
DeltaToolCall:
id: Optional[str] = None
type: Optional[Literal["function"]] = None
index: int
function: Optional[DeltaFunctionCall] = None
DeltaFunctionCall:
name: Optional[str] = None
arguments: Optional[str] = None
```
@@ -71,5 +71,5 @@ curl -X POST "http://0.0.0.0:8188/v1/chat/completions" \
### 3. 成功返回结果
```json
{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
```
+42 -1
View File
@@ -230,8 +230,18 @@ ChatMessage:
role: str
content: str
reasoning_content: Optional[str] = None
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
ToolCall:
id: str = None
type: Literal["function"] = "function"
function: FunctionCall
FunctionCall:
name: str
arguments: str
# 返回流式响应的字段
ChatCompletionStreamResponse:
@@ -253,6 +263,17 @@ DeltaMessage:
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
reasoning_content: Optional[str] = None
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
DeltaToolCall:
id: Optional[str] = None
type: Optional[Literal["function"]] = None
index: int
function: Optional[DeltaFunctionCall] = None
DeltaFunctionCall:
name: Optional[str] = None
arguments: Optional[str] = None
```
## Completion API
@@ -380,10 +401,20 @@ CompletionResponseChoice:
text: str
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
arrival_time: Optional[float] = None
logprobs: Optional[int] = None
reasoning_content: Optional[str] = None
finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
ToolCall:
id: str = None
type: Literal["function"] = "function"
function: FunctionCall
FunctionCall:
name: str
arguments: str
# 返回流式响应的字段
CompletionStreamResponse
@@ -399,8 +430,18 @@ CompletionResponseStreamChoice:
arrival_time: float = None
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
logprobs: Optional[float] = None
reasoning_content: Optional[str] = None
finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
DeltaToolCall:
id: Optional[str] = None
type: Optional[Literal["function"]] = None
index: int
function: Optional[DeltaFunctionCall] = None
DeltaFunctionCall:
name: Optional[str] = None
arguments: Optional[str] = None
```
@@ -193,8 +193,6 @@ class ChatMessage(BaseModel):
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
text_after_process: Optional[str] = None
raw_prediction: Optional[str] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
@@ -255,8 +253,6 @@ class DeltaMessage(BaseModel):
completion_token_ids: Optional[List[int]] = None
reasoning_content: Optional[str] = None
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
text_after_process: Optional[str] = None
raw_prediction: Optional[str] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
@@ -295,8 +291,6 @@ class CompletionResponseChoice(BaseModel):
text: str
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
text_after_process: Optional[str] = None
raw_prediction: Optional[str] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
arrival_time: Optional[float] = None
@@ -341,8 +335,6 @@ class CompletionResponseStreamChoice(BaseModel):
logprobs: Optional[CompletionLogprobs] = None
prompt_token_ids: Optional[List[int]] = None
completion_token_ids: Optional[List[int]] = None
text_after_process: Optional[str] = None
raw_prediction: Optional[str] = None
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
reasoning_content: Optional[str] = None
+10 -14
View File
@@ -118,14 +118,14 @@ class OpenAIServingChat:
else:
request_id = f"chatcmpl-{uuid.uuid4()}"
api_server_logger.info(f"create chat completion request: {request_id}")
text_after_process = None
prompt_tokens = None
try:
current_req_dict = request.to_dict_for_infer(request_id)
if "chat_template" not in current_req_dict:
current_req_dict["chat_template"] = self.chat_template
current_req_dict["arrival_time"] = time.time()
prompt_token_ids = await self.engine_client.format_and_add_data(current_req_dict)
text_after_process = current_req_dict.get("text_after_process")
prompt_tokens = current_req_dict.get("prompt_tokens")
if isinstance(prompt_token_ids, np.ndarray):
prompt_token_ids = prompt_token_ids.tolist()
except ParameterError as e:
@@ -143,12 +143,12 @@ class OpenAIServingChat:
if request.stream:
return self.chat_completion_stream_generator(
request, request_id, request.model, prompt_token_ids, text_after_process
request, request_id, request.model, prompt_token_ids, prompt_tokens
)
else:
try:
return await self.chat_completion_full_generator(
request, request_id, request.model, prompt_token_ids, text_after_process
request, request_id, request.model, prompt_token_ids, prompt_tokens
)
except Exception as e:
error_msg = f"request[{request_id}]full generator error: {str(e)}, {str(traceback.format_exc())}"
@@ -175,7 +175,7 @@ class OpenAIServingChat:
request_id: str,
model_name: str,
prompt_token_ids: list(),
text_after_process: str,
prompt_tokens: str,
):
"""
Streaming chat completion generator.
@@ -289,8 +289,7 @@ class OpenAIServingChat:
if request.return_token_ids:
choice.delta.prompt_token_ids = list(prompt_token_ids)
choice.delta.text_after_process = text_after_process
choice.delta.prompt_tokens = text_after_process
choice.delta.prompt_tokens = prompt_tokens
chunk = ChatCompletionStreamResponse(
id=request_id,
object=chunk_object_type,
@@ -368,8 +367,7 @@ class OpenAIServingChat:
choice.delta.multimodal_content[0]["completion_token_ids"] = list(output["token_ids"])
else:
choice.delta.completion_token_ids = list(output["token_ids"])
choice.delta.raw_prediction = output.get("raw_prediction")
choice.delta.completion_tokens = output.get("raw_prediction")
choice.delta.completion_tokens = output.get("completion_tokens")
if include_continuous_usage:
chunk.usage = UsageInfo(
prompt_tokens=num_prompt_tokens,
@@ -419,7 +417,7 @@ class OpenAIServingChat:
request_id: str,
model_name: str,
prompt_token_ids: list(),
text_after_process: str,
prompt_tokens: str,
):
"""
Full chat completion generator.
@@ -509,10 +507,8 @@ class OpenAIServingChat:
tool_calls=output.get("tool_call"),
prompt_token_ids=prompt_token_ids if request.return_token_ids else None,
completion_token_ids=completion_token_ids if request.return_token_ids else None,
text_after_process=text_after_process if request.return_token_ids else None,
prompt_tokens=text_after_process if request.return_token_ids else None,
raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
prompt_tokens=prompt_tokens if request.return_token_ids else None,
completion_tokens=output.get("completion_tokens") if request.return_token_ids else None,
)
if response_processor.enable_multimodal_content():
@@ -132,7 +132,7 @@ class OpenAIServingCompletion:
num_choices = len(request_prompts)
api_server_logger.info(f"Start preprocessing request: req_id={request_id}), num_choices={num_choices}")
prompt_batched_token_ids = []
text_after_process_list = []
prompt_tokens_list = []
try:
if self.max_waiting_time < 0:
await self.engine_client.semaphore.acquire()
@@ -157,7 +157,7 @@ class OpenAIServingCompletion:
prompt_token_ids = await self.engine_client.format_and_add_data(current_req_dict) # tokenize
if isinstance(prompt_token_ids, np.ndarray):
prompt_token_ids = prompt_token_ids.tolist()
text_after_process_list.append(current_req_dict.get("text_after_process"))
prompt_tokens_list.append(current_req_dict.get("prompt_tokens"))
prompt_batched_token_ids.append(prompt_token_ids)
del current_req_dict
except ParameterError as e:
@@ -180,7 +180,7 @@ class OpenAIServingCompletion:
created_time=created_time,
model_name=request.model,
prompt_batched_token_ids=prompt_batched_token_ids,
text_after_process_list=text_after_process_list,
prompt_tokens_list=prompt_tokens_list,
)
else:
try:
@@ -191,7 +191,7 @@ class OpenAIServingCompletion:
created_time=created_time,
model_name=request.model,
prompt_batched_token_ids=prompt_batched_token_ids,
text_after_process_list=text_after_process_list,
prompt_tokens_list=prompt_tokens_list,
)
except Exception as e:
error_msg = (
@@ -213,7 +213,7 @@ class OpenAIServingCompletion:
created_time: int,
model_name: str,
prompt_batched_token_ids: list(),
text_after_process_list: list(),
prompt_tokens_list: list(),
):
"""
Process the full completion request with multiple choices.
@@ -292,7 +292,7 @@ class OpenAIServingCompletion:
model_name=model_name,
prompt_batched_token_ids=prompt_batched_token_ids,
completion_batched_token_ids=completion_batched_token_ids,
text_after_process_list=text_after_process_list,
prompt_tokens_list=prompt_tokens_list,
)
api_server_logger.info(f"Completion response: {res.model_dump_json()}")
return res
@@ -344,7 +344,7 @@ class OpenAIServingCompletion:
created_time: int,
model_name: str,
prompt_batched_token_ids: list(),
text_after_process_list: list(),
prompt_tokens_list: list(),
):
"""
Process the stream completion request.
@@ -408,8 +408,7 @@ class OpenAIServingCompletion:
index=idx,
text="",
prompt_token_ids=list(prompt_batched_token_ids[idx]),
text_after_process=text_after_process_list[idx],
prompt_tokens=text_after_process_list[idx],
prompt_tokens=prompt_tokens_list[idx],
completion_token_ids=None,
)
],
@@ -443,8 +442,7 @@ class OpenAIServingCompletion:
prompt_token_ids=None,
completion_token_ids=output.get("token_ids") if request.return_token_ids else None,
tool_calls=None,
raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
completion_tokens=output.get("completion_tokens") if request.return_token_ids else None,
reasoning_content="",
arrival_time=arrival_time,
logprobs=logprobs_res,
@@ -522,7 +520,7 @@ class OpenAIServingCompletion:
model_name: str,
prompt_batched_token_ids: list(),
completion_batched_token_ids: list(),
text_after_process_list: list(),
prompt_tokens_list: list(),
) -> CompletionResponse:
choices: List[CompletionResponseChoice] = []
num_prompt_tokens = 0
@@ -556,10 +554,8 @@ class OpenAIServingCompletion:
text=output_text,
prompt_token_ids=prompt_token_ids if request.return_token_ids else None,
completion_token_ids=completion_token_ids if request.return_token_ids else None,
raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
text_after_process=text_after_process_list[idx] if request.return_token_ids else None,
prompt_tokens=text_after_process_list[idx] if request.return_token_ids else None,
completion_tokens=output.get("completion_tokens") if request.return_token_ids else None,
prompt_tokens=prompt_tokens_list[idx] if request.return_token_ids else None,
reasoning_content=output.get("reasoning_content"),
tool_calls=output.get("tool_call"),
logprobs=aggregated_logprobs,
+4 -4
View File
@@ -197,7 +197,7 @@ class Ernie4_5Processor(BaseDataProcessor):
if isinstance(prompt, list): # if prompt is a token id list
request["prompt_token_ids"] = prompt
else:
request["text_after_process"] = prompt
request["prompt_tokens"] = prompt
tokens = self.tokenizer.tokenize(prompt)
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
request["prompt_token_ids"] = token_ids
@@ -318,7 +318,7 @@ class Ernie4_5Processor(BaseDataProcessor):
if tool_call_info.tools_called:
response_dict["outputs"]["tool_call"] = tool_call_info.tool_calls
response_dict["outputs"]["text"] = tool_call_info.content
response_dict["outputs"]["raw_prediction"] = full_text
response_dict["outputs"]["completion_tokens"] = full_text
data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}")
del self.decode_status[req_id]
return response_dict
@@ -342,7 +342,7 @@ class Ernie4_5Processor(BaseDataProcessor):
if token_ids[-1] == self.tokenizer.eos_token_id:
token_ids = token_ids[:-1]
delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
response_dict["outputs"]["raw_prediction"] = delta_text
response_dict["outputs"]["completion_tokens"] = delta_text
if self.reasoning_parser and (
enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
):
@@ -398,7 +398,7 @@ class Ernie4_5Processor(BaseDataProcessor):
add_special_tokens=False,
**kwargs,
)
request_or_messages["text_after_process"] = spliced_message
request_or_messages["prompt_tokens"] = spliced_message
req_id = None
if isinstance(request_or_messages, dict):
req_id = request_or_messages.get("request_id", None)
@@ -222,7 +222,7 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor):
self._check_mm_limits(multimodal_data)
images = multimodal_data.get("image", None)
videos = multimodal_data.get("video", None)
request["text_after_process"] = request.get("prompt")
request["prompt_tokens"] = request.get("prompt")
outputs = self.ernie4_5_processor.text2ids(request["prompt"], images, videos)
elif request.get("messages"):
messages = request["messages"]
@@ -503,7 +503,7 @@ class DataProcessor:
prompt_token_str = prompt_token_template.replace("<|image@placeholder|>", "").replace(
"<|video@placeholder|>", ""
)
request["text_after_process"] = prompt_token_template
request["prompt_tokens"] = prompt_token_template
tokens = self.tokenizer.tokenize(prompt_token_str)
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
data_processor_logger.info(
@@ -495,7 +495,7 @@ class DataProcessor:
add_generation_prompt=request.get("add_generation_prompt", True),
)
prompt_token_str = raw_prompt.replace(self.image_token, "").replace(self.video_token, "")
request["text_after_process"] = raw_prompt
request["prompt_tokens"] = raw_prompt
tokens = self.tokenizer.tokenize(prompt_token_str)
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
+3 -3
View File
@@ -403,7 +403,7 @@ class DataProcessor(BaseDataProcessor):
delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
if is_end:
full_text = previous_texts + delta_text
response_dict["outputs"]["raw_prediction"] = full_text
response_dict["outputs"]["completion_tokens"] = full_text
if enable_thinking and self.reasoning_parser:
reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
response_dict["outputs"]["text"] = text
@@ -439,7 +439,7 @@ class DataProcessor(BaseDataProcessor):
if token_ids[-1] in self.eos_token_ids:
token_ids = token_ids[:-1]
delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
response_dict["outputs"]["raw_prediction"] = delta_text
response_dict["outputs"]["completion_tokens"] = delta_text
if self.reasoning_parser and (
enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
):
@@ -548,7 +548,7 @@ class DataProcessor(BaseDataProcessor):
return_tensors="pd",
**kwargs,
)
request["text_after_process"] = spliced_message
request["prompt_tokens"] = spliced_message
req_id = None
tokens = self.tokenizer.tokenize(spliced_message)
if isinstance(request, dict):
+36 -36
View File
@@ -14,10 +14,10 @@ from core import TEMPLATE, URL, build_request_payload, send_request
COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions")
def test_completion_stream_text_after_process_raw_prediction():
def test_completion_stream_prompt_tokens_completion_tokens():
"""
/v1/completions接口, stream=True
返回属性"text_after_process""reasoning_content"
return "prompt_tokens""reasoning_content"
"""
data = {
"prompt": "你是谁",
@@ -39,55 +39,55 @@ def test_completion_stream_text_after_process_raw_prediction():
choice = response_data["choices"][0]
if "prompt_token_ids" in choice and choice["prompt_token_ids"] is not None:
text_after_process = choice["text_after_process"]
assert data["prompt"] in text_after_process, "text_after_process取值结果不正确"
prompt_tokens = choice["prompt_tokens"]
assert data["prompt"] in prompt_tokens, "prompt_tokens取值结果不正确"
else:
raw_prediction = choice["raw_prediction"]
completion_tokens = choice["completion_tokens"]
reasoning_content = choice["reasoning_content"]
text = choice["text"]
assert reasoning_content or text in raw_prediction, "raw_prediction取值结果不正确"
assert reasoning_content or text in completion_tokens, "completion_tokens取值结果不正确"
if "finish_reason" in line.strip():
break
def test_completion_text_after_process_raw_predictio_return_token_ids():
def test_completion_prompt_tokens_completion_tokens_return_token_ids():
"""
/v1/completions接口,非流式接口
返回属性"text_after_process""reasoning_content"
return "prompt_tokens""reasoning_content"
"""
data = {"stream": False, "prompt": "你是谁", "max_tokens": 50, "return_token_ids": True}
payload = build_request_payload(TEMPLATE, data)
resp = send_request(COMPLETIONS_URL, payload).json()
text_after_process = resp["choices"][0]["text_after_process"]
assert data["prompt"] in text_after_process, "text_after_process取值结果不正确"
prompt_tokens = resp["choices"][0]["prompt_tokens"]
assert data["prompt"] in prompt_tokens, "prompt_tokens取值结果不正确"
raw_prediction = resp["choices"][0]["raw_prediction"]
completion_tokens = resp["choices"][0]["completion_tokens"]
reasoning_content = resp["choices"][0]["reasoning_content"]
text = resp["choices"][0]["text"]
assert reasoning_content or text in raw_prediction, "raw_prediction取值结果不正确"
assert reasoning_content or text in completion_tokens, "completion_tokens取值结果不正确"
def test_completion_text_after_process_raw_prediction():
def test_completion_prompt_tokens_completion_tokens():
"""
/v1/completions接口,无return_token_ids参数
非流式接口中,无return token ids 属性"text_after_process""reasoning_content"值为null
非流式接口中,无return token ids 属性"prompt_tokens""reasoning_content"值为null
"""
data = {"stream": False, "prompt": "你是谁", "max_tokens": 50}
payload = build_request_payload(TEMPLATE, data)
resp = send_request(COMPLETIONS_URL, payload).json()
text_after_process = resp["choices"][0]["text_after_process"]
assert text_after_process is None, "text_after_process取值结果不正确"
prompt_tokens = resp["choices"][0]["prompt_tokens"]
assert prompt_tokens is None, "prompt_tokens取值结果不正确"
raw_prediction = resp["choices"][0]["raw_prediction"]
assert raw_prediction is None, "raw_prediction取值结果不正确"
completion_tokens = resp["choices"][0]["completion_tokens"]
assert completion_tokens is None, "completion_tokens取值结果不正确"
def test_stream_text_after_process_raw_prediction():
def test_stream_prompt_tokens_completion_tokens():
"""
/v1/chat/completions接口,"stream": True
返回属性"text_after_process""reasoning_content"
返回属性"prompt_tokens""reasoning_content"
"""
data = {
"messages": [{"role": "user", "content": "你是谁"}],
@@ -109,21 +109,21 @@ def test_stream_text_after_process_raw_prediction():
choice = response_data["choices"][0]
if "prompt_token_ids" in choice["delta"] and choice["delta"]["prompt_token_ids"] is not None:
text_after_process = choice["delta"]["text_after_process"]
assert data["messages"][0]["content"] in text_after_process, "text_after_process取值结果不正确"
prompt_tokens = choice["delta"]["prompt_tokens"]
assert data["messages"][0]["content"] in prompt_tokens, "prompt_tokens取值结果不正确"
else:
raw_prediction = choice["delta"]["raw_prediction"]
completion_tokens = choice["delta"]["completion_tokens"]
reasoning_content = choice["delta"]["reasoning_content"]
content = choice["delta"]["content"]
assert reasoning_content or content in raw_prediction, "raw_prediction取值结果不正确"
assert reasoning_content or content in completion_tokens, "completion_tokens取值结果不正确"
if "finish_reason" in line.strip():
break
def test_text_after_process_raw_prediction_return_token_ids():
def test_prompt_tokens_completion_tokens_return_token_ids():
"""
/v1/chat/completions接口,非流式接口
返回属性"text_after_process""reasoning_content"
返回属性"prompt_tokens""reasoning_content"
"""
data = {
"stream": False,
@@ -136,19 +136,19 @@ def test_text_after_process_raw_prediction_return_token_ids():
payload = build_request_payload(TEMPLATE, data)
resp = send_request(URL, payload).json()
text_after_process = resp["choices"][0]["message"]["text_after_process"]
assert data["messages"][0]["content"] in text_after_process, "text_after_process取值结果不正确"
prompt_tokens = resp["choices"][0]["message"]["prompt_tokens"]
assert data["messages"][0]["content"] in prompt_tokens, "prompt_tokens取值结果不正确"
raw_prediction = resp["choices"][0]["message"]["raw_prediction"]
completion_tokens = resp["choices"][0]["message"]["completion_tokens"]
reasoning_content = resp["choices"][0]["message"]["reasoning_content"]
text = resp["choices"][0]["message"]["content"]
assert reasoning_content or text in raw_prediction, "raw_prediction取值结果不正确"
assert reasoning_content or text in completion_tokens, "completion_tokens取值结果不正确"
def test_text_after_process_raw_prediction():
def test_prompt_tokens_completion_tokens():
"""
/v1/chat/completions接口,无return_token_ids参数
无return token ids 属性"text_after_process""reasoning_content"值为null
无return token ids 属性"prompt_tokens""reasoning_content"值为null
"""
data = {
"stream": False,
@@ -160,8 +160,8 @@ def test_text_after_process_raw_prediction():
payload = build_request_payload(TEMPLATE, data)
resp = send_request(URL, payload).json()
text_after_process = resp["choices"][0]["message"]["text_after_process"]
assert text_after_process is None, "text_after_process取值结果不正确"
prompt_tokens = resp["choices"][0]["message"]["prompt_tokens"]
assert prompt_tokens is None, "prompt_tokens取值结果不正确"
raw_prediction = resp["choices"][0]["message"]["raw_prediction"]
assert raw_prediction is None, "raw_prediction取值结果不正确"
completion_tokens = resp["choices"][0]["message"]["completion_tokens"]
assert completion_tokens is None, "completion_tokens取值结果不正确"
@@ -57,7 +57,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
model_name="test_model",
prompt_batched_token_ids=[[1, 2]],
completion_batched_token_ids=[[3, 4, 5]],
text_after_process_list=["test prompt"],
prompt_tokens_list=["test prompt"],
)
self.assertEqual(response.choices[0].text, "test prompt generated text")
@@ -90,7 +90,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
model_name="test_model",
prompt_batched_token_ids=[[1, 2]],
completion_batched_token_ids=[[3, 4, 5]],
text_after_process_list=["test prompt"],
prompt_tokens_list=["test prompt"],
)
self.assertEqual(response.choices[0].text, "decoded_[1, 2, 3] generated text")
@@ -123,7 +123,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
model_name="test_model",
prompt_batched_token_ids=[[1], [2]],
completion_batched_token_ids=[[1, 2], [3, 4]],
text_after_process_list=["prompt1", "prompt2"],
prompt_tokens_list=["prompt1", "prompt2"],
)
self.assertEqual(len(response.choices), 2)
@@ -159,7 +159,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
model_name="test_model",
prompt_batched_token_ids=[[1], [2]],
completion_batched_token_ids=[[1, 2], [3, 4]],
text_after_process_list=["prompt1", "prompt2"],
prompt_tokens_list=["prompt1", "prompt2"],
)
self.assertEqual(len(response.choices), 2)
@@ -160,7 +160,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
request_id="test-request-id",
model_name="test-model",
prompt_token_ids=[1, 2, 3],
text_after_process="Hello",
prompt_tokens="Hello",
)
chunks = []
@@ -242,7 +242,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
model_name="test-model",
created_time=11,
prompt_batched_token_ids=[[1, 2, 3]],
text_after_process_list=["Hello"],
prompt_tokens_list=["Hello"],
)
chunks = []
+2 -4
View File
@@ -54,8 +54,8 @@ INVALID_INPUT_BATCH = """
"""
BATCH_RESPONSE = """
{"id":"fastdeploy-7fcc30e2e4334fca806c4d01ee7ac4ab","custom_id":"req-00001","response":{"status_code":200,"request_id":"fastdeploy-batch-5f4017beded84b15aa3a8b0f1fce154c","body":{"id":"chatcmpl-33b09ae5-a8f1-40ad-9110-efa2b381eac9","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"In a sunlit meadow where dreams bloom,\\nA gentle breeze carries the breeze,\\nThe leaves rustle like ancient letters,\\nAnd in the sky, a song of hope and love.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":19,"total_tokens":60,"completion_tokens":41,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
{"id":"fastdeploy-bf549849df2145598ae1758ba260f784","custom_id":"req-00002","response":{"status_code":200,"request_id":"fastdeploy-batch-81223f12fdc345efbfe85114ced10a1d","body":{"id":"chatcmpl-9479e36c-1542-45ff-b364-1dc6d34be9e7","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"Based on the given text, here are some possible actions you can take:\\n\\n1. **Read the question**: To understand what you can do, you can read the question (id=2) and analyze its requirements or constraints.\\n2. **Identify the keywords**: Look for specific keywords or phrases that describe what you can do. For example, if the question mentions \\"coding,\\" you can focus on coding skills or platforms.\\n3. **Brainstorm ideas**: You can think creatively about different ways to perform the action. For example, you could brainstorm different methods of communication, data analysis, or problem-solving.\\n4. **Explain your action**: If you have knowledge or skills in a particular area, you can explain how you would use those skills to achieve the desired outcome.\\n5. **Ask for help**: If you need assistance, you can ask for help from a friend, teacher, or mentor.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":17,"total_tokens":211,"completion_tokens":194,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
{"id":"fastdeploy-7fcc30e2e4334fca806c4d01ee7ac4ab","custom_id":"req-00001","response":{"status_code":200,"request_id":"fastdeploy-batch-5f4017beded84b15aa3a8b0f1fce154c","body":{"id":"chatcmpl-33b09ae5-a8f1-40ad-9110-efa2b381eac9","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"In a sunlit meadow where dreams bloom,\\nA gentle breeze carries the breeze,\\nThe leaves rustle like ancient letters,\\nAnd in the sky, a song of hope and love.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":19,"total_tokens":60,"completion_tokens":41,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
{"id":"fastdeploy-bf549849df2145598ae1758ba260f784","custom_id":"req-00002","response":{"status_code":200,"request_id":"fastdeploy-batch-81223f12fdc345efbfe85114ced10a1d","body":{"id":"chatcmpl-9479e36c-1542-45ff-b364-1dc6d34be9e7","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"Based on the given text, here are some possible actions you can take:\\n\\n1. **Read the question**: To understand what you can do, you can read the question (id=2) and analyze its requirements or constraints.\\n2. **Identify the keywords**: Look for specific keywords or phrases that describe what you can do. For example, if the question mentions \\"coding,\\" you can focus on coding skills or platforms.\\n3. **Brainstorm ideas**: You can think creatively about different ways to perform the action. For example, you could brainstorm different methods of communication, data analysis, or problem-solving.\\n4. **Explain your action**: If you have knowledge or skills in a particular area, you can explain how you would use those skills to achieve the desired outcome.\\n5. **Ask for help**: If you need assistance, you can ask for help from a friend, teacher, or mentor.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":17,"total_tokens":211,"completion_tokens":194,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
"""
@@ -867,8 +867,6 @@ class TestFileOperations(unittest.TestCase):
tool_calls=message_data["tool_calls"],
prompt_token_ids=message_data["prompt_token_ids"],
completion_token_ids=message_data["completion_token_ids"],
text_after_process=message_data["text_after_process"],
raw_prediction=message_data["raw_prediction"],
prompt_tokens=message_data["prompt_tokens"],
completion_tokens=message_data["completion_tokens"],
)
@@ -155,7 +155,7 @@ class TestOpenAIServingCompletion(unittest.TestCase):
model_name=model_name,
prompt_batched_token_ids=prompt_batched_token_ids,
completion_batched_token_ids=completion_batched_token_ids,
text_after_process_list=["1", "1"],
prompt_tokens_list=["1", "1"],
)
assert completion_response.id == request_id
+1 -1
View File
@@ -61,7 +61,7 @@ class TestErnie4_5ProcessorProcessResponseDictStreaming(unittest.TestCase):
result = self.processor.process_response_dict_streaming(response_dict, **kwargs)
# 验证结果
self.assertEqual(result["outputs"]["raw_prediction"], "delta_text")
self.assertEqual(result["outputs"]["completion_tokens"], "delta_text")
def test_process_request_dict(self):
request_dict = {
+3 -3
View File
@@ -276,7 +276,7 @@ class TestQwenVLProcessor(unittest.TestCase):
# Create equivalent request in prompt format
prompt = {
"request_id": "12345",
"prompt": request["text_after_process"],
"prompt": request["prompt_tokens"],
"multimodal_data": {
"image": [mock_pil_image(480, 640)],
"video": [{"video": b"123"}],
@@ -300,7 +300,7 @@ class TestQwenVLProcessor(unittest.TestCase):
This test verifies that:
- The processor correctly handles multimodal messages (image, video, text)
- The text_after_process field matches the output from direct tokenizer application
- The prompt_tokens field matches the output from direct tokenizer application
- The chat template application preserves the message structure and content
Test Steps:
@@ -345,7 +345,7 @@ class TestQwenVLProcessor(unittest.TestCase):
# Process request through the processor
self.processor.process_request_dict(request, 1024 * 100)
prompt2 = request["text_after_process"]
prompt2 = request["prompt_tokens"]
# Verify both methods produce identical prompt strings
self.assertEqual(prompt, prompt2)
+2 -2
View File
@@ -62,7 +62,7 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
)
async def mock_chat_completion_full_generator(
request, request_id, model_name, prompt_token_ids, text_after_process
request, request_id, model_name, prompt_token_ids, prompt_tokens
):
return prompt_token_ids
@@ -89,7 +89,7 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
)
async def mock_chat_completion_full_generator(
request, request_id, model_name, prompt_token_ids, text_after_process
request, request_id, model_name, prompt_token_ids, prompt_tokens
):
return prompt_token_ids