【Fix】 remove text_after_process & raw_prediction (#4421)
* remove text_after_process & raw_prediction
@@ -71,5 +71,5 @@ curl -X POST "http://0.0.0.0:8188/v1/chat/completions" \
 
 ### 3. Successfully returns the result
 ```json
-{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
+{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
 ```

@@ -231,8 +231,18 @@ ChatMessage:
     role: str
     content: str
     reasoning_content: Optional[str] = None
+    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
+    prompt_tokens: Optional[str] = None
+    completion_tokens: Optional[str] = None
+ToolCall:
+    id: str = None
+    type: Literal["function"] = "function"
+    function: FunctionCall
+FunctionCall:
+    name: str
+    arguments: str
 
 # Fields returned for streaming responses
 ChatCompletionStreamResponse:

@@ -254,6 +264,17 @@ DeltaMessage:
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
     reasoning_content: Optional[str] = None
+    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
+    prompt_tokens: Optional[str] = None
+    completion_tokens: Optional[str] = None
+DeltaToolCall:
+    id: Optional[str] = None
+    type: Optional[Literal["function"]] = None
+    index: int
+    function: Optional[DeltaFunctionCall] = None
+DeltaFunctionCall:
+    name: Optional[str] = None
+    arguments: Optional[str] = None
 ```
 
 ## Completion API

@@ -384,10 +405,20 @@ CompletionResponseChoice:
     text: str
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
+    prompt_tokens: Optional[str] = None
+    completion_tokens: Optional[str] = None
     arrival_time: Optional[float] = None
     logprobs: Optional[int] = None
     reasoning_content: Optional[str] = None
     finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
+    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
+ToolCall:
+    id: str = None
+    type: Literal["function"] = "function"
+    function: FunctionCall
+FunctionCall:
+    name: str
+    arguments: str
 
 # Fields returned for streaming responses
 CompletionStreamResponse:

@@ -403,8 +434,18 @@ CompletionResponseStreamChoice:
     arrival_time: float = None
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
+    prompt_tokens: Optional[str] = None
+    completion_tokens: Optional[str] = None
     logprobs: Optional[float] = None
     reasoning_content: Optional[str] = None
     finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
+    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
+DeltaToolCall:
+    id: Optional[str] = None
+    type: Optional[Literal["function"]] = None
+    index: int
+    function: Optional[DeltaFunctionCall] = None
+DeltaFunctionCall:
+    name: Optional[str] = None
+    arguments: Optional[str] = None
 ```
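For clients updating against these docs: the rendered-prompt string and raw model output move from `text_after_process`/`raw_prediction` to `prompt_tokens`/`completion_tokens`. A minimal client sketch, not part of this diff, assuming a FastDeploy server on port 8188 as in the docs above:

```python
import requests

# Hypothetical client for the endpoint documented above; the field names
# follow the new schema (prompt_tokens / completion_tokens replace
# text_after_process / raw_prediction).
resp = requests.post(
    "http://0.0.0.0:8188/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "What is AI?"}],
        "return_token_ids": True,  # populates the optional token/text fields
    },
).json()

message = resp["choices"][0]["message"]
print(message["prompt_tokens"])      # rendered prompt text (was text_after_process)
print(message["completion_tokens"])  # raw model output (was raw_prediction)
```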
@@ -71,5 +71,5 @@ curl -X POST "http://0.0.0.0:8188/v1/chat/completions" \
 
 ### 3. Successfully returns the result
 ```json
-{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
+{"id":"chatcmpl-3bd98ae2-fafe-46ae-a552-d653a8526503","object":"chat.completion","created":1757653575,"model":"ERNIE-4.5-21B-A3B-Paddle","choices":[{"index":0,"message":{"role":"assistant","content":"**AI (Artificial Intelligence)** refers to the development of computer systems that can perform tasks typically requiring human intelligence.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"total_tokens":35,"completion_tokens":24,"prompt_tokens_details":{"cached_tokens":0}}}
 ```

@@ -230,8 +230,18 @@ ChatMessage:
     role: str
     content: str
     reasoning_content: Optional[str] = None
+    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
+    prompt_tokens: Optional[str] = None
+    completion_tokens: Optional[str] = None
+ToolCall:
+    id: str = None
+    type: Literal["function"] = "function"
+    function: FunctionCall
+FunctionCall:
+    name: str
+    arguments: str
 
 # Fields returned for streaming responses
 ChatCompletionStreamResponse:

@@ -253,6 +263,17 @@ DeltaMessage:
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
     reasoning_content: Optional[str] = None
+    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
+    prompt_tokens: Optional[str] = None
+    completion_tokens: Optional[str] = None
+DeltaToolCall:
+    id: Optional[str] = None
+    type: Optional[Literal["function"]] = None
+    index: int
+    function: Optional[DeltaFunctionCall] = None
+DeltaFunctionCall:
+    name: Optional[str] = None
+    arguments: Optional[str] = None
 ```
 
 ## Completion API

@@ -380,10 +401,20 @@ CompletionResponseChoice:
     text: str
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
+    prompt_tokens: Optional[str] = None
+    completion_tokens: Optional[str] = None
     arrival_time: Optional[float] = None
     logprobs: Optional[int] = None
     reasoning_content: Optional[str] = None
     finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
+    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
+ToolCall:
+    id: str = None
+    type: Literal["function"] = "function"
+    function: FunctionCall
+FunctionCall:
+    name: str
+    arguments: str
 
 # Fields returned for streaming responses
 CompletionStreamResponse:

@@ -399,8 +430,18 @@ CompletionResponseStreamChoice:
     arrival_time: float = None
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
+    prompt_tokens: Optional[str] = None
+    completion_tokens: Optional[str] = None
     logprobs: Optional[float] = None
     reasoning_content: Optional[str] = None
     finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
+    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
+DeltaToolCall:
+    id: Optional[str] = None
+    type: Optional[Literal["function"]] = None
+    index: int
+    function: Optional[DeltaFunctionCall] = None
+DeltaFunctionCall:
+    name: Optional[str] = None
+    arguments: Optional[str] = None
 ```
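In streaming mode the rename applies to the delta: per the schema above, the first chunk (the one carrying `prompt_token_ids`) now exposes `prompt_tokens`, and subsequent chunks expose `completion_tokens`. A hedged sketch of consuming the stream, assuming the same server and the OpenAI-style `data:` SSE framing shown in the docs:

```python
import json
import requests

# Illustrative only: walks the SSE stream and reads the renamed delta fields.
with requests.post(
    "http://0.0.0.0:8188/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "What is AI?"}],
        "stream": True,
        "return_token_ids": True,
    },
    stream=True,
) as resp:
    for line in resp.iter_lines():
        if not line or not line.startswith(b"data: ") or line == b"data: [DONE]":
            continue
        delta = json.loads(line[len(b"data: "):])["choices"][0]["delta"]
        if delta.get("prompt_token_ids") is not None:
            print("prompt_tokens:", delta["prompt_tokens"])          # was text_after_process
        elif delta.get("completion_tokens") is not None:
            print("completion_tokens:", delta["completion_tokens"])  # was raw_prediction
```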
@@ -193,8 +193,6 @@ class ChatMessage(BaseModel):
     tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
-    text_after_process: Optional[str] = None
-    raw_prediction: Optional[str] = None
     prompt_tokens: Optional[str] = None
     completion_tokens: Optional[str] = None
 

@@ -255,8 +253,6 @@ class DeltaMessage(BaseModel):
     completion_token_ids: Optional[List[int]] = None
     reasoning_content: Optional[str] = None
     tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
-    text_after_process: Optional[str] = None
-    raw_prediction: Optional[str] = None
     prompt_tokens: Optional[str] = None
     completion_tokens: Optional[str] = None
 

@@ -295,8 +291,6 @@ class CompletionResponseChoice(BaseModel):
     text: str
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
-    text_after_process: Optional[str] = None
-    raw_prediction: Optional[str] = None
     prompt_tokens: Optional[str] = None
     completion_tokens: Optional[str] = None
     arrival_time: Optional[float] = None

@@ -341,8 +335,6 @@ class CompletionResponseStreamChoice(BaseModel):
     logprobs: Optional[CompletionLogprobs] = None
     prompt_token_ids: Optional[List[int]] = None
     completion_token_ids: Optional[List[int]] = None
-    text_after_process: Optional[str] = None
-    raw_prediction: Optional[str] = None
     prompt_tokens: Optional[str] = None
     completion_tokens: Optional[str] = None
     reasoning_content: Optional[str] = None
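The four schema edits above are the whole protocol change: each response model simply drops the two legacy fields. A self-contained sketch of the resulting shape, trimmed to the fields in the hunks and assuming pydantic v2 (the real classes in protocol.py carry more fields):

```python
from typing import List, Optional

from pydantic import BaseModel


class ChatMessage(BaseModel):
    """Trimmed sketch of the post-change model; the real class also
    carries role/content/reasoning_content, tool-call types, and more."""

    prompt_token_ids: Optional[List[int]] = None
    completion_token_ids: Optional[List[int]] = None
    # text_after_process and raw_prediction are removed; their payloads
    # now travel in the two fields below.
    prompt_tokens: Optional[str] = None      # rendered prompt text
    completion_tokens: Optional[str] = None  # raw model output text


print(ChatMessage(prompt_tokens="<|user|> hi").model_dump_json())
```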
@@ -118,14 +118,14 @@ class OpenAIServingChat:
         else:
             request_id = f"chatcmpl-{uuid.uuid4()}"
         api_server_logger.info(f"create chat completion request: {request_id}")
-        text_after_process = None
+        prompt_tokens = None
         try:
             current_req_dict = request.to_dict_for_infer(request_id)
             if "chat_template" not in current_req_dict:
                 current_req_dict["chat_template"] = self.chat_template
             current_req_dict["arrival_time"] = time.time()
             prompt_token_ids = await self.engine_client.format_and_add_data(current_req_dict)
-            text_after_process = current_req_dict.get("text_after_process")
+            prompt_tokens = current_req_dict.get("prompt_tokens")
             if isinstance(prompt_token_ids, np.ndarray):
                 prompt_token_ids = prompt_token_ids.tolist()
         except ParameterError as e:

@@ -143,12 +143,12 @@ class OpenAIServingChat:
 
         if request.stream:
             return self.chat_completion_stream_generator(
-                request, request_id, request.model, prompt_token_ids, text_after_process
+                request, request_id, request.model, prompt_token_ids, prompt_tokens
             )
         else:
             try:
                 return await self.chat_completion_full_generator(
-                    request, request_id, request.model, prompt_token_ids, text_after_process
+                    request, request_id, request.model, prompt_token_ids, prompt_tokens
                 )
             except Exception as e:
                 error_msg = f"request[{request_id}]full generator error: {str(e)}, {str(traceback.format_exc())}"

@@ -175,7 +175,7 @@ class OpenAIServingChat:
         request_id: str,
         model_name: str,
         prompt_token_ids: list(),
-        text_after_process: str,
+        prompt_tokens: str,
     ):
         """
         Streaming chat completion generator.

@@ -289,8 +289,7 @@ class OpenAIServingChat:
 
             if request.return_token_ids:
                 choice.delta.prompt_token_ids = list(prompt_token_ids)
-                choice.delta.text_after_process = text_after_process
-                choice.delta.prompt_tokens = text_after_process
+                choice.delta.prompt_tokens = prompt_tokens
             chunk = ChatCompletionStreamResponse(
                 id=request_id,
                 object=chunk_object_type,

@@ -368,8 +367,7 @@ class OpenAIServingChat:
                 choice.delta.multimodal_content[0]["completion_token_ids"] = list(output["token_ids"])
             else:
                 choice.delta.completion_token_ids = list(output["token_ids"])
-            choice.delta.raw_prediction = output.get("raw_prediction")
-            choice.delta.completion_tokens = output.get("raw_prediction")
+            choice.delta.completion_tokens = output.get("completion_tokens")
             if include_continuous_usage:
                 chunk.usage = UsageInfo(
                     prompt_tokens=num_prompt_tokens,

@@ -419,7 +417,7 @@ class OpenAIServingChat:
         request_id: str,
         model_name: str,
         prompt_token_ids: list(),
-        text_after_process: str,
+        prompt_tokens: str,
     ):
         """
         Full chat completion generator.

@@ -509,10 +507,8 @@ class OpenAIServingChat:
                 tool_calls=output.get("tool_call"),
                 prompt_token_ids=prompt_token_ids if request.return_token_ids else None,
                 completion_token_ids=completion_token_ids if request.return_token_ids else None,
-                text_after_process=text_after_process if request.return_token_ids else None,
-                prompt_tokens=text_after_process if request.return_token_ids else None,
-                raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
-                completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
+                prompt_tokens=prompt_tokens if request.return_token_ids else None,
+                completion_tokens=output.get("completion_tokens") if request.return_token_ids else None,
             )
 
             if response_processor.enable_multimodal_content():
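Across the seven hunks above, the serving-layer change is one rename threaded through the call chain. A condensed, hypothetical restatement of that flow (names other than `format_and_add_data`, `to_dict_for_infer`, and `prompt_tokens` are illustrative):

```python
# Hypothetical condensation of the serving_chat.py flow after this PR.
async def create_chat_completion_sketch(request, engine_client, request_id):
    prompt_tokens = None  # was: text_after_process = None
    current_req_dict = request.to_dict_for_infer(request_id)
    # format_and_add_data tokenizes and, per the processor hunks later in
    # this diff, leaves the rendered prompt in the dict under "prompt_tokens".
    prompt_token_ids = await engine_client.format_and_add_data(current_req_dict)
    prompt_tokens = current_req_dict.get("prompt_tokens")  # was "text_after_process"
    # Both generators now receive the value under its new name:
    #   chat_completion_stream_generator(..., prompt_token_ids, prompt_tokens)
    #   chat_completion_full_generator(..., prompt_token_ids, prompt_tokens)
    return prompt_token_ids, prompt_tokens
```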
@@ -132,7 +132,7 @@ class OpenAIServingCompletion:
         num_choices = len(request_prompts)
         api_server_logger.info(f"Start preprocessing request: req_id={request_id}), num_choices={num_choices}")
         prompt_batched_token_ids = []
-        text_after_process_list = []
+        prompt_tokens_list = []
         try:
             if self.max_waiting_time < 0:
                 await self.engine_client.semaphore.acquire()

@@ -157,7 +157,7 @@ class OpenAIServingCompletion:
                 prompt_token_ids = await self.engine_client.format_and_add_data(current_req_dict)  # tokenize
                 if isinstance(prompt_token_ids, np.ndarray):
                     prompt_token_ids = prompt_token_ids.tolist()
-                text_after_process_list.append(current_req_dict.get("text_after_process"))
+                prompt_tokens_list.append(current_req_dict.get("prompt_tokens"))
                 prompt_batched_token_ids.append(prompt_token_ids)
                 del current_req_dict
         except ParameterError as e:

@@ -180,7 +180,7 @@ class OpenAIServingCompletion:
                 created_time=created_time,
                 model_name=request.model,
                 prompt_batched_token_ids=prompt_batched_token_ids,
-                text_after_process_list=text_after_process_list,
+                prompt_tokens_list=prompt_tokens_list,
             )
         else:
             try:

@@ -191,7 +191,7 @@ class OpenAIServingCompletion:
                     created_time=created_time,
                     model_name=request.model,
                     prompt_batched_token_ids=prompt_batched_token_ids,
-                    text_after_process_list=text_after_process_list,
+                    prompt_tokens_list=prompt_tokens_list,
                 )
             except Exception as e:
                 error_msg = (

@@ -213,7 +213,7 @@ class OpenAIServingCompletion:
         created_time: int,
         model_name: str,
         prompt_batched_token_ids: list(),
-        text_after_process_list: list(),
+        prompt_tokens_list: list(),
     ):
         """
         Process the full completion request with multiple choices.

@@ -292,7 +292,7 @@ class OpenAIServingCompletion:
                 model_name=model_name,
                 prompt_batched_token_ids=prompt_batched_token_ids,
                 completion_batched_token_ids=completion_batched_token_ids,
-                text_after_process_list=text_after_process_list,
+                prompt_tokens_list=prompt_tokens_list,
             )
             api_server_logger.info(f"Completion response: {res.model_dump_json()}")
             return res

@@ -344,7 +344,7 @@ class OpenAIServingCompletion:
         created_time: int,
         model_name: str,
         prompt_batched_token_ids: list(),
-        text_after_process_list: list(),
+        prompt_tokens_list: list(),
     ):
         """
         Process the stream completion request.

@@ -408,8 +408,7 @@ class OpenAIServingCompletion:
                             index=idx,
                             text="",
                             prompt_token_ids=list(prompt_batched_token_ids[idx]),
-                            text_after_process=text_after_process_list[idx],
-                            prompt_tokens=text_after_process_list[idx],
+                            prompt_tokens=prompt_tokens_list[idx],
                             completion_token_ids=None,
                         )
                     ],

@@ -443,8 +442,7 @@ class OpenAIServingCompletion:
                     prompt_token_ids=None,
                     completion_token_ids=output.get("token_ids") if request.return_token_ids else None,
                     tool_calls=None,
-                    raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
-                    completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
+                    completion_tokens=output.get("completion_tokens") if request.return_token_ids else None,
                     reasoning_content="",
                     arrival_time=arrival_time,
                     logprobs=logprobs_res,

@@ -522,7 +520,7 @@ class OpenAIServingCompletion:
         model_name: str,
         prompt_batched_token_ids: list(),
         completion_batched_token_ids: list(),
-        text_after_process_list: list(),
+        prompt_tokens_list: list(),
     ) -> CompletionResponse:
         choices: List[CompletionResponseChoice] = []
         num_prompt_tokens = 0

@@ -556,10 +554,8 @@ class OpenAIServingCompletion:
                 text=output_text,
                 prompt_token_ids=prompt_token_ids if request.return_token_ids else None,
                 completion_token_ids=completion_token_ids if request.return_token_ids else None,
-                raw_prediction=output.get("raw_prediction") if request.return_token_ids else None,
-                completion_tokens=output.get("raw_prediction") if request.return_token_ids else None,
-                text_after_process=text_after_process_list[idx] if request.return_token_ids else None,
-                prompt_tokens=text_after_process_list[idx] if request.return_token_ids else None,
+                completion_tokens=output.get("completion_tokens") if request.return_token_ids else None,
+                prompt_tokens=prompt_tokens_list[idx] if request.return_token_ids else None,
                 reasoning_content=output.get("reasoning_content"),
                 tool_calls=output.get("tool_call"),
                 logprobs=aggregated_logprobs,
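On the completions side the rename is applied to the batched variant: one rendered-prompt string per request prompt, aligned with the token-id batch and indexed by choice. A small hypothetical restatement of that bookkeeping, under the same in-place-mutation assumption as the hunks above:

```python
# Hypothetical standalone form of the preprocessing loop shown above:
# prompt_tokens_list (was text_after_process_list) stays index-aligned
# with prompt_batched_token_ids so each choice can echo its own prompt.
async def preprocess(prompts, engine_client, make_req_dict):
    prompt_batched_token_ids = []
    prompt_tokens_list = []
    for prompt in prompts:
        current_req_dict = make_req_dict(prompt)
        token_ids = await engine_client.format_and_add_data(current_req_dict)
        prompt_tokens_list.append(current_req_dict.get("prompt_tokens"))
        prompt_batched_token_ids.append(list(token_ids))
    return prompt_batched_token_ids, prompt_tokens_list
```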
@@ -197,7 +197,7 @@ class Ernie4_5Processor(BaseDataProcessor):
         if isinstance(prompt, list):  # if prompt is a token id list
             request["prompt_token_ids"] = prompt
         else:
-            request["text_after_process"] = prompt
+            request["prompt_tokens"] = prompt
             tokens = self.tokenizer.tokenize(prompt)
             token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
             request["prompt_token_ids"] = token_ids

@@ -318,7 +318,7 @@ class Ernie4_5Processor(BaseDataProcessor):
             if tool_call_info.tools_called:
                 response_dict["outputs"]["tool_call"] = tool_call_info.tool_calls
                 response_dict["outputs"]["text"] = tool_call_info.content
-        response_dict["outputs"]["raw_prediction"] = full_text
+        response_dict["outputs"]["completion_tokens"] = full_text
         data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}")
         del self.decode_status[req_id]
         return response_dict

@@ -342,7 +342,7 @@ class Ernie4_5Processor(BaseDataProcessor):
         if token_ids[-1] == self.tokenizer.eos_token_id:
             token_ids = token_ids[:-1]
         delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
-        response_dict["outputs"]["raw_prediction"] = delta_text
+        response_dict["outputs"]["completion_tokens"] = delta_text
         if self.reasoning_parser and (
             enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
         ):

@@ -398,7 +398,7 @@ class Ernie4_5Processor(BaseDataProcessor):
             add_special_tokens=False,
             **kwargs,
         )
-        request_or_messages["text_after_process"] = spliced_message
+        request_or_messages["prompt_tokens"] = spliced_message
         req_id = None
         if isinstance(request_or_messages, dict):
             req_id = request_or_messages.get("request_id", None)
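The data processors are the producers of both renamed values: they stash the rendered prompt in the request dict and the decoded raw text in `outputs`. A toy processor illustrating that contract (hypothetical; the real classes wrap an ERNIE tokenizer and incremental decode state):

```python
# Toy illustration of the processor-side contract after the rename.
class ToyProcessor:
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def process_request_dict(self, request):
        prompt = request["prompt"]
        request["prompt_tokens"] = prompt  # was request["text_after_process"]
        tokens = self.tokenizer.tokenize(prompt)
        request["prompt_token_ids"] = self.tokenizer.convert_tokens_to_ids(tokens)
        return request

    def process_response_dict_streaming(self, response_dict, delta_text):
        # was response_dict["outputs"]["raw_prediction"]
        response_dict["outputs"]["completion_tokens"] = delta_text
        return response_dict
```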
@@ -222,7 +222,7 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor):
             self._check_mm_limits(multimodal_data)
             images = multimodal_data.get("image", None)
             videos = multimodal_data.get("video", None)
-            request["text_after_process"] = request.get("prompt")
+            request["prompt_tokens"] = request.get("prompt")
             outputs = self.ernie4_5_processor.text2ids(request["prompt"], images, videos)
         elif request.get("messages"):
             messages = request["messages"]
@@ -503,7 +503,7 @@ class DataProcessor:
         prompt_token_str = prompt_token_template.replace("<|image@placeholder|>", "").replace(
             "<|video@placeholder|>", ""
         )
-        request["text_after_process"] = prompt_token_template
+        request["prompt_tokens"] = prompt_token_template
         tokens = self.tokenizer.tokenize(prompt_token_str)
         token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
         data_processor_logger.info(
@@ -495,7 +495,7 @@ class DataProcessor:
             add_generation_prompt=request.get("add_generation_prompt", True),
         )
         prompt_token_str = raw_prompt.replace(self.image_token, "").replace(self.video_token, "")
-        request["text_after_process"] = raw_prompt
+        request["prompt_tokens"] = raw_prompt
 
         tokens = self.tokenizer.tokenize(prompt_token_str)
         token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
@@ -403,7 +403,7 @@ class DataProcessor(BaseDataProcessor):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            response_dict["outputs"]["raw_prediction"] = full_text
+            response_dict["outputs"]["completion_tokens"] = full_text
             if enable_thinking and self.reasoning_parser:
                 reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
                 response_dict["outputs"]["text"] = text

@@ -439,7 +439,7 @@ class DataProcessor(BaseDataProcessor):
         if token_ids[-1] in self.eos_token_ids:
             token_ids = token_ids[:-1]
         delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
-        response_dict["outputs"]["raw_prediction"] = delta_text
+        response_dict["outputs"]["completion_tokens"] = delta_text
         if self.reasoning_parser and (
             enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
         ):

@@ -548,7 +548,7 @@ class DataProcessor(BaseDataProcessor):
             return_tensors="pd",
             **kwargs,
         )
-        request["text_after_process"] = spliced_message
+        request["prompt_tokens"] = spliced_message
         req_id = None
         tokens = self.tokenizer.tokenize(spliced_message)
         if isinstance(request, dict):
@@ -14,10 +14,10 @@ from core import TEMPLATE, URL, build_request_payload, send_request
 COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions")
 
 
-def test_completion_stream_text_after_process_raw_prediction():
+def test_completion_stream_prompt_tokens_completion_tokens():
     """
     /v1/completions endpoint, stream=True
-    Returns the attributes "text_after_process" and "reasoning_content"
+    Returns the attributes "prompt_tokens" and "reasoning_content"
     """
     data = {
         "prompt": "你是谁",
@@ -39,55 +39,55 @@ def test_completion_stream_text_after_process_raw_prediction():
 
             choice = response_data["choices"][0]
             if "prompt_token_ids" in choice and choice["prompt_token_ids"] is not None:
-                text_after_process = choice["text_after_process"]
-                assert data["prompt"] in text_after_process, "text_after_process value is incorrect"
+                prompt_tokens = choice["prompt_tokens"]
+                assert data["prompt"] in prompt_tokens, "prompt_tokens value is incorrect"
             else:
-                raw_prediction = choice["raw_prediction"]
+                completion_tokens = choice["completion_tokens"]
                 reasoning_content = choice["reasoning_content"]
                 text = choice["text"]
-                assert reasoning_content or text in raw_prediction, "raw_prediction value is incorrect"
+                assert reasoning_content or text in completion_tokens, "completion_tokens value is incorrect"
             if "finish_reason" in line.strip():
                 break
 
 
-def test_completion_text_after_process_raw_predictio_return_token_ids():
+def test_completion_prompt_tokens_completion_tokens_return_token_ids():
     """
     /v1/completions endpoint, non-streaming
-    Returns the attributes "text_after_process" and "reasoning_content"
+    Returns the attributes "prompt_tokens" and "reasoning_content"
     """
     data = {"stream": False, "prompt": "你是谁", "max_tokens": 50, "return_token_ids": True}
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(COMPLETIONS_URL, payload).json()
 
-    text_after_process = resp["choices"][0]["text_after_process"]
-    assert data["prompt"] in text_after_process, "text_after_process value is incorrect"
+    prompt_tokens = resp["choices"][0]["prompt_tokens"]
+    assert data["prompt"] in prompt_tokens, "prompt_tokens value is incorrect"
 
-    raw_prediction = resp["choices"][0]["raw_prediction"]
+    completion_tokens = resp["choices"][0]["completion_tokens"]
     reasoning_content = resp["choices"][0]["reasoning_content"]
     text = resp["choices"][0]["text"]
-    assert reasoning_content or text in raw_prediction, "raw_prediction value is incorrect"
+    assert reasoning_content or text in completion_tokens, "completion_tokens value is incorrect"
 
 
-def test_completion_text_after_process_raw_prediction():
+def test_completion_prompt_tokens_completion_tokens():
     """
     /v1/completions endpoint, without the return_token_ids parameter
-    In the non-streaming endpoint, without return_token_ids the attributes "text_after_process" and "reasoning_content" are null
+    In the non-streaming endpoint, without return_token_ids the attributes "prompt_tokens" and "reasoning_content" are null
     """
     data = {"stream": False, "prompt": "你是谁", "max_tokens": 50}
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(COMPLETIONS_URL, payload).json()
 
-    text_after_process = resp["choices"][0]["text_after_process"]
-    assert text_after_process is None, "text_after_process value is incorrect"
+    prompt_tokens = resp["choices"][0]["prompt_tokens"]
+    assert prompt_tokens is None, "prompt_tokens value is incorrect"
 
-    raw_prediction = resp["choices"][0]["raw_prediction"]
-    assert raw_prediction is None, "raw_prediction value is incorrect"
+    completion_tokens = resp["choices"][0]["completion_tokens"]
+    assert completion_tokens is None, "completion_tokens value is incorrect"
 
 
-def test_stream_text_after_process_raw_prediction():
+def test_stream_prompt_tokens_completion_tokens():
     """
     /v1/chat/completions endpoint, "stream": True
-    Returns the attributes "text_after_process" and "reasoning_content"
+    Returns the attributes "prompt_tokens" and "reasoning_content"
     """
     data = {
         "messages": [{"role": "user", "content": "你是谁"}],
@@ -109,21 +109,21 @@ def test_stream_text_after_process_raw_prediction():
 
             choice = response_data["choices"][0]
             if "prompt_token_ids" in choice["delta"] and choice["delta"]["prompt_token_ids"] is not None:
-                text_after_process = choice["delta"]["text_after_process"]
-                assert data["messages"][0]["content"] in text_after_process, "text_after_process value is incorrect"
+                prompt_tokens = choice["delta"]["prompt_tokens"]
+                assert data["messages"][0]["content"] in prompt_tokens, "prompt_tokens value is incorrect"
             else:
-                raw_prediction = choice["delta"]["raw_prediction"]
+                completion_tokens = choice["delta"]["completion_tokens"]
                 reasoning_content = choice["delta"]["reasoning_content"]
                 content = choice["delta"]["content"]
-                assert reasoning_content or content in raw_prediction, "raw_prediction value is incorrect"
+                assert reasoning_content or content in completion_tokens, "completion_tokens value is incorrect"
             if "finish_reason" in line.strip():
                 break
 
 
-def test_text_after_process_raw_prediction_return_token_ids():
+def test_prompt_tokens_completion_tokens_return_token_ids():
     """
     /v1/chat/completions endpoint, non-streaming
-    Returns the attributes "text_after_process" and "reasoning_content"
+    Returns the attributes "prompt_tokens" and "reasoning_content"
     """
     data = {
         "stream": False,
@@ -136,19 +136,19 @@ def test_text_after_process_raw_prediction_return_token_ids():
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
 
-    text_after_process = resp["choices"][0]["message"]["text_after_process"]
-    assert data["messages"][0]["content"] in text_after_process, "text_after_process value is incorrect"
+    prompt_tokens = resp["choices"][0]["message"]["prompt_tokens"]
+    assert data["messages"][0]["content"] in prompt_tokens, "prompt_tokens value is incorrect"
 
-    raw_prediction = resp["choices"][0]["message"]["raw_prediction"]
+    completion_tokens = resp["choices"][0]["message"]["completion_tokens"]
     reasoning_content = resp["choices"][0]["message"]["reasoning_content"]
     text = resp["choices"][0]["message"]["content"]
-    assert reasoning_content or text in raw_prediction, "raw_prediction value is incorrect"
+    assert reasoning_content or text in completion_tokens, "completion_tokens value is incorrect"
 
 
-def test_text_after_process_raw_prediction():
+def test_prompt_tokens_completion_tokens():
     """
     /v1/chat/completions endpoint, without the return_token_ids parameter
-    Without return_token_ids, the attributes "text_after_process" and "reasoning_content" are null
+    Without return_token_ids, the attributes "prompt_tokens" and "reasoning_content" are null
     """
     data = {
         "stream": False,
@@ -160,8 +160,8 @@ def test_text_after_process_raw_prediction():
     payload = build_request_payload(TEMPLATE, data)
     resp = send_request(URL, payload).json()
 
-    text_after_process = resp["choices"][0]["message"]["text_after_process"]
-    assert text_after_process is None, "text_after_process value is incorrect"
+    prompt_tokens = resp["choices"][0]["message"]["prompt_tokens"]
+    assert prompt_tokens is None, "prompt_tokens value is incorrect"
 
-    raw_prediction = resp["choices"][0]["message"]["raw_prediction"]
-    assert raw_prediction is None, "raw_prediction value is incorrect"
+    completion_tokens = resp["choices"][0]["message"]["completion_tokens"]
+    assert completion_tokens is None, "completion_tokens value is incorrect"
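The updated suite only exercises the new names; a natural companion check, shown here as a hypothetical addition that is not part of this PR, would pin down that the legacy keys no longer appear in responses at all:

```python
def test_legacy_fields_removed():
    """Hypothetical follow-up check: the legacy keys are gone entirely."""
    data = {"stream": False, "prompt": "你是谁", "max_tokens": 50, "return_token_ids": True}
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload).json()

    choice = resp["choices"][0]
    assert "text_after_process" not in choice, "legacy field text_after_process still present"
    assert "raw_prediction" not in choice, "legacy field raw_prediction still present"
```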
@@ -57,7 +57,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
             model_name="test_model",
             prompt_batched_token_ids=[[1, 2]],
             completion_batched_token_ids=[[3, 4, 5]],
-            text_after_process_list=["test prompt"],
+            prompt_tokens_list=["test prompt"],
         )
 
         self.assertEqual(response.choices[0].text, "test prompt generated text")

@@ -90,7 +90,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
             model_name="test_model",
             prompt_batched_token_ids=[[1, 2]],
             completion_batched_token_ids=[[3, 4, 5]],
-            text_after_process_list=["test prompt"],
+            prompt_tokens_list=["test prompt"],
         )
         self.assertEqual(response.choices[0].text, "decoded_[1, 2, 3] generated text")
 

@@ -123,7 +123,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
             model_name="test_model",
             prompt_batched_token_ids=[[1], [2]],
             completion_batched_token_ids=[[1, 2], [3, 4]],
-            text_after_process_list=["prompt1", "prompt2"],
+            prompt_tokens_list=["prompt1", "prompt2"],
         )
 
         self.assertEqual(len(response.choices), 2)

@@ -159,7 +159,7 @@ class TestCompletionEcho(unittest.IsolatedAsyncioTestCase):
             model_name="test_model",
             prompt_batched_token_ids=[[1], [2]],
             completion_batched_token_ids=[[1, 2], [3, 4]],
-            text_after_process_list=["prompt1", "prompt2"],
+            prompt_tokens_list=["prompt1", "prompt2"],
         )
 
         self.assertEqual(len(response.choices), 2)
@@ -160,7 +160,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
             request_id="test-request-id",
             model_name="test-model",
             prompt_token_ids=[1, 2, 3],
-            text_after_process="Hello",
+            prompt_tokens="Hello",
         )
 
         chunks = []

@@ -242,7 +242,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):
             model_name="test-model",
             created_time=11,
             prompt_batched_token_ids=[[1, 2, 3]],
-            text_after_process_list=["Hello"],
+            prompt_tokens_list=["Hello"],
         )
 
         chunks = []
@@ -54,8 +54,8 @@ INVALID_INPUT_BATCH = """
 """
 
 BATCH_RESPONSE = """
-{"id":"fastdeploy-7fcc30e2e4334fca806c4d01ee7ac4ab","custom_id":"req-00001","response":{"status_code":200,"request_id":"fastdeploy-batch-5f4017beded84b15aa3a8b0f1fce154c","body":{"id":"chatcmpl-33b09ae5-a8f1-40ad-9110-efa2b381eac9","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"In a sunlit meadow where dreams bloom,\\nA gentle breeze carries the breeze,\\nThe leaves rustle like ancient letters,\\nAnd in the sky, a song of hope and love.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":19,"total_tokens":60,"completion_tokens":41,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
-{"id":"fastdeploy-bf549849df2145598ae1758ba260f784","custom_id":"req-00002","response":{"status_code":200,"request_id":"fastdeploy-batch-81223f12fdc345efbfe85114ced10a1d","body":{"id":"chatcmpl-9479e36c-1542-45ff-b364-1dc6d34be9e7","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"Based on the given text, here are some possible actions you can take:\\n\\n1. **Read the question**: To understand what you can do, you can read the question (id=2) and analyze its requirements or constraints.\\n2. **Identify the keywords**: Look for specific keywords or phrases that describe what you can do. For example, if the question mentions \\"coding,\\" you can focus on coding skills or platforms.\\n3. **Brainstorm ideas**: You can think creatively about different ways to perform the action. For example, you could brainstorm different methods of communication, data analysis, or problem-solving.\\n4. **Explain your action**: If you have knowledge or skills in a particular area, you can explain how you would use those skills to achieve the desired outcome.\\n5. **Ask for help**: If you need assistance, you can ask for help from a friend, teacher, or mentor.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"text_after_process":null,"raw_prediction":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":17,"total_tokens":211,"completion_tokens":194,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
+{"id":"fastdeploy-7fcc30e2e4334fca806c4d01ee7ac4ab","custom_id":"req-00001","response":{"status_code":200,"request_id":"fastdeploy-batch-5f4017beded84b15aa3a8b0f1fce154c","body":{"id":"chatcmpl-33b09ae5-a8f1-40ad-9110-efa2b381eac9","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"In a sunlit meadow where dreams bloom,\\nA gentle breeze carries the breeze,\\nThe leaves rustle like ancient letters,\\nAnd in the sky, a song of hope and love.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":19,"total_tokens":60,"completion_tokens":41,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
+{"id":"fastdeploy-bf549849df2145598ae1758ba260f784","custom_id":"req-00002","response":{"status_code":200,"request_id":"fastdeploy-batch-81223f12fdc345efbfe85114ced10a1d","body":{"id":"chatcmpl-9479e36c-1542-45ff-b364-1dc6d34be9e7","object":"chat.completion","created":1758698637,"model":"/root/paddlejob/zhaolei36/ernie-4_5-0_3b-bf16-paddle","choices":[{"index":0,"message":{"role":"assistant","content":"Based on the given text, here are some possible actions you can take:\\n\\n1. **Read the question**: To understand what you can do, you can read the question (id=2) and analyze its requirements or constraints.\\n2. **Identify the keywords**: Look for specific keywords or phrases that describe what you can do. For example, if the question mentions \\"coding,\\" you can focus on coding skills or platforms.\\n3. **Brainstorm ideas**: You can think creatively about different ways to perform the action. For example, you could brainstorm different methods of communication, data analysis, or problem-solving.\\n4. **Explain your action**: If you have knowledge or skills in a particular area, you can explain how you would use those skills to achieve the desired outcome.\\n5. **Ask for help**: If you need assistance, you can ask for help from a friend, teacher, or mentor.","multimodal_content":null,"reasoning_content":null,"tool_calls":null,"prompt_token_ids":null,"completion_token_ids":null,"prompt_tokens":null,"completion_tokens":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":17,"total_tokens":211,"completion_tokens":194,"prompt_tokens_details":{"cached_tokens":0}}}},"error":null}
 """
 
 
@@ -867,8 +867,6 @@ class TestFileOperations(unittest.TestCase):
             tool_calls=message_data["tool_calls"],
             prompt_token_ids=message_data["prompt_token_ids"],
             completion_token_ids=message_data["completion_token_ids"],
-            text_after_process=message_data["text_after_process"],
-            raw_prediction=message_data["raw_prediction"],
             prompt_tokens=message_data["prompt_tokens"],
             completion_tokens=message_data["completion_tokens"],
         )
@@ -155,7 +155,7 @@ class TestOpenAIServingCompletion(unittest.TestCase):
             model_name=model_name,
             prompt_batched_token_ids=prompt_batched_token_ids,
             completion_batched_token_ids=completion_batched_token_ids,
-            text_after_process_list=["1", "1"],
+            prompt_tokens_list=["1", "1"],
         )
 
         assert completion_response.id == request_id
@@ -61,7 +61,7 @@ class TestErnie4_5ProcessorProcessResponseDictStreaming(unittest.TestCase):
         result = self.processor.process_response_dict_streaming(response_dict, **kwargs)
 
         # Verify the result
-        self.assertEqual(result["outputs"]["raw_prediction"], "delta_text")
+        self.assertEqual(result["outputs"]["completion_tokens"], "delta_text")
 
     def test_process_request_dict(self):
         request_dict = {
@@ -276,7 +276,7 @@ class TestQwenVLProcessor(unittest.TestCase):
         # Create equivalent request in prompt format
         prompt = {
             "request_id": "12345",
-            "prompt": request["text_after_process"],
+            "prompt": request["prompt_tokens"],
             "multimodal_data": {
                 "image": [mock_pil_image(480, 640)],
                 "video": [{"video": b"123"}],

@@ -300,7 +300,7 @@ class TestQwenVLProcessor(unittest.TestCase):
 
         This test verifies that:
         - The processor correctly handles multimodal messages (image, video, text)
-        - The text_after_process field matches the output from direct tokenizer application
+        - The prompt_tokens field matches the output from direct tokenizer application
         - The chat template application preserves the message structure and content
 
         Test Steps:

@@ -345,7 +345,7 @@ class TestQwenVLProcessor(unittest.TestCase):
 
         # Process request through the processor
         self.processor.process_request_dict(request, 1024 * 100)
-        prompt2 = request["text_after_process"]
+        prompt2 = request["prompt_tokens"]
 
         # Verify both methods produce identical prompt strings
         self.assertEqual(prompt, prompt2)
@@ -62,7 +62,7 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
         )
 
         async def mock_chat_completion_full_generator(
-            request, request_id, model_name, prompt_token_ids, text_after_process
+            request, request_id, model_name, prompt_token_ids, prompt_tokens
         ):
             return prompt_token_ids
 

@@ -89,7 +89,7 @@ class TestLodChatTemplate(unittest.IsolatedAsyncioTestCase):
         )
 
         async def mock_chat_completion_full_generator(
-            request, request_id, model_name, prompt_token_ids, text_after_process
+            request, request_id, model_name, prompt_token_ids, prompt_tokens
         ):
             return prompt_token_ids
 