[DataProcessor] add reasoning_tokens into usage info (#4520)
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled

* add reasoning_tokens into usage info initial commit

* add unit tests

* modify unit test

* modify and add unit tests

* fix unit test

* move stream usage to processor

* modify processor

* modify test_logprobs

* modify test_logprobs.py

* modify stream reasoning tokens accumulation

* fix unit test
This commit is contained in:
kxz2002
2025-10-25 16:57:58 +08:00
committed by GitHub
parent e4e3cede7f
commit 327fa4c255
11 changed files with 390 additions and 2 deletions
@@ -30,6 +30,7 @@ from fastdeploy.entrypoints.openai.protocol import (
CompletionResponseChoice,
CompletionResponseStreamChoice,
CompletionStreamResponse,
CompletionTokenUsageInfo,
ErrorInfo,
ErrorResponse,
UsageInfo,
@@ -370,6 +371,7 @@ class OpenAIServingCompletion:
dealer.write([b"", req_id.encode("utf-8")]) # 发送多路请求
output_tokens = [0] * num_choices
inference_start_time = [0] * num_choices
reasoning_tokens = [0] * num_choices
first_iteration = [True] * num_choices
tool_called = [False] * num_choices
max_streaming_response_tokens = (
@@ -458,6 +460,7 @@ class OpenAIServingCompletion:
output_draft_top_logprobs, request.logprobs, 0
)
output_tokens[idx] += 1
reasoning_tokens[idx] += output.get("reasoning_token_num", 0)
delta_message = CompletionResponseStreamChoice(
index=idx,
text=output["text"],
@@ -524,6 +527,9 @@ class OpenAIServingCompletion:
prompt_batched_token_ids[idx // (1 if request.n is None else request.n)]
)
+ output_tokens[idx],
completion_tokens_details=CompletionTokenUsageInfo(
reasoning_tokens=reasoning_tokens[idx]
),
),
)
yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
@@ -553,6 +559,7 @@ class OpenAIServingCompletion:
choices: List[CompletionResponseChoice] = []
num_prompt_tokens = 0
num_generated_tokens = 0
num_reasoning_tokens = 0
for idx in range(len(final_res_batch)):
final_res = final_res_batch[idx]
@@ -608,11 +615,14 @@ class OpenAIServingCompletion:
num_prompt_tokens += len(prompt_token_ids)
num_reasoning_tokens += output.get("reasoning_token_num", 0)
num_prompt_tokens = num_prompt_tokens // (1 if request.n is None else request.n)
usage = UsageInfo(
prompt_tokens=num_prompt_tokens,
completion_tokens=num_generated_tokens,
total_tokens=num_prompt_tokens + num_generated_tokens,
completion_tokens_details=CompletionTokenUsageInfo(reasoning_tokens=num_reasoning_tokens),
)
del request