[Optimization] The pre- and post-processing pipeline do not perform dict conversion (#5494)

* to_request_for_infer initial commit

* refact to from_chat_completion_request

* preprocess use request initial commit

* bugfix

* processors refact to using request

* bug fix

* refact Request from_generic_request

* post process initial commit

* bugfix

* postprocess second commit

* bugfix

* serving_embedding initial commit

* serving_reward initial commit

* bugfix

* replace function name

* async_llm initial commit

* offline initial commit and fix bug

* bugfix

* fix async_llm

* remove add speculate_metrics into data

* fix logprobs bug

* fix echo bug

* fix bug

* fix reasoning_max_tokens

* bugfix

* bugfix and modify unittest

* bugfix and modify unit test

* bugfix

* bugfix

* bugfix

* modify unittest

* fix error when reasoning_content is None for text_processor

* remove some unnecessary logic

* revert removed logic

* implement add and set method for RequestOutput and refact code

* modify unit test

* modify unit test

* unify process_request and process_request_obj

* remove a unit test

* unify process_response and process_response_obj

* support qwen3_vl_processor

* modify unittest and remove comments

* fix prompt_logprobs

* fix codestyle

* add v1

* v1

* fix unit test

* fix unit test

* fix pre-commit

* fix

* add process request

* add process request

* fix

* fix

* fix unit test

* fix unit test

* fix unit test

* fix unit test

* fix unit test

* remove file

* add unit test

* add unit test

* add unit test

* fix unit test

* fix unit test

* fix

* fix

---------

Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com>
Co-authored-by: luukunn <981429396@qq.com>
Co-authored-by: luukunn <83932082+luukunn@users.noreply.github.com>
Co-authored-by: Zhang Yulong <35552275+ZhangYulongg@users.noreply.github.com>
This commit is contained in:
kxz2002
2026-01-22 00:50:52 +08:00
committed by GitHub
parent fe5ba4b509
commit 6e416c62dd
66 changed files with 16614 additions and 739 deletions
+99 -2
View File
@@ -19,10 +19,12 @@ from __future__ import annotations
import random
from dataclasses import dataclass, fields
from enum import Enum
from typing import Any, List, Optional, Union
from typing import Any, List, Optional, TypeVar, Union
from fastdeploy import envs
T = TypeVar("T")
@dataclass
class SamplingParams:
@@ -118,6 +120,101 @@ class SamplingParams:
}
)
@classmethod
def from_generic_request(cls, req: T) -> SamplingParams:
    """Build a ``SamplingParams`` from any request-like object.

    Probes sampling-related attributes on *req* (an OpenAI-style chat or
    completion request, or any object exposing the same attribute names)
    with ``getattr``, falling back to the class-level field default for
    every attribute that is missing or explicitly ``None``.

    Args:
        req: Request-like object; any subset of the probed attributes may
            be absent.

    Returns:
        A new ``SamplingParams`` populated from *req*.
    """

    def pick(name: str):
        # Request value when present and not None, else the class-level
        # default of the field with the same name. Single lookup instead
        # of the repeated double-getattr pattern.
        value = getattr(req, name, None)
        return value if value is not None else getattr(cls, name)

    # Chat-style requests gate the `top_logprobs` count behind the boolean
    # `logprobs` flag; other request types expose `logprobs` as the count
    # itself. NOTE(review): assumes the original `else` bound to the outer
    # `hasattr` check — confirm against upstream, the diff lost indentation.
    logprobs_val = None
    if hasattr(req, "top_logprobs"):
        if getattr(req, "logprobs", None):
            logprobs_val = getattr(req, "top_logprobs", None)
    else:
        logprobs_val = getattr(req, "logprobs", None)

    # `max_completion_tokens` (newer chat API) wins over legacy `max_tokens`
    # when it is present and truthy.
    if hasattr(req, "max_completion_tokens"):
        max_tokens_val = req.max_completion_tokens or getattr(req, "max_tokens", cls.max_tokens)
    else:
        max_tokens_val = getattr(req, "max_tokens", cls.max_tokens)

    return cls(
        n=pick("n"),
        best_of=pick("best_of"),
        presence_penalty=pick("presence_penalty"),
        frequency_penalty=pick("frequency_penalty"),
        repetition_penalty=pick("repetition_penalty"),
        temperature=pick("temperature"),
        top_p=pick("top_p"),
        top_k=pick("top_k"),
        min_p=pick("min_p"),
        seed=pick("seed"),
        stop=pick("stop"),
        stop_token_ids=pick("stop_token_ids"),
        stop_seqs_len=pick("stop_seqs_len"),
        max_tokens=max_tokens_val,
        reasoning_max_tokens=pick("reasoning_max_tokens"),
        min_tokens=pick("min_tokens"),
        logprobs=logprobs_val,
        prompt_logprobs=pick("prompt_logprobs"),
        temp_scaled_logprobs=pick("temp_scaled_logprobs"),
        top_p_normalized_logprobs=pick("top_p_normalized_logprobs"),
        bad_words=pick("bad_words"),
        guided_decoding=pick("guided_decoding"),
        bad_words_token_ids=pick("bad_words_token_ids"),
        logits_processors_args=pick("logits_processors_args"),
    )
@classmethod
def from_optional(
cls,
@@ -200,7 +297,7 @@ class SamplingParams:
raise ValueError(f"max_tokens must be at least 1, got {self.max_tokens}.")
if self.reasoning_max_tokens is not None and self.reasoning_max_tokens > self.max_tokens:
raise ValueError(f"reasoning_max_tokens must be less than max_tokens, got {self.reasoning_max_tokens}.")
self.reasoning_max_tokens = self.max_tokens
if self.min_tokens < 0:
raise ValueError(f"min_tokens must be greater than or equal to 0, " f"got {self.min_tokens}.")