Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Optimization] The pre- and post-processing pipelines no longer perform dict conversion (#5494)
* to_request_for_infer initial commit
* refactor to from_chat_completion_request
* preprocess uses request, initial commit
* bugfix
* refactor processors to use request
* bugfix
* refactor Request.from_generic_request
* postprocess initial commit
* bugfix
* postprocess second commit
* bugfix
* serving_embedding initial commit
* serving_reward initial commit
* bugfix
* replace function name
* async_llm initial commit
* offline initial commit and fix bug
* bugfix
* fix async_llm
* remove adding speculate_metrics into data
* fix logprobs bug
* fix echo bug
* fix bug
* fix reasoning_max_tokens
* bugfix
* bugfix and modify unit test
* bugfix and modify unit test
* bugfix
* bugfix
* bugfix
* modify unit test
* fix error when reasoning_content is None for text_processor
* remove some unnecessary logic
* revert removed logic
* implement add and set methods for RequestOutput and refactor code
* modify unit test
* modify unit test
* unify process_request and process_request_obj
* remove a unit test
* unify process_response and process_response_obj
* support qwen3_vl_processor
* modify unit test and remove comments
* fix prompt_logprobs
* fix codestyle
* add v1
* v1
* fix unit test
* fix unit test
* fix pre-commit
* fix
* add process request
* add process request
* fix
* fix
* fix unit test
* fix unit test
* fix unit test
* fix unit test
* fix unit test
* remove file
* add unit test
* add unit test
* add unit test
* fix unit test
* fix unit test
* fix
* fix

---------

Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com>
Co-authored-by: luukunn <981429396@qq.com>
Co-authored-by: luukunn <83932082+luukunn@users.noreply.github.com>
Co-authored-by: Zhang Yulong <35552275+ZhangYulongg@users.noreply.github.com>
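At a high level, the commit replaces the dict round-trip in the pre- and post-processing path with direct construction from the request object via the new `SamplingParams.from_generic_request` classmethod shown in the diff below. A minimal sketch of the call-site change, where `ChatCompletionRequest` is a simplified stand-in for the protocol class (its fields here are assumptions) and `to_dict()` is an assumed name for the old conversion step:

```python
# Simplified stand-in for the protocol request class; these fields are
# assumptions for illustration, not the full FastDeploy definition.
from dataclasses import dataclass
from typing import Optional

from fastdeploy.engine.sampling_params import SamplingParams  # assumed module path


@dataclass
class ChatCompletionRequest:
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    max_completion_tokens: Optional[int] = None
    logprobs: Optional[bool] = None
    top_logprobs: Optional[int] = None


req = ChatCompletionRequest(temperature=0.7, max_completion_tokens=256)

# Before this commit (conceptually): request -> dict -> SamplingParams,
# e.g. something like SamplingParams.from_optional(**req.to_dict()).

# After this commit: the request object is consumed directly.
sampling_params = SamplingParams.from_generic_request(req)
```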
@@ -19,10 +19,12 @@ from __future__ import annotations
 import random
 from dataclasses import dataclass, fields
 from enum import Enum
-from typing import Any, List, Optional, Union
+from typing import Any, List, Optional, TypeVar, Union

 from fastdeploy import envs

+T = TypeVar("T")
+

 @dataclass
 class SamplingParams:
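The new `T = TypeVar("T")` exists so the `from_generic_request` classmethod added in the next hunk can be annotated without committing to a concrete request class; the method relies only on `hasattr`/`getattr` duck typing. A sketch under that assumption, using an invented minimal request object:

```python
# DummyRequest is invented for illustration: any object exposing the expected
# attribute names works, since every lookup uses getattr with a default.
from fastdeploy.engine.sampling_params import SamplingParams  # assumed module path


class DummyRequest:
    temperature = 0.2
    max_tokens = 128


params = SamplingParams.from_generic_request(DummyRequest())
assert params.temperature == 0.2
assert params.max_tokens == 128  # no max_completion_tokens, so max_tokens is used
```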
@@ -118,6 +120,101 @@ class SamplingParams:
         }
     )

+    @classmethod
+    def from_generic_request(cls, req: T) -> SamplingParams:
+        logprobs_val = None
+        if hasattr(req, "top_logprobs"):
+            if getattr(req, "logprobs", None):
+                logprobs_val = getattr(req, "top_logprobs", None)
+        else:
+            logprobs_val = getattr(req, "logprobs", None)
+        max_tokens_val = (
+            req.max_completion_tokens or getattr(req, "max_tokens", cls.max_tokens)
+            if hasattr(req, "max_completion_tokens")
+            else getattr(req, "max_tokens", cls.max_tokens)
+        )
+
+        return cls(
+            n=getattr(req, "n", None) if getattr(req, "n", None) is not None else cls.n,
+            best_of=getattr(req, "best_of", None) if getattr(req, "best_of", None) is not None else cls.best_of,
+            presence_penalty=(
+                getattr(req, "presence_penalty", None)
+                if getattr(req, "presence_penalty", None) is not None
+                else cls.presence_penalty
+            ),
+            frequency_penalty=(
+                getattr(req, "frequency_penalty", None)
+                if getattr(req, "frequency_penalty", None) is not None
+                else cls.frequency_penalty
+            ),
+            repetition_penalty=(
+                getattr(req, "repetition_penalty", None)
+                if getattr(req, "repetition_penalty", None) is not None
+                else cls.repetition_penalty
+            ),
+            temperature=(
+                getattr(req, "temperature", None) if getattr(req, "temperature", None) is not None else cls.temperature
+            ),
+            top_p=getattr(req, "top_p", None) if getattr(req, "top_p", None) is not None else cls.top_p,
+            top_k=getattr(req, "top_k", None) if getattr(req, "top_k", None) is not None else cls.top_k,
+            min_p=getattr(req, "min_p", None) if getattr(req, "min_p", None) is not None else cls.min_p,
+            seed=getattr(req, "seed", None) if getattr(req, "seed", None) is not None else cls.seed,
+            stop=getattr(req, "stop", None) if getattr(req, "stop", None) is not None else cls.stop,
+            stop_token_ids=(
+                getattr(req, "stop_token_ids", None)
+                if getattr(req, "stop_token_ids", None) is not None
+                else cls.stop_token_ids
+            ),
+            stop_seqs_len=(
+                getattr(req, "stop_seqs_len", None)
+                if getattr(req, "stop_seqs_len", None) is not None
+                else cls.stop_seqs_len
+            ),
+            max_tokens=max_tokens_val,
+            reasoning_max_tokens=(
+                getattr(req, "reasoning_max_tokens", None)
+                if getattr(req, "reasoning_max_tokens", None) is not None
+                else cls.reasoning_max_tokens
+            ),
+            min_tokens=(
+                getattr(req, "min_tokens", None) if getattr(req, "min_tokens", None) is not None else cls.min_tokens
+            ),
+            logprobs=logprobs_val,
+            prompt_logprobs=(
+                getattr(req, "prompt_logprobs", None)
+                if getattr(req, "prompt_logprobs", None) is not None
+                else cls.prompt_logprobs
+            ),
+            temp_scaled_logprobs=(
+                getattr(req, "temp_scaled_logprobs", None)
+                if getattr(req, "temp_scaled_logprobs", None) is not None
+                else cls.temp_scaled_logprobs
+            ),
+            top_p_normalized_logprobs=(
+                getattr(req, "top_p_normalized_logprobs", None)
+                if getattr(req, "top_p_normalized_logprobs", None) is not None
+                else cls.top_p_normalized_logprobs
+            ),
+            bad_words=(
+                getattr(req, "bad_words", None) if getattr(req, "bad_words", None) is not None else cls.bad_words
+            ),
+            guided_decoding=(
+                getattr(req, "guided_decoding", None)
+                if getattr(req, "guided_decoding", None) is not None
+                else cls.guided_decoding
+            ),
+            bad_words_token_ids=(
+                getattr(req, "bad_words_token_ids", None)
+                if getattr(req, "bad_words_token_ids", None) is not None
+                else cls.bad_words_token_ids
+            ),
+            logits_processors_args=(
+                getattr(req, "logits_processors_args", None)
+                if getattr(req, "logits_processors_args", None) is not None
+                else cls.logits_processors_args
+            ),
+        )
+
     @classmethod
     def from_optional(
         cls,
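Two fields get special handling before the field-by-field merge above. Chat-style requests carry a `top_logprobs` count alongside a boolean `logprobs` flag (the OpenAI convention), so when the request has a `top_logprobs` attribute and `logprobs` is truthy, the count comes from `top_logprobs`; completion-style requests put the count in `logprobs` itself. Likewise, `max_completion_tokens` takes precedence over `max_tokens` when present. A sketch of that resolution with two invented request stubs:

```python
# Invented stubs showing how the two request styles resolve logprobs/max_tokens.
from fastdeploy.engine.sampling_params import SamplingParams  # assumed module path


class ChatStyleReq:  # has top_logprobs -> logprobs is a flag, top_logprobs the count
    logprobs = True
    top_logprobs = 5
    max_completion_tokens = 64


class CompletionStyleReq:  # no top_logprobs -> logprobs itself is the count
    logprobs = 3
    max_tokens = 32


chat = SamplingParams.from_generic_request(ChatStyleReq())
assert chat.logprobs == 5 and chat.max_tokens == 64

completion = SamplingParams.from_generic_request(CompletionStyleReq())
assert completion.logprobs == 3 and completion.max_tokens == 32
```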
@@ -200,7 +297,7 @@ class SamplingParams:
             raise ValueError(f"max_tokens must be at least 1, got {self.max_tokens}.")

         if self.reasoning_max_tokens is not None and self.reasoning_max_tokens > self.max_tokens:
-            raise ValueError(f"reasoning_max_tokens must be less than max_tokens, got {self.reasoning_max_tokens}.")
+            self.reasoning_max_tokens = self.max_tokens

         if self.min_tokens < 0:
             raise ValueError(f"min_tokens must be greater than or equal to 0, " f"got {self.min_tokens}.")
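This last hunk is a behavior change rather than a refactor: a `reasoning_max_tokens` larger than `max_tokens` used to be rejected with a `ValueError` and is now clamped to `max_tokens`. Assuming this check runs when the parameters are constructed (it sits among the class's other value validations), the observable effect is roughly:

```python
# Sketch of the changed validation behavior; assumes the check runs at
# construction time.
from fastdeploy.engine.sampling_params import SamplingParams  # assumed module path

params = SamplingParams(max_tokens=100, reasoning_max_tokens=500)
# Before this commit: ValueError("reasoning_max_tokens must be less than max_tokens, ...")
# After this commit: silently capped to max_tokens.
assert params.reasoning_max_tokens == 100
```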