mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
Optimize the performance of the thinking-length limit using custom operators (#4279)
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FD Image Build (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FD Image Build (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
* delete impl * delete min_length&max_length * support limit thinking content strategy * fix * fix * fix * update * fix set_value_by_flags_and_idx * fix * fix * fix * fix * update * fix * fix * fix typo * fix ci * fix * fix * support mtp * fix * fix * update * update
This commit is contained in:
@@ -12,7 +12,7 @@
|
||||
import importlib
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from enum import IntFlag, auto
|
||||
from functools import lru_cache
|
||||
from typing import Dict, List, Optional, Tuple, Type, Union
|
||||
|
||||
@@ -26,20 +26,15 @@ from fastdeploy.config import (
|
||||
iter_architecture_defaults,
|
||||
try_match_architecture_defaults,
|
||||
)
|
||||
from fastdeploy.model_executor.models.interfaces_base import (
|
||||
determine_model_category,
|
||||
get_default_pooling_type,
|
||||
is_multimodal_model,
|
||||
is_pooling_model,
|
||||
is_text_generation_model,
|
||||
)
|
||||
from fastdeploy.model_executor.models.interfaces_base import get_default_pooling_type
|
||||
|
||||
|
||||
class ModelCategory(Enum):
|
||||
TEXT_GENERATION = "text_generation"
|
||||
MULTIMODAL = "multimodal"
|
||||
EMBEDDING = "embedding"
|
||||
REWARD = "reward"
|
||||
class ModelCategory(IntFlag):
|
||||
TEXT_GENERATION = auto()
|
||||
MULTIMODAL = auto()
|
||||
EMBEDDING = auto()
|
||||
REASONING = auto()
|
||||
REWARD = auto()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -48,18 +43,22 @@ class ModelInfo:
|
||||
category: ModelCategory
|
||||
is_text_generation: bool
|
||||
is_multimodal: bool
|
||||
is_reasoning: bool
|
||||
is_pooling: bool
|
||||
module_path: str
|
||||
default_pooling_type: str
|
||||
|
||||
@staticmethod
|
||||
def from_model_cls(model_cls: Type[nn.Layer], module_path: str = "") -> "ModelInfo":
|
||||
def from_model_cls(
|
||||
model_cls: Type[nn.Layer], module_path: str = "", category: ModelCategory = None
|
||||
) -> "ModelInfo":
|
||||
return ModelInfo(
|
||||
architecture=model_cls.__name__,
|
||||
category=determine_model_category(model_cls.__name__),
|
||||
is_text_generation=is_text_generation_model(model_cls),
|
||||
is_multimodal=is_multimodal_model(model_cls.__name__),
|
||||
is_pooling=is_pooling_model(model_cls),
|
||||
category=category,
|
||||
is_text_generation=ModelCategory.TEXT_GENERATION in category,
|
||||
is_multimodal=ModelCategory.MULTIMODAL in category,
|
||||
is_reasoning=ModelCategory.REASONING in category,
|
||||
is_pooling=ModelCategory.EMBEDDING in category,
|
||||
default_pooling_type=get_default_pooling_type(model_cls),
|
||||
module_path=module_path,
|
||||
)
|
||||
@@ -84,6 +83,7 @@ class LazyRegisteredModel(BaseRegisteredModel):
|
||||
module_name: str
|
||||
module_path: str
|
||||
class_name: str
|
||||
category: ModelCategory
|
||||
|
||||
def load_model_cls(self) -> Type[nn.Layer]:
|
||||
try:
|
||||
@@ -95,7 +95,7 @@ class LazyRegisteredModel(BaseRegisteredModel):
|
||||
|
||||
def inspect_model_cls(self) -> ModelInfo:
|
||||
model_cls = self.load_model_cls()
|
||||
return ModelInfo.from_model_cls(model_cls, self.module_name)
|
||||
return ModelInfo.from_model_cls(model_cls, self.module_name, self.category)
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
|
||||
@@ -127,6 +127,7 @@ class ModelRegistry:
|
||||
module_name=model_info["module_name"],
|
||||
module_path=model_info["module_path"],
|
||||
class_name=model_info["class_name"],
|
||||
category=model_info["category"],
|
||||
)
|
||||
self.models[arch] = model
|
||||
self._registered_models[arch] = model
|
||||
@@ -317,6 +318,17 @@ class ModelRegistry:
|
||||
return model_info.is_multimodal
|
||||
return False
|
||||
|
||||
def is_reasoning_model(self, architectures: Union[str, List[str]], model_config: ModelConfig = None) -> bool:
|
||||
"""Check if it's a reasoning model"""
|
||||
if isinstance(architectures, str):
|
||||
architectures = [architectures]
|
||||
|
||||
for arch in architectures:
|
||||
model_info = self._try_inspect_model_cls(arch)
|
||||
if model_info is not None:
|
||||
return model_info.is_reasoning
|
||||
return False
|
||||
|
||||
def is_text_generation_model(self, architectures: Union[str, List[str]], model_config: ModelConfig = None) -> bool:
|
||||
"""Check if it's a text generation model"""
|
||||
if isinstance(architectures, str):
|
||||
|
||||
Reference in New Issue
Block a user