Optimizing the performance of think length limit using custom operators (#4279)
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FD Image Build (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled

* delete impl

* delete min_length&max_length

* support limit thinking content strategy

* fix

* fix

* fix

* update

* fix set_value_by_flags_and_idx

* fix

* fix

* fix

* fix

* update

* fix

* fix

* fix typo

* fix ci

* fix

* fix

* support mtp

* fix

* fix

* update

* update
This commit is contained in:
Yuanle Liu
2025-10-20 21:09:13 +08:00
committed by GitHub
parent 36af88ff3f
commit cef3164c3b
31 changed files with 747 additions and 1032 deletions
+31 -19
View File
@@ -12,7 +12,7 @@
import importlib
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from enum import IntFlag, auto
from functools import lru_cache
from typing import Dict, List, Optional, Tuple, Type, Union
@@ -26,20 +26,15 @@ from fastdeploy.config import (
iter_architecture_defaults,
try_match_architecture_defaults,
)
from fastdeploy.model_executor.models.interfaces_base import (
determine_model_category,
get_default_pooling_type,
is_multimodal_model,
is_pooling_model,
is_text_generation_model,
)
from fastdeploy.model_executor.models.interfaces_base import get_default_pooling_type
class ModelCategory(Enum):
TEXT_GENERATION = "text_generation"
MULTIMODAL = "multimodal"
EMBEDDING = "embedding"
REWARD = "reward"
class ModelCategory(IntFlag):
    """Bit-flag categories that a registered model can belong to.

    Each member receives its own bit from ``auto()``, so several categories
    can be combined with ``|`` and membership can be tested with ``in``,
    e.g. ``ModelCategory.REASONING in category``.
    """

    # Member order determines the bit assigned by auto(); keep it stable.
    TEXT_GENERATION = auto()
    MULTIMODAL = auto()
    EMBEDDING = auto()
    REASONING = auto()
    REWARD = auto()
@dataclass(frozen=True)
@@ -48,18 +43,22 @@ class ModelInfo:
category: ModelCategory
is_text_generation: bool
is_multimodal: bool
is_reasoning: bool
is_pooling: bool
module_path: str
default_pooling_type: str
@staticmethod
def from_model_cls(model_cls: Type[nn.Layer], module_path: str = "") -> "ModelInfo":
def from_model_cls(
model_cls: Type[nn.Layer], module_path: str = "", category: ModelCategory = None
) -> "ModelInfo":
return ModelInfo(
architecture=model_cls.__name__,
category=determine_model_category(model_cls.__name__),
is_text_generation=is_text_generation_model(model_cls),
is_multimodal=is_multimodal_model(model_cls.__name__),
is_pooling=is_pooling_model(model_cls),
category=category,
is_text_generation=ModelCategory.TEXT_GENERATION in category,
is_multimodal=ModelCategory.MULTIMODAL in category,
is_reasoning=ModelCategory.REASONING in category,
is_pooling=ModelCategory.EMBEDDING in category,
default_pooling_type=get_default_pooling_type(model_cls),
module_path=module_path,
)
@@ -84,6 +83,7 @@ class LazyRegisteredModel(BaseRegisteredModel):
module_name: str
module_path: str
class_name: str
category: ModelCategory
def load_model_cls(self) -> Type[nn.Layer]:
try:
@@ -95,7 +95,7 @@ class LazyRegisteredModel(BaseRegisteredModel):
def inspect_model_cls(self) -> ModelInfo:
model_cls = self.load_model_cls()
return ModelInfo.from_model_cls(model_cls, self.module_name)
return ModelInfo.from_model_cls(model_cls, self.module_name, self.category)
@lru_cache(maxsize=128)
@@ -127,6 +127,7 @@ class ModelRegistry:
module_name=model_info["module_name"],
module_path=model_info["module_path"],
class_name=model_info["class_name"],
category=model_info["category"],
)
self.models[arch] = model
self._registered_models[arch] = model
@@ -317,6 +318,17 @@ class ModelRegistry:
return model_info.is_multimodal
return False
def is_reasoning_model(self, architectures: Union[str, List[str]], model_config: ModelConfig = None) -> bool:
    """Check whether the given architecture(s) resolve to a reasoning model.

    Args:
        architectures: A single architecture name or a list of names.
        model_config: Not read by this method.

    Returns:
        The ``is_reasoning`` flag of the first architecture for which
        ``self._try_inspect_model_cls`` returns a non-``None`` result, or
        ``False`` when no architecture can be inspected.
    """
    # Normalise a lone architecture name to a one-element list.
    if isinstance(architectures, str):
        architectures = [architectures]

    for arch in architectures:
        model_info = self._try_inspect_model_cls(arch)
        # The first architecture that can be inspected decides the answer;
        # later entries are not consulted.
        if model_info is not None:
            return model_info.is_reasoning

    return False
def is_text_generation_model(self, architectures: Union[str, List[str]], model_config: ModelConfig = None) -> bool:
"""Check if it's a text generation model"""
if isinstance(architectures, str):