[Feature] support pooling model dummy_run (#4345)

* support qwen3-embedding

* fix ci bug

* support pooling dummy_run

* fix

* delete print

* parallel_config.max_model_len

* delete is_pooling_model in dummy_run

* fix

* fd_model

* fix embedding load

* fix

* fix post_process
This commit is contained in:
lizexu123
2025-10-17 13:30:55 +08:00
committed by GitHub
parent 15b6b8dc25
commit c234b995ab
10 changed files with 291 additions and 126 deletions
@@ -12,9 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Type
from typing import ClassVar, Literal, Protocol, Type
import paddle
from paddle import nn
from typing_extensions import TypeVar, runtime_checkable
from fastdeploy.config import FDConfig
from fastdeploy.model_executor.forward_meta import ForwardMeta
from fastdeploy.model_executor.layers.pooler import Pooler
T = TypeVar("T", default=paddle.Tensor)
T_co = TypeVar("T_co", default=paddle.Tensor, covariant=True)
def is_text_generation_model(model_cls: Type[nn.Layer]) -> bool:
@@ -24,13 +33,7 @@ def is_text_generation_model(model_cls: Type[nn.Layer]) -> bool:
def is_pooling_model(model_cls: Type[nn.Layer]) -> bool:
class_name = model_cls.__name__
pooling_indicators = ["Embedding", "ForSequenceClassification"]
return (
any(indicator in class_name for indicator in pooling_indicators)
or hasattr(model_cls, "is_embedding_model")
and model_cls.is_embedding_model
)
return getattr(model_cls, "is_pooling_model", False)
def is_multimodal_model(class_name: str) -> bool:
@@ -52,3 +55,48 @@ def get_default_pooling_type(model_cls: Type[nn.Layer] = None) -> str:
if model_cls is not None:
return getattr(model_cls, "default_pooling_type", "LAST")
return "LAST"
@runtime_checkable
class FdModel(Protocol[T_co]):
"""The interface required for all models in FastDeploy."""
def __init__(
self,
fd_config: FDConfig,
prefix: str = "",
) -> None:
pass
def forward(
self,
ids_remove_padding: paddle.Tensor,
forward_metadata: ForwardMeta,
) -> T_co:
pass
class FdModelForPooling(FdModel[T_co], Protocol[T_co]):
"""The interface required for all pooling models in FastDeploy."""
is_pooling_model: ClassVar[Literal[True]] = True
"""
A flag that indicates this model supports pooling.
Note:
There is no need to redefine this flag if this class is in the
MRO of your model class.
"""
default_pooling_type: ClassVar[str] = "LAST"
"""
Indicates the
[fastdeploy.config.PoolerConfig.pooling_type][]
to use by default.
You can use the
[fastdeploy.model_executor.models.interfaces_base.default_pooling_type][]
decorator to conveniently set this field.
"""
pooler: Pooler
"""The pooler is only called on TP rank 0."""