mirror of https://github.com/PaddlePaddle/FastDeploy.git
fix typo (#7147)
@@ -16,7 +16,7 @@
 import asyncio

-from fastdeploy.input.tokenzier_client import (
+from fastdeploy.input.tokenizer_client import (
     AsyncTokenizerClient,
     ImageDecodeRequest,
     ImageEncodeRequest,

@@ -18,7 +18,7 @@ import inspect
 from typing import Any, Dict, List, Optional

 from fastdeploy.entrypoints.openai.usage_calculator import count_tokens
-from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest
+from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest


 class ChatResponseProcessor:

@@ -44,7 +44,7 @@ from fastdeploy.entrypoints.openai.v1.serving_base import (
     OpenAiServingBase,
     ServingResponseContext,
 )
-from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest
+from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest
 from fastdeploy.metrics.metrics import main_process_metrics
 from fastdeploy.utils import api_server_logger
 from fastdeploy.worker.output import LogprobsLists

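The same tokenzier_client → tokenizer_client rename repeats in the model-runner and test hunks below. A quick way to confirm the corrected path resolves, sketched here on the assumption that a FastDeploy build containing this fix is installed (the module file rename itself is not shown in this excerpt):

# Hypothetical sanity check, not part of this commit.
import importlib.util

# The corrected module path should resolve...
assert importlib.util.find_spec("fastdeploy.input.tokenizer_client") is not None
# ...and the misspelled one should not (assumption: the module file was renamed too).
assert importlib.util.find_spec("fastdeploy.input.tokenzier_client") is None
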
@@ -638,12 +638,12 @@ class GPUModelRunner(ModelRunnerBase):
     image_features_output is not None
 ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
 grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
-mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]

 # add feature to encoder cache
 self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
-feature_idx += mm_token_lenght
+feature_idx += mm_token_length
 thw_idx += 1

 feature_start = feature_position.offset

@@ -663,13 +663,13 @@ class GPUModelRunner(ModelRunnerBase):
 merge_image_features, thw_idx = [], 0
 for feature_position in feature_position_item:
     grid_thw = grid_thw_lst[thw_idx]
-    mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+    mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]

     feature_start = feature_position.offset
     feature_end = feature_position.offset + feature_position.length
     merge_image_features.append(mm_feature[feature_start:feature_end])
-    feature_idx += mm_token_lenght
+    feature_idx += mm_token_length
     thw_idx += 1
 image_features_list.append(paddle.concat(merge_image_features, axis=0))
 for idx, index in req_idx_img_index_map.items():

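The identical mm_token_length bookkeeping appears again in the MetaxModelRunner and XPUModelRunner hunks below. As a standalone sketch of the pattern these loops implement (illustrative shapes, a stubbed mm_num_token_func, and a made-up FeaturePosition type; not the runner's real data structures): each image's grid_thw decides how many rows it owns in the flat image_features_output tensor, feature_idx walks that tensor image by image, and feature_position.offset/length pick the sub-range that is actually merged.

# Standalone sketch of the feature bookkeeping above; names, shapes, and the
# FeaturePosition type are illustrative, not FastDeploy's real structures.
from dataclasses import dataclass

import paddle


@dataclass
class FeaturePosition:
    offset: int  # start of the wanted rows within one image's feature block
    length: int  # number of rows actually merged into the prompt


def mm_num_token_func(grid_thw):
    # Stub: one token per (t, h, w) cell; the real function is model-specific.
    t, h, w = grid_thw
    return t * h * w


grid_thw_lst = [(1, 2, 2), (1, 3, 2)]  # two images -> 4 and 6 feature rows
hidden = 8
total_rows = sum(mm_num_token_func(g) for g in grid_thw_lst)
image_features_output = paddle.randn([total_rows, hidden])  # flat vision output
feature_position_list = [FeaturePosition(offset=0, length=4), FeaturePosition(offset=2, length=3)]

merge_image_features, feature_idx, thw_idx = [], 0, 0
for feature_position in feature_position_list:
    grid_thw = grid_thw_lst[thw_idx]
    mm_token_length = mm_num_token_func(grid_thw=grid_thw)
    # This image's contiguous block of rows in the flat output.
    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]

    feature_start = feature_position.offset
    feature_end = feature_position.offset + feature_position.length
    merge_image_features.append(mm_feature[feature_start:feature_end])
    feature_idx += mm_token_length
    thw_idx += 1

image_features = paddle.concat(merge_image_features, axis=0)
assert image_features.shape == [4 + 3, hidden]  # 4 rows from image 0, 3 from image 1
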
@@ -571,12 +571,12 @@ class MetaxModelRunner(ModelRunnerBase):
     image_features_output is not None
 ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
 grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
-mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]

 # add feature to encoder cache
 self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
-feature_idx += mm_token_lenght
+feature_idx += mm_token_length
 thw_idx += 1

 feature_start = feature_position.offset

@@ -596,13 +596,13 @@ class MetaxModelRunner(ModelRunnerBase):
 merge_image_features, thw_idx = [], 0
 for feature_position in feature_position_item:
     grid_thw = grid_thw_lst[thw_idx]
-    mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+    mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]

     feature_start = feature_position.offset
     feature_end = feature_position.offset + feature_position.length
     merge_image_features.append(mm_feature[feature_start:feature_end])
-    feature_idx += mm_token_lenght
+    feature_idx += mm_token_length
     thw_idx += 1
 image_features_list.append(paddle.concat(merge_image_features, axis=0))
 for idx, index in req_idx_img_index_map.items():

@@ -485,12 +485,12 @@ class XPUModelRunner(ModelRunnerBase):
     image_features_output is not None
 ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
 grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
-mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]

 # add feature to encoder cache
 self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
-feature_idx += mm_token_lenght
+feature_idx += mm_token_length
 thw_idx += 1

 feature_start = feature_position.offset

@@ -510,13 +510,13 @@ class XPUModelRunner(ModelRunnerBase):
 image_features_output = self.extract_vision_features(multi_vision_inputs)
 for feature_position in multi_vision_inputs["feature_position_list"]:
     grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
-    mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+    mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]

     feature_start = feature_position.offset
     feature_end = feature_position.offset + feature_position.length
     merge_image_features.append(mm_feature[feature_start:feature_end])
-    feature_idx += mm_token_lenght
+    feature_idx += mm_token_length
     thw_idx += 1
 self.share_inputs["image_features"] = paddle.concat(merge_image_features, axis=0)

@@ -18,7 +18,7 @@ import httpx
 import pytest
 import respx

-from fastdeploy.input.tokenzier_client import (
+from fastdeploy.input.tokenizer_client import (
     AsyncTokenizerClient,
     ImageEncodeRequest,
     VideoEncodeRequest,
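
A minimal companion smoke test in the spirit of the suite above (a sketch, not part of this commit; it only checks that the corrected path exports the names the tests import):

# Hypothetical smoke test; assumes a FastDeploy build containing this fix.
def test_tokenizer_client_imports():
    from fastdeploy.input.tokenizer_client import (
        AsyncTokenizerClient,
        ImageEncodeRequest,
        VideoEncodeRequest,
    )

    assert AsyncTokenizerClient is not None
    assert ImageEncodeRequest is not None
    assert VideoEncodeRequest is not None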