mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
fix typo (#7147)
This commit is contained in:
@@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from fastdeploy.input.tokenzier_client import (
|
from fastdeploy.input.tokenizer_client import (
|
||||||
AsyncTokenizerClient,
|
AsyncTokenizerClient,
|
||||||
ImageDecodeRequest,
|
ImageDecodeRequest,
|
||||||
ImageEncodeRequest,
|
ImageEncodeRequest,
|
||||||
@@ -18,7 +18,7 @@ import inspect
|
|||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from fastdeploy.entrypoints.openai.usage_calculator import count_tokens
|
from fastdeploy.entrypoints.openai.usage_calculator import count_tokens
|
||||||
from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest
|
from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest
|
||||||
|
|
||||||
|
|
||||||
class ChatResponseProcessor:
|
class ChatResponseProcessor:
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ from fastdeploy.entrypoints.openai.v1.serving_base import (
|
|||||||
OpenAiServingBase,
|
OpenAiServingBase,
|
||||||
ServingResponseContext,
|
ServingResponseContext,
|
||||||
)
|
)
|
||||||
from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest
|
from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest
|
||||||
from fastdeploy.metrics.metrics import main_process_metrics
|
from fastdeploy.metrics.metrics import main_process_metrics
|
||||||
from fastdeploy.utils import api_server_logger
|
from fastdeploy.utils import api_server_logger
|
||||||
from fastdeploy.worker.output import LogprobsLists
|
from fastdeploy.worker.output import LogprobsLists
|
||||||
|
|||||||
@@ -638,12 +638,12 @@ class GPUModelRunner(ModelRunnerBase):
|
|||||||
image_features_output is not None
|
image_features_output is not None
|
||||||
), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
|
), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
|
||||||
grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
|
grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
|
||||||
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
||||||
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
|
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
|
||||||
|
|
||||||
# add feature to encoder cache
|
# add feature to encoder cache
|
||||||
self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
|
self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
|
||||||
feature_idx += mm_token_lenght
|
feature_idx += mm_token_length
|
||||||
thw_idx += 1
|
thw_idx += 1
|
||||||
|
|
||||||
feature_start = feature_position.offset
|
feature_start = feature_position.offset
|
||||||
@@ -663,13 +663,13 @@ class GPUModelRunner(ModelRunnerBase):
|
|||||||
merge_image_features, thw_idx = [], 0
|
merge_image_features, thw_idx = [], 0
|
||||||
for feature_position in feature_position_item:
|
for feature_position in feature_position_item:
|
||||||
grid_thw = grid_thw_lst[thw_idx]
|
grid_thw = grid_thw_lst[thw_idx]
|
||||||
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
||||||
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
|
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
|
||||||
|
|
||||||
feature_start = feature_position.offset
|
feature_start = feature_position.offset
|
||||||
feature_end = feature_position.offset + feature_position.length
|
feature_end = feature_position.offset + feature_position.length
|
||||||
merge_image_features.append(mm_feature[feature_start:feature_end])
|
merge_image_features.append(mm_feature[feature_start:feature_end])
|
||||||
feature_idx += mm_token_lenght
|
feature_idx += mm_token_length
|
||||||
thw_idx += 1
|
thw_idx += 1
|
||||||
image_features_list.append(paddle.concat(merge_image_features, axis=0))
|
image_features_list.append(paddle.concat(merge_image_features, axis=0))
|
||||||
for idx, index in req_idx_img_index_map.items():
|
for idx, index in req_idx_img_index_map.items():
|
||||||
|
|||||||
@@ -571,12 +571,12 @@ class MetaxModelRunner(ModelRunnerBase):
|
|||||||
image_features_output is not None
|
image_features_output is not None
|
||||||
), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
|
), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
|
||||||
grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
|
grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
|
||||||
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
||||||
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
|
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
|
||||||
|
|
||||||
# add feature to encoder cache
|
# add feature to encoder cache
|
||||||
self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
|
self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
|
||||||
feature_idx += mm_token_lenght
|
feature_idx += mm_token_length
|
||||||
thw_idx += 1
|
thw_idx += 1
|
||||||
|
|
||||||
feature_start = feature_position.offset
|
feature_start = feature_position.offset
|
||||||
@@ -596,13 +596,13 @@ class MetaxModelRunner(ModelRunnerBase):
|
|||||||
merge_image_features, thw_idx = [], 0
|
merge_image_features, thw_idx = [], 0
|
||||||
for feature_position in feature_position_item:
|
for feature_position in feature_position_item:
|
||||||
grid_thw = grid_thw_lst[thw_idx]
|
grid_thw = grid_thw_lst[thw_idx]
|
||||||
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
||||||
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
|
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
|
||||||
|
|
||||||
feature_start = feature_position.offset
|
feature_start = feature_position.offset
|
||||||
feature_end = feature_position.offset + feature_position.length
|
feature_end = feature_position.offset + feature_position.length
|
||||||
merge_image_features.append(mm_feature[feature_start:feature_end])
|
merge_image_features.append(mm_feature[feature_start:feature_end])
|
||||||
feature_idx += mm_token_lenght
|
feature_idx += mm_token_length
|
||||||
thw_idx += 1
|
thw_idx += 1
|
||||||
image_features_list.append(paddle.concat(merge_image_features, axis=0))
|
image_features_list.append(paddle.concat(merge_image_features, axis=0))
|
||||||
for idx, index in req_idx_img_index_map.items():
|
for idx, index in req_idx_img_index_map.items():
|
||||||
|
|||||||
@@ -485,12 +485,12 @@ class XPUModelRunner(ModelRunnerBase):
|
|||||||
image_features_output is not None
|
image_features_output is not None
|
||||||
), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
|
), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
|
||||||
grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
|
grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
|
||||||
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
||||||
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
|
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
|
||||||
|
|
||||||
# add feature to encoder cache
|
# add feature to encoder cache
|
||||||
self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
|
self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
|
||||||
feature_idx += mm_token_lenght
|
feature_idx += mm_token_length
|
||||||
thw_idx += 1
|
thw_idx += 1
|
||||||
|
|
||||||
feature_start = feature_position.offset
|
feature_start = feature_position.offset
|
||||||
@@ -510,13 +510,13 @@ class XPUModelRunner(ModelRunnerBase):
|
|||||||
image_features_output = self.extract_vision_features(multi_vision_inputs)
|
image_features_output = self.extract_vision_features(multi_vision_inputs)
|
||||||
for feature_position in multi_vision_inputs["feature_position_list"]:
|
for feature_position in multi_vision_inputs["feature_position_list"]:
|
||||||
grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
|
grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
|
||||||
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
|
||||||
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
|
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
|
||||||
|
|
||||||
feature_start = feature_position.offset
|
feature_start = feature_position.offset
|
||||||
feature_end = feature_position.offset + feature_position.length
|
feature_end = feature_position.offset + feature_position.length
|
||||||
merge_image_features.append(mm_feature[feature_start:feature_end])
|
merge_image_features.append(mm_feature[feature_start:feature_end])
|
||||||
feature_idx += mm_token_lenght
|
feature_idx += mm_token_length
|
||||||
thw_idx += 1
|
thw_idx += 1
|
||||||
self.share_inputs["image_features"] = paddle.concat(merge_image_features, axis=0)
|
self.share_inputs["image_features"] = paddle.concat(merge_image_features, axis=0)
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ import httpx
|
|||||||
import pytest
|
import pytest
|
||||||
import respx
|
import respx
|
||||||
|
|
||||||
from fastdeploy.input.tokenzier_client import (
|
from fastdeploy.input.tokenizer_client import (
|
||||||
AsyncTokenizerClient,
|
AsyncTokenizerClient,
|
||||||
ImageEncodeRequest,
|
ImageEncodeRequest,
|
||||||
VideoEncodeRequest,
|
VideoEncodeRequest,
|
||||||
|
|||||||
Reference in New Issue
Block a user