This commit is contained in:
Nana
2026-04-07 16:30:32 +08:00
committed by GitHub
parent f422f835e8
commit 367d37b523
8 changed files with 22 additions and 22 deletions
@@ -16,7 +16,7 @@
import asyncio import asyncio
from fastdeploy.input.tokenzier_client import ( from fastdeploy.input.tokenizer_client import (
AsyncTokenizerClient, AsyncTokenizerClient,
ImageDecodeRequest, ImageDecodeRequest,
ImageEncodeRequest, ImageEncodeRequest,
@@ -18,7 +18,7 @@ import inspect
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from fastdeploy.entrypoints.openai.usage_calculator import count_tokens from fastdeploy.entrypoints.openai.usage_calculator import count_tokens
from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest
class ChatResponseProcessor: class ChatResponseProcessor:
@@ -44,7 +44,7 @@ from fastdeploy.entrypoints.openai.v1.serving_base import (
OpenAiServingBase, OpenAiServingBase,
ServingResponseContext, ServingResponseContext,
) )
from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest
from fastdeploy.metrics.metrics import main_process_metrics from fastdeploy.metrics.metrics import main_process_metrics
from fastdeploy.utils import api_server_logger from fastdeploy.utils import api_server_logger
from fastdeploy.worker.output import LogprobsLists from fastdeploy.worker.output import LogprobsLists
+6 -6
View File
@@ -638,12 +638,12 @@ class GPUModelRunner(ModelRunnerBase):
image_features_output is not None image_features_output is not None
), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}" ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx] grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
# add feature to encoder cache # add feature to encoder cache
self.encoder_cache[mm_hash] = mm_feature.detach().cpu() self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
feature_idx += mm_token_lenght feature_idx += mm_token_length
thw_idx += 1 thw_idx += 1
feature_start = feature_position.offset feature_start = feature_position.offset
@@ -663,13 +663,13 @@ class GPUModelRunner(ModelRunnerBase):
merge_image_features, thw_idx = [], 0 merge_image_features, thw_idx = [], 0
for feature_position in feature_position_item: for feature_position in feature_position_item:
grid_thw = grid_thw_lst[thw_idx] grid_thw = grid_thw_lst[thw_idx]
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
feature_start = feature_position.offset feature_start = feature_position.offset
feature_end = feature_position.offset + feature_position.length feature_end = feature_position.offset + feature_position.length
merge_image_features.append(mm_feature[feature_start:feature_end]) merge_image_features.append(mm_feature[feature_start:feature_end])
feature_idx += mm_token_lenght feature_idx += mm_token_length
thw_idx += 1 thw_idx += 1
image_features_list.append(paddle.concat(merge_image_features, axis=0)) image_features_list.append(paddle.concat(merge_image_features, axis=0))
for idx, index in req_idx_img_index_map.items(): for idx, index in req_idx_img_index_map.items():
+6 -6
View File
@@ -571,12 +571,12 @@ class MetaxModelRunner(ModelRunnerBase):
image_features_output is not None image_features_output is not None
), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}" ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx] grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
# add feature to encoder cache # add feature to encoder cache
self.encoder_cache[mm_hash] = mm_feature.detach().cpu() self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
feature_idx += mm_token_lenght feature_idx += mm_token_length
thw_idx += 1 thw_idx += 1
feature_start = feature_position.offset feature_start = feature_position.offset
@@ -596,13 +596,13 @@ class MetaxModelRunner(ModelRunnerBase):
merge_image_features, thw_idx = [], 0 merge_image_features, thw_idx = [], 0
for feature_position in feature_position_item: for feature_position in feature_position_item:
grid_thw = grid_thw_lst[thw_idx] grid_thw = grid_thw_lst[thw_idx]
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
feature_start = feature_position.offset feature_start = feature_position.offset
feature_end = feature_position.offset + feature_position.length feature_end = feature_position.offset + feature_position.length
merge_image_features.append(mm_feature[feature_start:feature_end]) merge_image_features.append(mm_feature[feature_start:feature_end])
feature_idx += mm_token_lenght feature_idx += mm_token_length
thw_idx += 1 thw_idx += 1
image_features_list.append(paddle.concat(merge_image_features, axis=0)) image_features_list.append(paddle.concat(merge_image_features, axis=0))
for idx, index in req_idx_img_index_map.items(): for idx, index in req_idx_img_index_map.items():
+6 -6
View File
@@ -485,12 +485,12 @@ class XPUModelRunner(ModelRunnerBase):
image_features_output is not None image_features_output is not None
), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}" ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx] grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
# add feature to encoder cache # add feature to encoder cache
self.encoder_cache[mm_hash] = mm_feature.detach().cpu() self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
feature_idx += mm_token_lenght feature_idx += mm_token_length
thw_idx += 1 thw_idx += 1
feature_start = feature_position.offset feature_start = feature_position.offset
@@ -510,13 +510,13 @@ class XPUModelRunner(ModelRunnerBase):
image_features_output = self.extract_vision_features(multi_vision_inputs) image_features_output = self.extract_vision_features(multi_vision_inputs)
for feature_position in multi_vision_inputs["feature_position_list"]: for feature_position in multi_vision_inputs["feature_position_list"]:
grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx] grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
feature_start = feature_position.offset feature_start = feature_position.offset
feature_end = feature_position.offset + feature_position.length feature_end = feature_position.offset + feature_position.length
merge_image_features.append(mm_feature[feature_start:feature_end]) merge_image_features.append(mm_feature[feature_start:feature_end])
feature_idx += mm_token_lenght feature_idx += mm_token_length
thw_idx += 1 thw_idx += 1
self.share_inputs["image_features"] = paddle.concat(merge_image_features, axis=0) self.share_inputs["image_features"] = paddle.concat(merge_image_features, axis=0)
+1 -1
View File
@@ -18,7 +18,7 @@ import httpx
import pytest import pytest
import respx import respx
from fastdeploy.input.tokenzier_client import ( from fastdeploy.input.tokenizer_client import (
AsyncTokenizerClient, AsyncTokenizerClient,
ImageEncodeRequest, ImageEncodeRequest,
VideoEncodeRequest, VideoEncodeRequest,