From 61fbcbf9f01f064cc1013af50b544003679b7bd1 Mon Sep 17 00:00:00 2001 From: ApplEOFDiscord <31272106+ApplEOFDiscord@users.noreply.github.com> Date: Tue, 3 Feb 2026 20:13:43 +0800 Subject: [PATCH] fix tokenizer oom (#6324) --- fastdeploy/input/ernie4_5_vl_processor/process.py | 3 ++- tests/input/test_ernie_vl_processor.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fastdeploy/input/ernie4_5_vl_processor/process.py b/fastdeploy/input/ernie4_5_vl_processor/process.py index 4ccdf287f2..f430e56b77 100644 --- a/fastdeploy/input/ernie4_5_vl_processor/process.py +++ b/fastdeploy/input/ernie4_5_vl_processor/process.py @@ -339,7 +339,8 @@ class DataProcessor: def _add_text(self, tokens, outputs: Dict) -> None: if isinstance(tokens, str): - tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"] + tokens = self.tokenizer.tokenize(tokens) + tokens = self.tokenizer.convert_tokens_to_ids(tokens) outputs["input_ids"].extend(tokens) outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens)) diff --git a/tests/input/test_ernie_vl_processor.py b/tests/input/test_ernie_vl_processor.py index 92d24d5b96..f95f07a1b8 100644 --- a/tests/input/test_ernie_vl_processor.py +++ b/tests/input/test_ernie_vl_processor.py @@ -1,4 +1,4 @@ -import unittest +import unittest from unittest.mock import MagicMock, patch from fastdeploy.input.ernie4_5_vl_processor import Ernie4_5_VLProcessor