fix tokenizer oom (#6324)
CE Compile Job / ce_job_pre_check (push) Waiting to run
CE Compile Job / print_ce_job_pre_check_outputs (push) Blocked by required conditions
CE Compile Job / FD-Clone-Linux (push) Blocked by required conditions
CE Compile Job / Show Code Archive Output (push) Blocked by required conditions
CE Compile Job / BUILD_SM8090 (push) Blocked by required conditions
CE Compile Job / BUILD_SM8689 (push) Blocked by required conditions
CE Compile Job / CE_UPLOAD (push) Blocked by required conditions

This commit is contained in:
ApplEOFDiscord
2026-02-03 20:13:43 +08:00
committed by GitHub
parent 696c4fa39a
commit 61fbcbf9f0
2 changed files with 3 additions and 2 deletions
@@ -339,7 +339,8 @@ class DataProcessor:
def _add_text(self, tokens, outputs: Dict) -> None:
    """Append text tokens to the accumulated model inputs.

    Args:
        tokens: Either a raw string to be tokenized, or an already-prepared
            sequence of token ids.
        outputs: Mutable dict holding at least "input_ids" and
            "token_type_ids" lists; both are extended in place.

    Returns:
        None. Mutates ``outputs`` in place.
    """
    if isinstance(tokens, str):
        # Tokenize then map tokens -> ids in two explicit steps instead of
        # calling tokenizer.encode(...)["input_ids"]; the single-call encode
        # path was observed to cause OOM on long inputs (see commit
        # "fix tokenizer oom"). NOTE(review): behavioral equivalence of the
        # two paths assumes encode == tokenize + convert_tokens_to_ids with
        # add_special_tokens=False — confirm for this tokenizer.
        tokens = self.tokenizer.tokenize(tokens)
        tokens = self.tokenizer.convert_tokens_to_ids(tokens)
    outputs["input_ids"].extend(tokens)
    # Tag every appended position as plain text so downstream processing can
    # distinguish text spans from other modalities.
    outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens))
+1 -1
View File
@@ -1,4 +1,4 @@
import unittest
import unittest
from unittest.mock import MagicMock, patch
from fastdeploy.input.ernie4_5_vl_processor import Ernie4_5_VLProcessor