mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-22 16:07:51 +08:00
fix tokenizer oom (#6324)
CE Compile Job / ce_job_pre_check (push) Waiting to run
CE Compile Job / print_ce_job_pre_check_outputs (push) Blocked by required conditions
CE Compile Job / FD-Clone-Linux (push) Blocked by required conditions
CE Compile Job / Show Code Archive Output (push) Blocked by required conditions
CE Compile Job / BUILD_SM8090 (push) Blocked by required conditions
CE Compile Job / BUILD_SM8689 (push) Blocked by required conditions
CE Compile Job / CE_UPLOAD (push) Blocked by required conditions
CE Compile Job / ce_job_pre_check (push) Waiting to run
CE Compile Job / print_ce_job_pre_check_outputs (push) Blocked by required conditions
CE Compile Job / FD-Clone-Linux (push) Blocked by required conditions
CE Compile Job / Show Code Archive Output (push) Blocked by required conditions
CE Compile Job / BUILD_SM8090 (push) Blocked by required conditions
CE Compile Job / BUILD_SM8689 (push) Blocked by required conditions
CE Compile Job / CE_UPLOAD (push) Blocked by required conditions
This commit is contained in:
@@ -339,7 +339,8 @@ class DataProcessor:
|
||||
|
||||
def _add_text(self, tokens, outputs: Dict) -> None:
|
||||
if isinstance(tokens, str):
|
||||
tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"]
|
||||
tokens = self.tokenizer.tokenize(tokens)
|
||||
tokens = self.tokenizer.convert_tokens_to_ids(tokens)
|
||||
outputs["input_ids"].extend(tokens)
|
||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens))
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import unittest
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from fastdeploy.input.ernie4_5_vl_processor import Ernie4_5_VLProcessor
|
||||
|
||||
Reference in New Issue
Block a user