mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Bug Fix] fix tokenizer oom (#6287)
* fix tokenizer oom * fix unit test
This commit is contained in:
@@ -480,7 +480,8 @@ class DataProcessor(MMBaseDataProcessor):
|
||||
|
||||
def _add_text(self, tokens, outputs: Dict) -> None:
|
||||
if isinstance(tokens, str):
|
||||
- tokens = self.tokenizer.encode(tokens, add_special_tokens=False)["input_ids"]
|
||||
+ tokens = self.tokenizer.tokenize(tokens)
|
||||
+ tokens = self.tokenizer.convert_tokens_to_ids(tokens)
|
||||
outputs["input_ids"].extend(tokens)
|
||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]] * len(tokens))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user