[Optimization] update prompt & prompt_token_ids (#6334)

* fix prompt

* add unit test

* add unit test

* fix
This commit is contained in:
luukunn
2026-02-04 20:08:01 +08:00
committed by GitHub
parent bf78a48eb3
commit 765df94e6c
4 changed files with 28 additions and 31 deletions
+6 -14
View File
@@ -107,21 +107,13 @@ class Ernie4_5Processor(BaseDataProcessor):
# processing prompt_token_ids
if request.prompt_token_ids is None or len(request.prompt_token_ids) == 0:
if request.prompt is not None:
# prompt = request.prompt if request.prompt is not None else request.messages[0]
prompt = request.prompt
assert isinstance(prompt, str) or (
isinstance(prompt, list) and all([isinstance(t, int) for t in prompt])
), f"prompt must be a string or a list of integers, but got {type(prompt)}"
if isinstance(prompt, list): # if prompt is a token id list
request.prompt_token_ids = prompt
else:
tokens = self.tokenizer.tokenize(prompt)
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
request.prompt_token_ids = token_ids
data_processor_logger.debug(
f"request_ids: {request.request_id}, prompt: {prompt}, tokens: {tokens}, token_ids: {token_ids}"
)
tokens = self.tokenizer.tokenize(prompt)
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
request.prompt_token_ids = token_ids
data_processor_logger.debug(
f"request_ids: {request.request_id}, prompt: {prompt}, tokens: {tokens}, token_ids: {token_ids}"
)
elif request.messages is not None:
task = request.to_dict()
chat_template_kwargs = kwargs.get("chat_template_kwargs", {})