[Feature] Support include_stop_str_in_output in completion api (#3096)

* [Feature] Support include_stop_str_in_output in completion api

* Fix ci test

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
Jiang-Jia-Jun
2025-07-30 22:18:48 +08:00
committed by GitHub
parent 7dfdd157ac
commit 0616c208d2
4 changed files with 56 additions and 17 deletions
+46 -4
View File
@@ -357,7 +357,7 @@ def test_non_streaming_with_stop_str(openai_client):
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=1,
max_tokens=5,
-metadata={"include_stop_str_in_output": True},
+extra_body={"include_stop_str_in_output": True},
stream=False,
)
# Assertions to check the response structure
@@ -370,7 +370,7 @@ def test_non_streaming_with_stop_str(openai_client):
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=1,
max_tokens=5,
-metadata={"include_stop_str_in_output": False},
+extra_body={"include_stop_str_in_output": False},
stream=False,
)
# Assertions to check the response structure
@@ -378,6 +378,25 @@ def test_non_streaming_with_stop_str(openai_client):
assert len(response.choices) > 0
assert not response.choices[0].message.content.endswith("</s>")
response = openai_client.completions.create(
model="default",
prompt="Hello, how are you?",
temperature=1,
max_tokens=1024,
stream=False,
)
assert not response.choices[0].text.endswith("</s>")
response = openai_client.completions.create(
model="default",
prompt="Hello, how are you?",
temperature=1,
max_tokens=1024,
extra_body={"include_stop_str_in_output": True},
stream=False,
)
assert response.choices[0].text.endswith("</s>")
def test_streaming_with_stop_str(openai_client):
"""
@@ -388,7 +407,7 @@ def test_streaming_with_stop_str(openai_client):
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=1,
max_tokens=5,
-metadata={"include_stop_str_in_output": True},
+extra_body={"include_stop_str_in_output": True},
stream=True,
)
# Assertions to check the response structure
@@ -402,7 +421,7 @@ def test_streaming_with_stop_str(openai_client):
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=1,
max_tokens=5,
-metadata={"include_stop_str_in_output": False},
+extra_body={"include_stop_str_in_output": False},
stream=True,
)
# Assertions to check the response structure
@@ -411,6 +430,29 @@ def test_streaming_with_stop_str(openai_client):
last_token = chunk.choices[0].delta.content
assert last_token != "</s>"
response_1 = openai_client.completions.create(
model="default",
prompt="Hello, how are you?",
max_tokens=10,
stream=True,
)
last_token = ""
for chunk in response_1:
last_token = chunk.choices[0].text
assert not last_token.endswith("</s>")
response_1 = openai_client.completions.create(
model="default",
prompt="Hello, how are you?",
max_tokens=10,
extra_body={"include_stop_str_in_output": True},
stream=True,
)
last_token = ""
for chunk in response_1:
last_token = chunk.choices[0].text
assert last_token.endswith("</s>")
def test_non_streaming_chat_with_return_token_ids(openai_client, capsys):
"""