[Feature] Support include_stop_str_in_output in completion api (#3096)

* [Feature] Support include_stop_str_in_output in completion api

* Fix ci test

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
Jiang-Jia-Jun
2025-07-30 22:18:48 +08:00
committed by GitHub
parent 7dfdd157ac
commit 0616c208d2
4 changed files with 56 additions and 17 deletions
+46 -4
View File
@@ -357,7 +357,7 @@ def test_non_streaming_with_stop_str(openai_client):
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=1,
max_tokens=5,
-metadata={"include_stop_str_in_output": True},
+extra_body={"include_stop_str_in_output": True},
stream=False,
)
# Assertions to check the response structure
@@ -370,7 +370,7 @@ def test_non_streaming_with_stop_str(openai_client):
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=1,
max_tokens=5,
-metadata={"include_stop_str_in_output": False},
+extra_body={"include_stop_str_in_output": False},
stream=False,
)
# Assertions to check the response structure
@@ -378,6 +378,25 @@ def test_non_streaming_with_stop_str(openai_client):
assert len(response.choices) > 0
assert not response.choices[0].message.content.endswith("</s>")
response = openai_client.completions.create(
model="default",
prompt="Hello, how are you?",
temperature=1,
max_tokens=1024,
stream=False,
)
assert not response.choices[0].text.endswith("</s>")
response = openai_client.completions.create(
model="default",
prompt="Hello, how are you?",
temperature=1,
max_tokens=1024,
extra_body={"include_stop_str_in_output": True},
stream=False,
)
assert response.choices[0].text.endswith("</s>")
def test_streaming_with_stop_str(openai_client):
"""
@@ -388,7 +407,7 @@ def test_streaming_with_stop_str(openai_client):
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=1,
max_tokens=5,
-metadata={"include_stop_str_in_output": True},
+extra_body={"include_stop_str_in_output": True},
stream=True,
)
# Assertions to check the response structure
@@ -402,7 +421,7 @@ def test_streaming_with_stop_str(openai_client):
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=1,
max_tokens=5,
-metadata={"include_stop_str_in_output": False},
+extra_body={"include_stop_str_in_output": False},
stream=True,
)
# Assertions to check the response structure
@@ -411,6 +430,29 @@ def test_streaming_with_stop_str(openai_client):
last_token = chunk.choices[0].delta.content
assert last_token != "</s>"
response_1 = openai_client.completions.create(
model="default",
prompt="Hello, how are you?",
max_tokens=10,
stream=True,
)
last_token = ""
for chunk in response_1:
last_token = chunk.choices[0].text
assert not last_token.endswith("</s>")
response_1 = openai_client.completions.create(
model="default",
prompt="Hello, how are you?",
max_tokens=10,
extra_body={"include_stop_str_in_output": True},
stream=True,
)
last_token = ""
for chunk in response_1:
last_token = chunk.choices[0].text
assert last_token.endswith("</s>")
def test_non_streaming_chat_with_return_token_ids(openai_client, capsys):
"""