feat: add support for API usage with multimodal models (#4548)

* feat: add support for API usage with multimodal models
* completion_tokens contains num_image_tokens
* remove test_request.py
* fix: paddle.device.is_compiled_with_cuda()
* fix test_unstream_without_logprobs
2026-04-23 00:17:25 +08:00 · 2025-10-28 20:23:46 +08:00
parent e1ac90d787
commit 2a9ed72533
10 changed files with 256 additions and 21 deletions
@@ -32,13 +32,9 @@ def test_unstream_with_logprobs():
        "bytes": [231, 137, 155, 233, 161, 191],
        "top_logprobs": None,
    }
-    assert resp_json["usage"] == {
-        "prompt_tokens": 22,
-        "total_tokens": 25,
-        "completion_tokens": 3,
-        "prompt_tokens_details": {"cached_tokens": 0},
-        "completion_tokens_details": {"reasoning_tokens": 0},
-    }
+    assert resp_json["usage"]["prompt_tokens"] == 22
+    assert resp_json["usage"]["completion_tokens"] == 3
+    assert resp_json["usage"]["total_tokens"] == 25


 def test_unstream_without_logprobs():
@@ -65,13 +61,9 @@ def test_unstream_without_logprobs():
    # 校验返回内容与 logprobs 字段
    assert resp_json["choices"][0]["message"]["content"] == "牛顿的"
    assert resp_json["choices"][0]["logprobs"] is None
-    assert resp_json["usage"] == {
-        "prompt_tokens": 22,
-        "total_tokens": 25,
-        "completion_tokens": 3,
-        "prompt_tokens_details": {"cached_tokens": 0},
-        "completion_tokens_details": {"reasoning_tokens": 0},
-    }
+    assert resp_json["usage"]["prompt_tokens"] == 22
+    assert resp_json["usage"]["completion_tokens"] == 3
+    assert resp_json["usage"]["total_tokens"] == 25


 def test_stream_with_logprobs():