mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[XPU] Support W4A8C8-TP4-300B Model (#4068)
* support w4a8 * delete ep block attn * delete moe_topk_select * update note * update * delete useless info * update * add some note * fix some format * update scale info * add ans baseline --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
@@ -13,6 +13,7 @@ from core import TEMPLATE, URL, build_request_payload, send_request
|
||||
|
||||
COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions")
|
||||
|
||||
|
||||
def test_completion_total_tokens():
|
||||
data = {
|
||||
"prompt": "你是谁",
|
||||
@@ -48,7 +49,7 @@ def test_completion_echo_stream_one_prompt_rti():
|
||||
"max_tokens": 2,
|
||||
"return_token_ids": True,
|
||||
}
|
||||
|
||||
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
resp = send_request(COMPLETIONS_URL, payload, stream=True)
|
||||
last_data = None
|
||||
@@ -60,7 +61,7 @@ def test_completion_echo_stream_one_prompt_rti():
|
||||
break
|
||||
if line.strip() == "" or not line.startswith("data: "):
|
||||
continue
|
||||
line = line[len("data: "):]
|
||||
line = line[len("data: ") :]
|
||||
stream_data = json.loads(line)
|
||||
counter += 1
|
||||
if counter == 2: # 当计数器为2时,保存第二包数据
|
||||
@@ -81,9 +82,9 @@ def test_completion_echo_stream_one_prompt():
|
||||
"stream": True,
|
||||
"stream_options": {"include_usage": True, "continuous_usage_stats": True},
|
||||
"echo": True,
|
||||
"max_tokens": 2
|
||||
"max_tokens": 2,
|
||||
}
|
||||
|
||||
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
resp = send_request(COMPLETIONS_URL, payload, stream=True)
|
||||
last_data = None
|
||||
@@ -95,7 +96,7 @@ def test_completion_echo_stream_one_prompt():
|
||||
break
|
||||
if line.strip() == "" or not line.startswith("data: "):
|
||||
continue
|
||||
line = line[len("data: "):]
|
||||
line = line[len("data: ") :]
|
||||
stream_data = json.loads(line)
|
||||
counter += 1
|
||||
if counter == 1: # 当计数器为1时,保存第一包数据
|
||||
@@ -112,14 +113,14 @@ def test_completion_echo_stream_more_prompt():
|
||||
测试echo参数在流式回复中,且设置为回复多个prompt
|
||||
"""
|
||||
data = {
|
||||
"prompt": ["水果的营养价值是如何的?","水的化学式是什么?"],
|
||||
"prompt": ["水果的营养价值是如何的?", "水的化学式是什么?"],
|
||||
"stream": True,
|
||||
"stream_options": {"include_usage": True, "continuous_usage_stats": True},
|
||||
"echo": True,
|
||||
"max_tokens": 2,
|
||||
"return_token_ids": True
|
||||
"return_token_ids": True,
|
||||
}
|
||||
|
||||
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
resp = send_request(COMPLETIONS_URL, payload, stream=True)
|
||||
last_data = None
|
||||
@@ -136,9 +137,9 @@ def test_completion_echo_stream_more_prompt():
|
||||
break
|
||||
if line.strip() == "" or not line.startswith("data: "):
|
||||
continue
|
||||
line = line[len("data: "):]
|
||||
line = line[len("data: ") :]
|
||||
stream_data = json.loads(line)
|
||||
|
||||
|
||||
for choice in stream_data.get("choices", []):
|
||||
index = choice.get("index")
|
||||
if index in packet_count_by_index:
|
||||
@@ -183,13 +184,13 @@ def test_completion_echo_more_prompt():
|
||||
"""
|
||||
data = {
|
||||
"stream": False,
|
||||
"prompt": ["水果的营养价值是如何的?","水的化学式是什么?"],
|
||||
"prompt": ["水果的营养价值是如何的?", "水的化学式是什么?"],
|
||||
"echo": True,
|
||||
"max_tokens": 100
|
||||
"max_tokens": 100,
|
||||
}
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
response = send_request(COMPLETIONS_URL, payload).json()
|
||||
|
||||
|
||||
text_0 = response["choices"][0]["text"]
|
||||
text_1 = response["choices"][1]["text"]
|
||||
assert data["prompt"][0] in text_0, "echo回显不正确"
|
||||
@@ -204,12 +205,8 @@ def test_completion_finish_length():
|
||||
"""
|
||||
非流式回复中,因达到max_token截断检查finish_reasoning参数
|
||||
"""
|
||||
data = {
|
||||
"stream": False,
|
||||
"prompt": "水果的营养价值是如何的?",
|
||||
"max_tokens": 10
|
||||
}
|
||||
|
||||
data = {"stream": False, "prompt": "水果的营养价值是如何的?", "max_tokens": 10}
|
||||
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
response = send_request(COMPLETIONS_URL, payload).json()
|
||||
|
||||
@@ -221,15 +218,10 @@ def test_completion_finish_stop():
|
||||
"""
|
||||
非流式回复中,模型自然回复完成,检查finish_reasoning参数
|
||||
"""
|
||||
data = {
|
||||
"stream": False,
|
||||
"prompt": "简短的回答我:苹果是水果吗?"
|
||||
}
|
||||
|
||||
data = {"stream": False, "prompt": "简短的回答我:苹果是水果吗?"}
|
||||
|
||||
payload = build_request_payload(TEMPLATE, data)
|
||||
response = send_request(COMPLETIONS_URL, payload).json()
|
||||
|
||||
finish_reason = response["choices"][0]["finish_reason"]
|
||||
assert finish_reason == "stop", "无任何中介,finish_reason不为stop"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user