mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
Use triton qk_norm both in Prefill and Decode (#7213)
Co-authored-by: “liuruian” <liuruian@baidu.com>
This commit is contained in:
@@ -173,7 +173,7 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
     content1 = result1["choices"][0]["message"]["content"]
 
     # base result
-    content2 = "视频中手机支架的颜色是黑色的。"
+    content2 = "视频中手机支架的颜色是黑色。"
 
     # Verify that result is same as the base result
     assert content1.startswith(content2), content1
Reference in New Issue
Block a user