Use Triton qk_norm in both Prefill and Decode (#7213)

Co-authored-by: liuruian <liuruian@baidu.com>
This commit is contained in:
K11OntheBoat
2026-04-10 15:44:01 +08:00
committed by GitHub
parent 5c9fa43150
commit 870dbac370
2 changed files with 2 additions and 2 deletions
+1 -1
View File
@@ -173,7 +173,7 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
content1 = result1["choices"][0]["message"]["content"]
# base result
content2 = "视频中手机支架的颜色是黑色"
content2 = "视频中手机支架的颜色是黑色。"
# Verify that result is same as the base result
assert content1.startswith(content2), content1