mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] use phi permute/unpermute & rm swiglu (#6361)
* tp文字输出正常
* B eb5 mini文字输出正常
* eb5mini ep B卡 文字输出正常
* default use phi moe op
* stash
* tp H卡正常
* ep ok
* rm debug
* rm debug tool
* rm del ffn_out
* rm swiglu
* add envs to swiglu
* merge dev
* fix ci baseline

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix ci baseline 2

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -25,10 +25,10 @@ def test_unstream_with_logprobs():
|
||||
# 校验返回内容与概率信息
|
||||
assert resp_json["choices"][0]["message"]["content"] == "牛顿的"
|
||||
assert resp_json["choices"][0]["logprobs"]["content"][0]["token"] == "牛顿"
|
||||
- assert resp_json["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.03113006055355072
|
||||
+ assert resp_json["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.031025361269712448
|
||||
assert resp_json["choices"][0]["logprobs"]["content"][0]["top_logprobs"][0] == {
|
||||
"token": "牛顿",
|
||||
- "logprob": -0.03113006055355072,
|
||||
+ "logprob": -0.031025361269712448,
|
||||
"bytes": [231, 137, 155, 233, 161, 191],
|
||||
"top_logprobs": None,
|
||||
}
|
||||
@@ -102,10 +102,10 @@ def test_stream_with_logprobs():
|
||||
# 校验概率字段
|
||||
assert result_chunk["choices"][0]["delta"]["content"] == "牛顿"
|
||||
assert result_chunk["choices"][0]["logprobs"]["content"][0]["token"] == "牛顿"
|
||||
- assert result_chunk["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.03113006055355072
|
||||
+ assert result_chunk["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.031025361269712448
|
||||
assert result_chunk["choices"][0]["logprobs"]["content"][0]["top_logprobs"][0] == {
|
||||
"token": "牛顿",
|
||||
- "logprob": -0.03113006055355072,
|
||||
+ "logprob": -0.031025361269712448,
|
||||
"bytes": [231, 137, 155, 233, 161, 191],
|
||||
}
|
||||
|
||||
@@ -187,10 +187,10 @@ def test_stream_with_temp_scaled_logprobs():
|
||||
# 校验概率字段
|
||||
assert result_chunk["choices"][0]["delta"]["content"] == "牛顿"
|
||||
assert result_chunk["choices"][0]["logprobs"]["content"][0]["token"] == "牛顿"
|
||||
- assert result_chunk["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.0068125599063932896
|
||||
+ assert result_chunk["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.006811376195400953
|
||||
assert result_chunk["choices"][0]["logprobs"]["content"][0]["top_logprobs"][0] == {
|
||||
"token": "牛顿",
|
||||
- "logprob": -0.0068125599063932896,
|
||||
+ "logprob": -0.006811376195400953,
|
||||
"bytes": [231, 137, 155, 233, 161, 191],
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user