[Feature] use phi permute/unpermute & rm swiglu (#6361)

* tp文字输出正常

* B eb5 mini文字输出正常

* eb5mini ep B卡 文字输出正常

* default use phi moe op

* stash

* tp H卡正常

* ep ok

* rm debug

* rm debug tool

* rm del ffn_out

* rm swiglu

* add envs to swiglu

* merge dev

* fix ci baseline

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix ci baseline 2

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
fxyfxy777
2026-03-12 17:01:57 +08:00
committed by GitHub
parent a3d7979711
commit 250ce40b40
18 changed files with 187 additions and 112 deletions
+6 -6
View File
@@ -25,10 +25,10 @@ def test_unstream_with_logprobs():
# 校验返回内容与概率信息
assert resp_json["choices"][0]["message"]["content"] == "牛顿的"
assert resp_json["choices"][0]["logprobs"]["content"][0]["token"] == "牛顿"
- assert resp_json["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.03113006055355072
+ assert resp_json["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.031025361269712448
assert resp_json["choices"][0]["logprobs"]["content"][0]["top_logprobs"][0] == {
"token": "牛顿",
- "logprob": -0.03113006055355072,
+ "logprob": -0.031025361269712448,
"bytes": [231, 137, 155, 233, 161, 191],
"top_logprobs": None,
}
@@ -102,10 +102,10 @@ def test_stream_with_logprobs():
# 校验概率字段
assert result_chunk["choices"][0]["delta"]["content"] == "牛顿"
assert result_chunk["choices"][0]["logprobs"]["content"][0]["token"] == "牛顿"
- assert result_chunk["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.03113006055355072
+ assert result_chunk["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.031025361269712448
assert result_chunk["choices"][0]["logprobs"]["content"][0]["top_logprobs"][0] == {
"token": "牛顿",
- "logprob": -0.03113006055355072,
+ "logprob": -0.031025361269712448,
"bytes": [231, 137, 155, 233, 161, 191],
}
@@ -187,10 +187,10 @@ def test_stream_with_temp_scaled_logprobs():
# 校验概率字段
assert result_chunk["choices"][0]["delta"]["content"] == "牛顿"
assert result_chunk["choices"][0]["logprobs"]["content"][0]["token"] == "牛顿"
- assert result_chunk["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.0068125599063932896
+ assert result_chunk["choices"][0]["logprobs"]["content"][0]["logprob"] == -0.006811376195400953
assert result_chunk["choices"][0]["logprobs"]["content"][0]["top_logprobs"][0] == {
"token": "牛顿",
- "logprob": -0.0068125599063932896,
+ "logprob": -0.006811376195400953,
"bytes": [231, 137, 155, 233, 161, 191],
}