mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 08:21:53 +08:00
[Feature] Unify fp8 block_wise quant ops (#5991)
* quant stash * blockwise_quant * precommit * rm tensor.cut * tp ok * add swiglu * rm outdate code * fix activate ut * change baseline * fix baseline error
This commit is contained in:
@@ -185,7 +185,7 @@ jobs:
|
||||
-d "{\"messages\": [{\"role\": \"user\", \"content\": \"1+1=?\"}], \"logprobs\": true}"
|
||||
set +e
|
||||
rm -rf ./baseline_output
|
||||
cp -r baseline/ERNIE-4.5-0.3B-Paddle ./baseline_output
|
||||
cp -r baseline_dev/ERNIE-4.5-0.3B-Paddle ./baseline_output
|
||||
LOGPROB_EXIT_CODE=0
|
||||
python3.10 lanucher.py --request_template TOKEN_LOGPROB --url http://localhost:${FD_API_PORT}/v1/chat/completions --case ./cases/demo.yaml --concurrency 1 --name demo --exe logprob || LOGPROB_EXIT_CODE=$?
|
||||
echo "LOGPROB_EXIT_CODE=${LOGPROB_EXIT_CODE}" > /workspace/exit_code.env
|
||||
|
||||
Reference in New Issue
Block a user