[Feature] Unify fp8 block_wise quant ops (#5991)

* quant stash

* blockwise_quant

* precommit

* rm tensor.cut

* tp ok

* add swiglu

* rm outdated code

* fix activation ut

* change baseline

* fix baseline error
This commit is contained in:
fxyfxy777
2026-01-15 21:50:37 +08:00
committed by GitHub
parent d38cd8b40b
commit 4c92035f2d
17 changed files with 55 additions and 571 deletions
+1 -1
View File
@@ -185,7 +185,7 @@ jobs:
-d "{\"messages\": [{\"role\": \"user\", \"content\": \"1+1=?\"}], \"logprobs\": true}"
set +e
rm -rf ./baseline_output
-cp -r baseline/ERNIE-4.5-0.3B-Paddle ./baseline_output
+cp -r baseline_dev/ERNIE-4.5-0.3B-Paddle ./baseline_output
LOGPROB_EXIT_CODE=0
python3.10 lanucher.py --request_template TOKEN_LOGPROB --url http://localhost:${FD_API_PORT}/v1/chat/completions --case ./cases/demo.yaml --concurrency 1 --name demo --exe logprob || LOGPROB_EXIT_CODE=$?
echo "LOGPROB_EXIT_CODE=${LOGPROB_EXIT_CODE}" > /workspace/exit_code.env