MoE defaults to Triton's blockwise FP8 in the TP case (#3678)

This commit is contained in:
周周周
2025-08-29 11:07:30 +08:00
committed by GitHub
parent b6edd15d55
commit 17b414c2df
7 changed files with 5 additions and 10 deletions
@@ -116,7 +116,6 @@ export FD_ATTENTION_BACKEND=FLASH_ATTN
export FD_LOG_DIR="prefill_log"
quant_type=block_wise_fp8
export FD_USE_DEEP_GEMM=0
python -m fastdeploy.entrypoints.openai.api_server --model baidu/ERNIE-4.5-21B-A3B-Paddle \
--max-model-len 131072 \
@@ -136,7 +135,6 @@ export FLAGS_max_partition_size=2048
export FD_LOG_DIR="decode_log"
quant_type=block_wise_fp8
export FD_USE_DEEP_GEMM=0
python -m fastdeploy.entrypoints.openai.api_server --model baidu/ERNIE-4.5-21B-A3B-Paddle \
--max-model-len 131072 \