[Iluvatar][CI] Replace the ernie-300B-4layer CI test with ernie-21b. (#6747)

This commit is contained in:
yzwu
2026-03-10 17:25:52 +08:00
committed by GitHub
parent 596519831c
commit 67388ce2f3
4 changed files with 22 additions and 33 deletions
+11 -4
View File
@@ -5,7 +5,7 @@ echo "$DIR"
ixsmi ixsmi
#先kill一遍 #先kill一遍
ps -efww | grep -E 'run_ernie300B_4layer' | grep -v grep | awk '{print $2}' | xargs kill -9 || true ps -efww | grep -E 'run_ernie_21b' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
unset http_proxy unset http_proxy
unset https_proxy unset https_proxy
@@ -41,7 +41,7 @@ echo "Run paddle.utils.run_check()"
python -c "import paddle; paddle.utils.run_check()" python -c "import paddle; paddle.utils.run_check()"
INCLUDE_FOLDERS=( INCLUDE_FOLDERS=(
"ERNIE_300B_4L" # "ERNIE_300B_4L"
"ERNIE-4.5-21B-A3B-Paddle" "ERNIE-4.5-21B-A3B-Paddle"
"ERNIE-4.5-VL-28B-A3B-Paddle" "ERNIE-4.5-VL-28B-A3B-Paddle"
"PaddleOCR-VL" "PaddleOCR-VL"
@@ -59,15 +59,22 @@ for filename in "${INCLUDE_FOLDERS[@]}"; do
cp -r $file $MODEL_DIR cp -r $file $MODEL_DIR
done done
CONTAINER_PP_DOC_DIR=/root/.paddlex/official_models
mkdir -p $CONTAINER_PP_DOC_DIR
echo "start copy $SOURCE_DIR/PP-DocLayoutV2 into $CONTAINER_PP_DOC_DIR"
cp -r $SOURCE_DIR/PP-DocLayoutV2 $CONTAINER_PP_DOC_DIR
echo "copy done" echo "copy done"
echo "ls $MODEL_DIR" echo "ls $MODEL_DIR"
ls $MODEL_DIR ls $MODEL_DIR
echo "ls $CONTAINER_PP_DOC_DIR"
ls $CONTAINER_PP_DOC_DIR
echo "build whl" echo "build whl"
bash build.sh || exit 1 bash build.sh || exit 1
function print_error_message() { function print_error_message() {
if [ -f "log/launch_worker.0" ]; then if [ -f "log/launch_worker.log" ]; then
echo "------------------- log/launch_worker.log -----------------" echo "------------------- log/launch_worker.log -----------------"
cat log/launch_worker.log cat log/launch_worker.log
fi fi
@@ -92,7 +99,7 @@ export FD_SAMPLING_CLASS=rejection
################# Test offline ################### ################# Test offline ###################
offline_ci_list=( offline_ci_list=(
${CI_PATH}/run_ernie300B_4layer.py ${CI_PATH}/run_ernie_21b.py
${CI_PATH}/run_ernie_vl_28B.py ${CI_PATH}/run_ernie_vl_28B.py
) )
echo "test offline ci files: ${offline_ci_list[@]}" echo "test offline ci files: ${offline_ci_list[@]}"
@@ -21,20 +21,20 @@ from fastdeploy.utils import set_random_seed
tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
sys.path.insert(0, tests_dir) sys.path.insert(0, tests_dir)
from ci_use.iluvatar_UT.utils import TIMEOUT_MSG, timeout from ci_use.iluvatar_UT.utils import timeout
@timeout(80) @timeout(240)
def offline_infer_check(): def offline_infer_check():
set_random_seed(123) set_random_seed(123)
prompts = [ prompts = [
"Hello, my name is", "The largest ocean is",
] ]
sampling_params = SamplingParams(temperature=0.8, top_p=0.00001, max_tokens=16) sampling_params = SamplingParams(temperature=0.8, top_p=0.00001, max_tokens=128)
graph_optimization_config = {"use_cudagraph": False} graph_optimization_config = {"use_cudagraph": False}
llm = LLM( llm = LLM(
model="/model_data/ERNIE_300B_4L", model="/model_data/ERNIE-4.5-21B-A3B-Paddle",
tensor_parallel_size=2, tensor_parallel_size=2,
max_model_len=8192, max_model_len=8192,
quantization="wint8", quantization="wint8",
@@ -43,24 +43,11 @@ def offline_infer_check():
) )
outputs = llm.generate(prompts, sampling_params) outputs = llm.generate(prompts, sampling_params)
assert outputs[0].outputs.token_ids == [ for output in outputs:
23768, generated_text = output.outputs.text
97000, print(f"generated_text={generated_text}")
47814, assert "pacific ocean" in generated_text.lower()
59335,
68170,
183,
97404,
100088,
36310,
95633,
95913,
41459,
95049,
94970,
96840,
2,
], f"{outputs[0].outputs.token_ids}"
print("PASSED") print("PASSED")
@@ -69,7 +56,6 @@ if __name__ == "__main__":
result = offline_infer_check() result = offline_infer_check()
sys.exit(0) sys.exit(0)
except TimeoutError: except TimeoutError:
print(TIMEOUT_MSG)
sys.exit(124) sys.exit(124)
except Exception: except Exception:
sys.exit(1) sys.exit(1)
+1 -2
View File
@@ -26,7 +26,7 @@ from fastdeploy.utils import set_random_seed
tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
sys.path.insert(0, tests_dir) sys.path.insert(0, tests_dir)
from ci_use.iluvatar_UT.utils import TIMEOUT_MSG, timeout from ci_use.iluvatar_UT.utils import timeout
@timeout(240) @timeout(240)
@@ -97,7 +97,6 @@ if __name__ == "__main__":
result = offline_infer_check() result = offline_infer_check()
sys.exit(0) sys.exit(0)
except TimeoutError: except TimeoutError:
print(TIMEOUT_MSG)
sys.exit(124) sys.exit(124)
except Exception: except Exception:
sys.exit(1) sys.exit(1)
-3
View File
@@ -23,6 +23,3 @@ def timeout(seconds):
return wrapper return wrapper
return decorator return decorator
TIMEOUT_MSG = "The timeout exit may be due to multiple processes sharing the same gpu card. You can check this using ixsmi on the device."