[Iluvatar][CI] Replace the ernie-300B-4layer CI test with ernie-21b. (#6747)

2026-04-22 16:07:51 +08:00 · 2026-03-10 17:25:52 +08:00
parent 596519831c
commit 67388ce2f3
4 changed files with 22 additions and 33 deletions
@@ -5,7 +5,7 @@ echo "$DIR"
 ixsmi

 #先kill一遍
-ps -efww | grep -E 'run_ernie300B_4layer' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
+ps -efww | grep -E 'run_ernie_21b' | grep -v grep | awk '{print $2}' | xargs kill -9 || true

 unset http_proxy
 unset https_proxy
@@ -41,7 +41,7 @@ echo "Run paddle.utils.run_check()"
 python -c "import paddle; paddle.utils.run_check()"

 INCLUDE_FOLDERS=(
-    "ERNIE_300B_4L"
+    # "ERNIE_300B_4L"
    "ERNIE-4.5-21B-A3B-Paddle"
    "ERNIE-4.5-VL-28B-A3B-Paddle"
    "PaddleOCR-VL"
@@ -59,15 +59,22 @@ for filename in "${INCLUDE_FOLDERS[@]}"; do
    cp -r $file $MODEL_DIR
 done

+CONTAINER_PP_DOC_DIR=/root/.paddlex/official_models
+mkdir -p $CONTAINER_PP_DOC_DIR
+echo "start copy $SOURCE_DIR/PP-DocLayoutV2 into $CONTAINER_PP_DOC_DIR"
+cp -r $SOURCE_DIR/PP-DocLayoutV2 $CONTAINER_PP_DOC_DIR
+
 echo "copy done"
 echo "ls $MODEL_DIR"
 ls $MODEL_DIR
+echo "ls $CONTAINER_PP_DOC_DIR"
+ls $CONTAINER_PP_DOC_DIR

 echo "build whl"
 bash build.sh || exit 1

 function print_error_message() {
-    if [ -f "log/launch_worker.0" ]; then
+    if [ -f "log/launch_worker.log" ]; then
        echo "------------------- log/launch_worker.log -----------------"
        cat log/launch_worker.log
    fi
@@ -92,7 +99,7 @@ export FD_SAMPLING_CLASS=rejection
 ################# Test offline ###################

 offline_ci_list=(
-    ${CI_PATH}/run_ernie300B_4layer.py
+    ${CI_PATH}/run_ernie_21b.py
    ${CI_PATH}/run_ernie_vl_28B.py
 )
 echo "test offline ci files: ${offline_ci_list[@]}"
@@ -21,20 +21,20 @@ from fastdeploy.utils import set_random_seed
 tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
 sys.path.insert(0, tests_dir)

-from ci_use.iluvatar_UT.utils import TIMEOUT_MSG, timeout
+from ci_use.iluvatar_UT.utils import timeout


-@timeout(80)
+@timeout(240)
 def offline_infer_check():
    set_random_seed(123)

    prompts = [
-        "Hello, my name is",
+        "The largest ocean is",
    ]
-    sampling_params = SamplingParams(temperature=0.8, top_p=0.00001, max_tokens=16)
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.00001, max_tokens=128)
    graph_optimization_config = {"use_cudagraph": False}
    llm = LLM(
-        model="/model_data/ERNIE_300B_4L",
+        model="/model_data/ERNIE-4.5-21B-A3B-Paddle",
        tensor_parallel_size=2,
        max_model_len=8192,
        quantization="wint8",
@@ -43,24 +43,11 @@ def offline_infer_check():
    )
    outputs = llm.generate(prompts, sampling_params)

-    assert outputs[0].outputs.token_ids == [
-        23768,
-        97000,
-        47814,
-        59335,
-        68170,
-        183,
-        97404,
-        100088,
-        36310,
-        95633,
-        95913,
-        41459,
-        95049,
-        94970,
-        96840,
-        2,
-    ], f"{outputs[0].outputs.token_ids}"
+    for output in outputs:
+        generated_text = output.outputs.text
+        print(f"generated_text={generated_text}")
+        assert "pacific ocean" in generated_text.lower()
+
    print("PASSED")


@@ -69,7 +56,6 @@ if __name__ == "__main__":
        result = offline_infer_check()
        sys.exit(0)
    except TimeoutError:
-        print(TIMEOUT_MSG)
        sys.exit(124)
    except Exception:
        sys.exit(1)
@@ -26,7 +26,7 @@ from fastdeploy.utils import set_random_seed
 tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
 sys.path.insert(0, tests_dir)

-from ci_use.iluvatar_UT.utils import TIMEOUT_MSG, timeout
+from ci_use.iluvatar_UT.utils import timeout


@timeout(240)
@@ -97,7 +97,6 @@ if __name__ == "__main__":
        result = offline_infer_check()
        sys.exit(0)
    except TimeoutError:
-        print(TIMEOUT_MSG)
        sys.exit(124)
    except Exception:
        sys.exit(1)
@@ -23,6 +23,3 @@ def timeout(seconds):
        return wrapper

    return decorator
-
-
-TIMEOUT_MSG = "The timeout exit may be due to multiple processes sharing the same gpu card. You can check this using ixsmi on the device."