mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-22 16:07:51 +08:00
[XPU] [CI] Fix xpu ci bug (#7014)
* fix xpu ci bug * Remove unnecessary blank line in conftest.py * Update upload-artifact action to version 6 * Update _xpu_8cards_case_test.yml * fix ci bug * Change exit code on test failure to 1 * fix ci bug * fix ci bug * fix ci bug * fix ci bug * Update conftest.py
This commit is contained in:
@@ -193,13 +193,29 @@ jobs:
|
||||
echo "============================开始运行pytest测试============================"
|
||||
export PYTHONPATH=/workspace/FastDeploy/
|
||||
export PYTHONPATH=$(pwd)/tests/xpu_ci:$PYTHONPATH
|
||||
mkdir -p case_logs
|
||||
set +e
|
||||
python -m pytest -v -s --tb=short tests/xpu_ci/4cards_cases/
|
||||
exit_code=$?
|
||||
set -e
|
||||
|
||||
# 修改case_logs权限,确保Docker外部的runner用户可以读取并上传
|
||||
chmod -R a+rX case_logs/ 2>/dev/null || true
|
||||
|
||||
if [ $exit_code -eq 0 ]; then
|
||||
echo "============================4卡cases测试通过!============================"
|
||||
exit $exit_code
|
||||
else
|
||||
echo "============================4卡cases测试失败,请检查日志!============================"
|
||||
exit $exit_code
|
||||
fi
|
||||
'
|
||||
|
||||
- name: Upload case logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: xpu-4cards-case-logs
|
||||
path: FastDeploy/case_logs/
|
||||
retention-days: 7
|
||||
if-no-files-found: ignore
|
||||
|
||||
@@ -182,8 +182,14 @@ jobs:
|
||||
echo "============================开始运行pytest测试============================"
|
||||
export PYTHONPATH=/workspace/FastDeploy/
|
||||
export PYTHONPATH=$(pwd)/tests/xpu_ci:$PYTHONPATH
|
||||
mkdir -p case_logs
|
||||
set +e
|
||||
python -m pytest -v -s --tb=short tests/xpu_ci/8cards_cases/
|
||||
exit_code=$?
|
||||
set -e
|
||||
|
||||
# 修改case_logs权限,确保Docker外部的runner用户可以读取并上传
|
||||
chmod -R a+rX case_logs/ 2>/dev/null || true
|
||||
|
||||
if [ $exit_code -eq 0 ]; then
|
||||
echo "============================8卡cases测试通过!============================"
|
||||
@@ -192,3 +198,12 @@ jobs:
|
||||
exit $exit_code
|
||||
fi
|
||||
'
|
||||
|
||||
- name: Upload case logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: xpu-8cards-case-logs
|
||||
path: FastDeploy/case_logs/
|
||||
retention-days: 7
|
||||
if-no-files-found: ignore
|
||||
|
||||
@@ -109,7 +109,7 @@ def print_pd_logs_on_failure():
|
||||
log_dirs = ["log_router", "log_prefill", "log_decode"]
|
||||
|
||||
for log_dir in log_dirs:
|
||||
nohup_path = os.path.join(log_dir, "log_0/worklog.0")
|
||||
nohup_path = os.path.join(log_dir, "log_0/workerlog.0")
|
||||
if os.path.exists(nohup_path):
|
||||
print(f"\n========== {nohup_path} ==========")
|
||||
with open(nohup_path, "r") as f:
|
||||
|
||||
@@ -109,7 +109,7 @@ def print_pd_logs_on_failure():
|
||||
log_dirs = ["log_router", "log_prefill", "log_decode"]
|
||||
|
||||
for log_dir in log_dirs:
|
||||
nohup_path = os.path.join(log_dir, "log_0/worklog.0")
|
||||
nohup_path = os.path.join(log_dir, "log_0/workerlog.0")
|
||||
if os.path.exists(nohup_path):
|
||||
print(f"\n========== {nohup_path} ==========")
|
||||
with open(nohup_path, "r") as f:
|
||||
|
||||
@@ -109,7 +109,7 @@ def print_pd_logs_on_failure():
|
||||
log_dirs = ["log_router", "log_prefill", "log_decode"]
|
||||
|
||||
for log_dir in log_dirs:
|
||||
nohup_path = os.path.join(log_dir, "log_0/worklog.0")
|
||||
nohup_path = os.path.join(log_dir, "log_0/workerlog.0")
|
||||
if os.path.exists(nohup_path):
|
||||
print(f"\n========== {nohup_path} ==========")
|
||||
with open(nohup_path, "r") as f:
|
||||
|
||||
@@ -110,7 +110,7 @@ def print_pd_logs_on_failure():
|
||||
log_dirs = ["log_router", "log_prefill", "log_decode"]
|
||||
|
||||
for log_dir in log_dirs:
|
||||
nohup_path = os.path.join(log_dir, "log_0/worklog.0")
|
||||
nohup_path = os.path.join(log_dir, "log_0/workerlog.0")
|
||||
if os.path.exists(nohup_path):
|
||||
print(f"\n========== {nohup_path} ==========")
|
||||
with open(nohup_path, "r") as f:
|
||||
|
||||
@@ -23,6 +23,7 @@ XPU CI测试框架 - 通用配置和辅助函数
|
||||
4. 环境配置 - 设置XPU相关环境变量
|
||||
"""
|
||||
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
@@ -31,6 +32,8 @@ import time
|
||||
|
||||
import pytest
|
||||
|
||||
CASE_LOGS_DIR = os.path.join(os.getcwd(), "case_logs")
|
||||
|
||||
|
||||
def get_xpu_id():
|
||||
"""获取XPU_ID环境变量"""
|
||||
@@ -457,3 +460,42 @@ def setup_logprobs_zmq_env():
|
||||
os.environ[key] = value
|
||||
print(f"设置环境变量: {key}={value}")
|
||||
return original_values
|
||||
|
||||
|
||||
# ============ 日志归档 pytest hook ============
|
||||
|
||||
|
||||
def _archive_case_logs(test_name):
|
||||
"""
|
||||
将当前工作目录下所有 log 开头的文件夹和 server.log 复制到 case_logs/{test_name}/ 下
|
||||
"""
|
||||
dest_dir = os.path.join(CASE_LOGS_DIR, test_name)
|
||||
os.makedirs(dest_dir, exist_ok=True)
|
||||
|
||||
# 复制所有 log* 目录
|
||||
for entry in glob.glob("log*"):
|
||||
if os.path.isdir(entry):
|
||||
shutil.copytree(entry, os.path.join(dest_dir, entry), dirs_exist_ok=True)
|
||||
elif os.path.isfile(entry):
|
||||
# 处理 server.log 等 log 开头的文件
|
||||
shutil.copy2(entry, os.path.join(dest_dir, entry))
|
||||
|
||||
# 单独处理 server.log(不以 log 开头但也是关键日志)
|
||||
if os.path.exists("server.log") and not os.path.exists(os.path.join(dest_dir, "server.log")):
|
||||
shutil.copy2("server.log", os.path.join(dest_dir, "server.log"))
|
||||
|
||||
|
||||
@pytest.hookimpl(hookwrapper=True, trylast=True)
def pytest_runtest_makereport(item, call):
    """
    Archive the case logs once the report for a test's "call" phase exists.

    Runs as a hook wrapper: it yields to let the report be built, then, for
    the "call" phase only, archives logs under a directory named after the
    test file (basename without the ``.py`` extension).

    Args:
        item: The test item being reported; ``item.fspath`` provides the
            test file path.
        call: Call information for the phase (unused here).

    Archiving is best-effort: a failure is printed but never propagated, so
    it cannot change the outcome of the test run.
    """
    outcome = yield
    report = outcome.get_result()

    # Setup and teardown reports are ignored; only the "call" phase archives.
    if report.when != "call":
        return

    # The archive directory is named after the test file, without ".py".
    test_file = os.path.basename(item.fspath)
    test_name = os.path.splitext(test_file)[0]
    try:
        _archive_case_logs(test_name)
    except Exception as exc:
        # Deliberately broad: log archiving must never fail a test. Report
        # the problem instead of discarding it so missing artifacts can be
        # diagnosed from the CI output.
        print(f"[case_logs] failed to archive logs for {test_name}: {exc!r}")
|
||||
|
||||
Reference in New Issue
Block a user