import os
import shutil
import signal
import subprocess
import sys
import time

import openai
import pytest
from utils.rollout_routing_replay_test_utils import check_routing_replay_chat_completion
from utils.serving_utils import (
    FD_API_PORT,
    FD_CACHE_QUEUE_PORT,
    FD_ENGINE_QUEUE_PORT,
    FD_METRICS_PORT,
    clean_ports,
    is_port_open,
)


@pytest.fixture(scope="session", autouse=True)
def setup_and_run_server():
    """
    Pytest fixture that runs once per test session:
    - Cleans ports before tests
    - Starts the API server as a subprocess
    - Waits for server port to open (up to 300 seconds)
    - Tears down server after all tests finish
    """
    print("Pre-test port cleanup...")
    clean_ports()

    # Remove stale logs so each session starts from a clean slate.
    print("log dir clean ")
    if os.path.exists("log") and os.path.isdir("log"):
        shutil.rmtree("log")

    # MODEL_PATH lets CI point at a shared model cache; fall back to a
    # relative checkout-local path otherwise.
    base_path = os.getenv("MODEL_PATH")
    if base_path:
        model_path = os.path.join(base_path, "ernie-4_5-21b-a3b-bf16-paddle")
    else:
        model_path = "./ernie-4_5-21b-a3b-bf16-paddle"

    log_path = "server.log"
    cmd = [
        sys.executable,
        "-m",
        "fastdeploy.entrypoints.openai.api_server",
        "--model",
        model_path,
        "--port",
        str(FD_API_PORT),
        "--tensor-parallel-size",
        "1",
        "--engine-worker-queue-port",
        str(FD_ENGINE_QUEUE_PORT),
        "--metrics-port",
        str(FD_METRICS_PORT),
        "--cache-queue-port",
        str(FD_CACHE_QUEUE_PORT),
        "--max-model-len",
        "32768",
        "--max-num-seqs",
        "1",
        "--quantization",
        "wint4",
        "--graph-optimization-config",
        '{"use_cudagraph":true}',
        "--routing-replay-config",
        '{"enable_routing_replay":true, "routing_store_type":"local", "local_store_dir":"./R3_tmp/routing_replay_output_eb45"}',
    ]

    # Start subprocess in a new process group; the inherited file descriptor
    # keeps the log file open for the child even after the `with` block exits.
    with open(log_path, "w") as logfile:
        process = subprocess.Popen(
            cmd,
            stdout=logfile,
            stderr=subprocess.STDOUT,
            start_new_session=True,  # Enables killing the full group via os.killpg
        )

    # Wait up to 300 seconds for the API server to be ready.
    for _ in range(300):
        if is_port_open("127.0.0.1", FD_API_PORT):
            print(f"API server is up on port {FD_API_PORT}")
            break
        time.sleep(1)
    else:
        print("[TIMEOUT] API server failed to start in 5 minutes. Cleaning up...")
        try:
            os.killpg(process.pid, signal.SIGTERM)
            process.wait(timeout=10)  # reap the child to avoid a zombie
        except Exception as e:
            print(f"Failed to kill process group: {e}")
        raise RuntimeError(f"API server did not start on port {FD_API_PORT}")

    yield  # Run tests

    print("\n===== Post-test server cleanup... =====")
    try:
        os.killpg(process.pid, signal.SIGTERM)
        process.wait(timeout=10)  # reap the child to avoid a zombie
        print(f"API server (pid={process.pid}) terminated")
    except Exception as e:
        print(f"Failed to terminate API server: {e}")


@pytest.fixture
def openai_client():
    """Return an OpenAI client pointed at the locally launched API server."""
    ip = "0.0.0.0"
    service_http_port = str(FD_API_PORT)
    client = openai.Client(
        base_url=f"http://{ip}:{service_http_port}/v1",
        api_key="EMPTY_API_KEY",
    )
    return client


# ==========================
# Test Rollout Routing Replay
# ==========================
def test_r3_accuracy(openai_client):
    """Verify routing-replay accuracy for the EB45 model across all MoE layers."""
    moe_layer_num = 27  # EB45 moe layer num: 27
    check_routing_replay_chat_completion(
        openai_client=openai_client,
        moe_layer_num=moe_layer_num,
        model_name="eb45",
    )