# Mirror of https://github.com/PaddlePaddle/FastDeploy.git
# Synced 2026-04-23 00:17:25 +08:00, commit 955785e2e0 ("fix typo")
import os
|
|
import shutil
|
|
import signal
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
import openai
|
|
import pytest
|
|
from utils.rollout_routing_replay_test_utils import check_routing_replay_chat_completion
|
|
from utils.serving_utils import (
|
|
FD_API_PORT,
|
|
FD_CACHE_QUEUE_PORT,
|
|
FD_ENGINE_QUEUE_PORT,
|
|
FD_METRICS_PORT,
|
|
clean_ports,
|
|
is_port_open,
|
|
)
|
|
|
|
|
|
@pytest.fixture(scope="session", autouse=True)
def setup_and_run_server():
    """
    Pytest fixture that runs once per test session:
    - Cleans ports before tests
    - Starts the API server as a subprocess
    - Waits for server port to open (up to 300 seconds)
    - Tears down server after all tests finish
    """
    print("Pre-test port cleanup...")
    # Free the FD_* ports in case a previous (crashed) run left servers behind.
    clean_ports()

    print("log dir clean ")
    # Remove stale server logs so this session's output is isolated.
    if os.path.exists("log") and os.path.isdir("log"):
        shutil.rmtree("log")

    # Model weights location: $MODEL_PATH/<model-dir> when MODEL_PATH is set,
    # otherwise a relative path in the current working directory.
    base_path = os.getenv("MODEL_PATH")
    if base_path:
        model_path = os.path.join(base_path, "ernie-4_5-21b-a3b-bf16-paddle")
    else:
        model_path = "./ernie-4_5-21b-a3b-bf16-paddle"

    log_path = "server.log"
    # Launch the FastDeploy OpenAI-compatible API server with routing replay
    # enabled, persisting routing decisions to a local store for later checks.
    cmd = [
        sys.executable,
        "-m",
        "fastdeploy.entrypoints.openai.api_server",
        "--model",
        model_path,
        "--port",
        str(FD_API_PORT),
        "--tensor-parallel-size",
        "1",
        "--engine-worker-queue-port",
        str(FD_ENGINE_QUEUE_PORT),
        "--metrics-port",
        str(FD_METRICS_PORT),
        "--cache-queue-port",
        str(FD_CACHE_QUEUE_PORT),
        "--max-model-len",
        "32768",
        "--max-num-seqs",
        "1",
        "--quantization",
        "wint4",
        "--graph-optimization-config",
        '{"use_cudagraph":true}',
        "--routing-replay-config",
        '{"enable_routing_replay":true, "routing_store_type":"local", "local_store_dir":"./R3_tmp/routing_replay_output_eb45"}',
    ]

    # Start subprocess in new process group
    with open(log_path, "w") as logfile:
        process = subprocess.Popen(
            cmd,
            stdout=logfile,
            stderr=subprocess.STDOUT,
            start_new_session=True,  # Enables killing full group via os.killpg
        )

    # Wait up to 300 seconds for API server to be ready
    for _ in range(300):
        if is_port_open("127.0.0.1", FD_API_PORT):
            print(f"API server is up on port {FD_API_PORT}")
            break
        time.sleep(1)
    else:
        # Loop exhausted without the port opening: kill the whole server
        # process group (best effort) and fail the session.
        print("[TIMEOUT] API server failed to start in 5 minutes. Cleaning up...")
        try:
            # With start_new_session=True the child's pid is its pgid,
            # so killpg(pid, ...) reaches the server and its workers.
            os.killpg(process.pid, signal.SIGTERM)
        except Exception as e:
            print(f"Failed to kill process group: {e}")
        raise RuntimeError(f"API server did not start on port {FD_API_PORT}")

    yield  # Run tests

    print("\n===== Post-test server cleanup... =====")
    try:
        os.killpg(process.pid, signal.SIGTERM)
        print(f"API server (pid={process.pid}) terminated")
    except Exception as e:
        print(f"Failed to terminate API server: {e}")
|
|
|
|
|
|
@pytest.fixture
def openai_client():
    """Build an OpenAI SDK client pointed at the locally running API server."""
    host = "0.0.0.0"
    port = str(FD_API_PORT)
    return openai.Client(
        base_url=f"http://{host}:{port}/v1",
        api_key="EMPTY_API_KEY",  # server does not validate the key
    )
|
|
|
|
|
|
# ==========================
# Test Rollout Routing Replay
# ==========================
|
|
def test_r3_accuracy(openai_client):
    """Check routing-replay accuracy of chat completions for the eb45 model."""
    check_routing_replay_chat_completion(
        openai_client=openai_client,
        moe_layer_num=27,  # EB45 moe layer num: 27
        model_name="eb45",
    )
|