[CI] Disable unstable test jobs and cases (#4799)

[CI] Disable unstable test jobs and cases
2026-04-23 00:17:25 +08:00 · 2025-11-05 10:28:53 +08:00
parent 61856e55ce
commit 7cee8030af
7 changed files with 2 additions and 251 deletions
@@ -206,13 +206,6 @@ jobs:
          check_service 90
          python -m pytest -sv test_max_waiting_time.py || TEST_EXIT_CODE=1

-          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-            -H "Content-Type: application/json" \
-            -d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"21b_mtp.yaml\", \"--enable-logprob\": \"False\"}"
-          check_service 180
-          export TEMPLATE=TOKEN_NORMAL
-          python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1
-
          popd
          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
          '
@@ -75,23 +75,3 @@ jobs:
      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
      FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
      MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
-
-  accuracy_test:
-    name: Run Accuracy Tests
-    needs: [clone,build]
-    uses: ./.github/workflows/_accuracy_test.yml
-    with:
-      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
-      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
-      FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
-      MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
-
-  stable_test:
-    name: Run Stable Tests
-    needs: [clone,build]
-    uses: ./.github/workflows/_stable_test.yml
-    with:
-      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
-      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
-      FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
-      MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
@@ -1,8 +0,0 @@
-max_model_len: 32768
-max_num_seqs: 128
-tensor_parallel_size: 1
-quantization: wint4
-speculative_config:
-  method: mtp
-  num_speculative_tokens: 1
-  model: /MODELDATA/ernie-4_5-21b-a3b-bf16-paddle/mtp/
@@ -1,9 +0,0 @@
-max_model_len: 32768
-max_num_seqs: 128
-tensor_parallel_size: 1
-quantization: wint4
-graph_optimization_config:
-  graph_opt_level: 1
-  sot_warmup_sizes: [2,16,32,64]
-  use_cudagraph: True
-  full_cuda_graph: False
@@ -425,7 +425,7 @@ def test_streaming_with_stop_str(openai_client):
    last_token = ""
    for chunk in response:
        last_token = chunk.choices[0].delta.content
-    assert last_token == "</s>"
+    assert last_token.endswith("</s>")

    response = openai_client.chat.completions.create(
        model="default",
@@ -589,7 +589,7 @@ def test_streaming_with_stop_str(openai_client):
    last_token = ""
    for chunk in response:
        last_token = chunk.choices[0].delta.content
-    assert last_token == "</s>"
+    assert last_token.endswith("</s>")

    response = openai_client.chat.completions.create(
        model="default",
@@ -1,205 +0,0 @@
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-
-import pytest
-
-current_dir = os.path.dirname(os.path.abspath(__file__))
-project_root = os.path.abspath(os.path.join(current_dir, ".."))
-if project_root not in sys.path:
-    sys.path.insert(0, project_root)
-
-from tests.model_loader.utils import (
-    check_tokens_id_and_text_close,
-    form_model_get_output_topp0,
-    form_model_get_output_topp1,
-    get_paddle_model_path,
-    get_torch_model_path,
-    run_with_timeout,
-)
-
-FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
-FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
-
-prompts = ["解释下”温故而知新”", "Hello, how are you?"]
-
-
-model_param_map = {
-    "Qwen3-0.6B": {
-        "max_num_seqs": 1,
-        "quantizations": ["None", "wint8", "wint4"],
-    },
-    "ernie-4_5-21b-a3b-bf16-paddle": {
-        "max_num_seqs": 1,
-        "tensor_parallel_size": 2,
-        "quantizations": [
-            "wint8",
-        ],
-    },
-    "Qwen2-7B-Instruct": {
-        "max_num_seqs": 1,
-        "quantizations": ["wint4"],
-    },
-    "Qwen2.5-VL-7B-Instruct": {
-        "max_num_seqs": 1,
-        "quantizations": ["wint4"],
-        "is_mm": True,
-        "torch_model_name_or_path": "Qwen2.5-VL-7B-Instruct-PT",
-    },
-    "Qwen3-30B-A3B": {
-        "tensor_parallel_size": 2,
-        "max_num_seqs": 1,
-        "quantizations": [
-            {
-                "quant_type": "block_wise_fp8",
-                "backend": "triton",
-                "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17"},
-            },
-            {
-                "quant_type": "block_wise_fp8",
-                "backend": "deepgemm",
-                "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17", "FD_USE_DEEP_GEMM": "1"},
-            },
-        ],
-    },
-    "DeepSeek-V3-0324": {
-        "tensor_parallel_size": 2,
-        "quantizations": [
-            {
-                "quant_type": "wint4",
-                "env": {
-                    "FD_ATTENTION_BACKEND": "MLA_ATTN",
-                    "FLAGS_mla_use_tensorcore": "1",
-                    "FLAGS_flash_attn_version": "3",
-                    "FD_USE_MACHETE": "1",
-                },
-            },
-        ],
-    },
-}
-
-
-params = []
-for model, cfg in model_param_map.items():
-    for q in cfg["quantizations"]:
-        if isinstance(q, dict):
-            quant, backend, env = q["quant_type"], q.get("backend", "default"), q.get("env", {})
-        else:
-            quant, backend, env = q, "default", {}
-        params.append(
-            pytest.param(
-                model,
-                cfg.get("torch_model_name_or_path", ""),
-                cfg.get("tensor_parallel_size", 1),
-                cfg.get("max_num_seqs", 1),
-                cfg.get("max_model_len", 1024),
-                quant,
-                cfg.get("max_tokens", 32),
-                env,
-                cfg.get("is_mm", False),
-                marks=[pytest.mark.core_model],
-                id=f"{model}.{quant}.{backend}",
-            )
-        )
-
-
-@pytest.mark.parametrize(
-    "model_name_or_path,torch_model_name_or_path,tensor_parallel_size,max_num_seqs,max_model_len,quantization,max_tokens,env,is_mm",
-    params,
-)
-def test_common_model(
-    fd_runner,
-    model_name_or_path: str,
-    torch_model_name_or_path: str,
-    tensor_parallel_size: int,
-    max_num_seqs,
-    max_model_len: int,
-    max_tokens: int,
-    quantization: str,
-    env,
-    is_mm: bool,
-    monkeypatch,
-) -> None:
-    model_path = get_paddle_model_path(model_name_or_path)
-    if env:
-        for k, v in env.items():
-            monkeypatch.setenv(k, v)
-
-    form_model_get_output = form_model_get_output_topp0 if not is_mm else form_model_get_output_topp1
-    fd_outputs_v0 = run_with_timeout(
-        target=form_model_get_output,
-        args=(
-            fd_runner,
-            model_path,
-            tensor_parallel_size,
-            max_num_seqs,
-            max_model_len,
-            max_tokens,
-            quantization,
-            "default",
-            FD_ENGINE_QUEUE_PORT,
-            prompts,
-            FD_CACHE_QUEUE_PORT,
-        ),
-    )
-    fd_outputs_v1 = run_with_timeout(
-        target=form_model_get_output,
-        args=(
-            fd_runner,
-            model_path,
-            tensor_parallel_size,
-            max_num_seqs,
-            max_model_len,
-            max_tokens,
-            quantization,
-            "default_v1",
-            FD_ENGINE_QUEUE_PORT,
-            prompts,
-            FD_CACHE_QUEUE_PORT,
-        ),
-    )
-
-    check_tokens_id_and_text_close(
-        outputs_0_lst=fd_outputs_v0,
-        outputs_1_lst=fd_outputs_v1,
-        name_0="default loader",
-        name_1="default_v1 loader",
-    )
-
-    if torch_model_name_or_path != "":
-        torch_model_path = get_torch_model_path(torch_model_name_or_path)
-        fd_outputs_v1_torch = run_with_timeout(
-            target=form_model_get_output,
-            args=(
-                fd_runner,
-                torch_model_path,
-                tensor_parallel_size,
-                max_num_seqs,
-                max_model_len,
-                max_tokens,
-                quantization,
-                "default_v1",
-                FD_ENGINE_QUEUE_PORT,
-                prompts,
-                FD_CACHE_QUEUE_PORT,
-            ),
-        )
-        check_tokens_id_and_text_close(
-            outputs_0_lst=fd_outputs_v1,
-            outputs_1_lst=fd_outputs_v1_torch,
-            name_0="default loader",
-            name_1="default_v1 loader",
-        )