[FDConfig]Turn on the CUDAGraph + RL switch (#4508)

* Turn on the CUDAGraph + RL switch

* reduce max_num_seqs and number of request
This commit is contained in:
RAM
2025-10-23 11:08:07 +08:00
committed by GitHub
parent 918e4e9850
commit 8a02ab43a8
3 changed files with 5 additions and 6 deletions
+3 -4
View File
@@ -1510,9 +1510,7 @@ class FDConfig:
             self.structured_outputs_config.guided_decoding_backend = "xgrammar"
         # Adjustment GraphOptConfig
-        if (self.scheduler_config.splitwise_role != "mixed") or (
-            self.load_config is not None and self.load_config.dynamic_load_weight is True
-        ):
+        if self.scheduler_config.splitwise_role != "mixed":
             self.graph_opt_config.use_cudagraph = False
             logger.info(
                 "CUDAGraph does not support to be started together with PD Disaggregation temporarily, but has been automatically closed!"
@@ -1630,11 +1628,12 @@ class FDConfig:
         self.scheduler_config.check()
         # Check graph optimization config
-        if self.graph_opt_config.graph_opt_level > 0 or self.graph_opt_config.use_cudagraph:
+        if self.graph_opt_config.graph_opt_level > 0:
             if self.load_config is not None:
                 assert (
                     self.load_config.dynamic_load_weight is False
                 ), "Static graph cannot be used in RL scene temporarily"
         if int(envs.ENABLE_V1_KVCACHE_SCHEDULER) == 1:
             assert (
                 int(envs.FD_DISABLED_RECOVER) == 0
+1 -1
View File
@@ -38,7 +38,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
     --cache-queue-port ${FD_CACHE_QUEUE_PORT} \
     --quantization wint8 \
     --max-model-len 32768 \
-    --max-num-seqs 256 \
+    --max-num-seqs 1 \
     --gpu-memory-utilization 0.9 \
     --model "$MODEL_PATH" \
     --load-strategy ipc_snapshot \
+1 -1
View File
@@ -12,7 +12,7 @@ PORT="${FD_API_PORT}" # 这里需要配合启动脚本那个URL PORT
 BASE_URL="http://$HOST:$PORT"
 TOTAL_ROUNDS=30
-CHAT_REQUESTS_PER_ROUND=5
+CHAT_REQUESTS_PER_ROUND=1
 export CUDA_VISIBLE_DEVICES=0,1
 MAX_MEMORY_MB=10240 # 10GB