Files
FastDeploy/tests/engine/test_scheduler_metrics_logger.py
T
2026-02-10 09:37:11 +08:00

102 lines
3.7 KiB
Python

"""
Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import types
from unittest import mock
from fastdeploy.engine.sched.scheduler_metrics_logger import SchedulerMetricsLogger
def test_on_decode_tokens_accumulates():
    """Check that only positive counts add to the pending decode-token tally."""
    metrics = SchedulerMetricsLogger(enabled=True, dp_rank=0)
    # Pin the starting value so the final tally reflects only the calls below.
    metrics._decode_tokens_since_last = 0

    # One positive, one zero and one negative count, in that order.
    for count in (3, 0, -1):
        metrics.on_decode_tokens(count)

    # The zero and negative counts are expected to leave the tally untouched.
    assert metrics._decode_tokens_since_last == 3
def test_log_prefill_batch_logs_expected_message():
    """Check that a prefill batch emits one info line containing every summary field."""
    metrics = SchedulerMetricsLogger(enabled=True, dp_rank=2)
    # Swap in a mock so the emitted message can be inspected.
    metrics._logger = mock.Mock()

    # Two stand-in requests exposing only the attributes set here.
    first_req = types.SimpleNamespace(prefill_start_index=0, prefill_end_index=4, num_cached_tokens=2)
    second_req = types.SimpleNamespace(prefill_start_index=3, prefill_end_index=3, num_cached_tokens=1)

    metrics.log_prefill_batch(
        prefill_reqs=[first_req, second_req],
        running_cnt=5,
        queue_cnt=6,
        tokens_used=10,
        token_usage=0.75,
    )

    info_mock = metrics._logger.info
    info_mock.assert_called_once()
    # The message is the first positional argument of the single info() call.
    text = info_mock.call_args.args[0]

    # Expected totals for the two requests above: 4 new tokens and 3 cached
    # tokens (consistent with summing end-start spans and cached counts;
    # the implementation is not visible from this test).
    expected_fragments = (
        "Prefill batch",
        "dp_rank: 2",
        "#new-seq: 2",
        "#new-token: 4",
        "#cached-token: 3",
        "token usage: 0.75",
        "#running-req: 5",
        "#queue-req: 6",
    )
    for fragment in expected_fragments:
        assert fragment in text
def test_log_decode_batch_computes_throughput(monkeypatch):
    """Check the throughput figure in the decode log line and the counter reset after it."""

    def frozen_clock():
        # Fixed "now" so the elapsed time in the test is deterministic.
        return 3.0

    metrics = SchedulerMetricsLogger(enabled=True, dp_rank=1)
    metrics._logger = mock.Mock()

    # 10 tokens pending since a previous tick at t=1.0.
    metrics._last_decode_tic = 1.0
    metrics._decode_tokens_since_last = 10
    # One batch short of the interval; the test expects the next call to log.
    metrics._decode_batch_count = metrics._decode_log_interval - 1

    monkeypatch.setattr(
        "fastdeploy.engine.sched.scheduler_metrics_logger.time.perf_counter",
        frozen_clock,
    )

    metrics.log_decode_batch(running_cnt=4, queue_cnt=7, tokens_used=8, token_usage=0.5, use_cudagraph=True)

    info_mock = metrics._logger.info
    info_mock.assert_called_once()
    text = info_mock.call_args.args[0]

    # 10 tokens over (3.0 - 1.0) seconds gives the expected 5.00 token/s.
    for fragment in (
        "Decode batch",
        "dp_rank: 1",
        "gen throughput (token/s): 5.00",
        "#queue-req: 7",
    ):
        assert fragment in text

    # After logging, the tally is expected to reset and the tick to move to "now".
    assert metrics._decode_tokens_since_last == 0
    assert metrics._last_decode_tic == 3.0
def test_log_decode_batch_logs_every_decode_interval():
    """Check that decode logging stays silent until the interval-th batch."""
    metrics = SchedulerMetricsLogger(enabled=True, dp_rank=0)
    metrics._logger = mock.Mock()

    # Same arguments for every call; only the number of calls matters here.
    batch_kwargs = dict(running_cnt=1, queue_cnt=2, tokens_used=3, token_usage=0.2, use_cudagraph=False)

    quiet_calls = metrics._decode_log_interval - 1
    for _ in range(quiet_calls):
        metrics.log_decode_batch(**batch_kwargs)
    # Nothing should be logged before the interval is reached.
    metrics._logger.info.assert_not_called()

    # The next batch completes the interval and should log exactly once.
    metrics.log_decode_batch(**batch_kwargs)
    metrics._logger.info.assert_called_once()
def test_decode_log_interval_reads_env(monkeypatch):
    """Check that FD_CONSOLE_DECODE_LOG_INTERVAL sets the decode log interval."""
    # Set the variable before construction; the test expects the new instance to reflect it.
    monkeypatch.setenv("FD_CONSOLE_DECODE_LOG_INTERVAL", "2")

    metrics = SchedulerMetricsLogger(enabled=True, dp_rank=0)
    configured_interval = metrics._decode_log_interval

    assert configured_interval == 2
def test_decode_log_interval_non_positive_falls_back_to_default(monkeypatch):
    """Check that an interval of "0" from the environment yields the class default."""
    monkeypatch.setenv("FD_CONSOLE_DECODE_LOG_INTERVAL", "0")

    metrics = SchedulerMetricsLogger(enabled=True, dp_rank=0)
    fallback = SchedulerMetricsLogger.DEFAULT_DECODE_LOG_INTERVAL

    # Zero is not a usable interval, so the default is expected instead.
    assert metrics._decode_log_interval == fallback