diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index b8c27f96d2..079df8ba9f 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -269,6 +269,15 @@ async def async_request_eb_openai_chat_completions(
         if request_func_input.response_format:
             payload["response_format"] = request_func_input.response_format
 
+        # Random-input benchmark mode: pin the generated length exactly by
+        # setting min_tokens == max_tokens to the requested output length.
+        if request_func_input.random_flag:
+            payload["max_tokens"] = request_func_input.output_len
+            payload["min_tokens"] = request_func_input.output_len
+            # Random token-id scenario: a list-valued prompt carries token ids,
+            # so move it into prompt_token_ids and clear the text prompt.
+            if isinstance(request_func_input.prompt, list):
+                request_func_input.prompt_token_ids = request_func_input.prompt
+                request_func_input.prompt = ""
+
         # 支持传入prompt_token_ids
         if request_func_input.prompt_token_ids:
             # 不走messages
diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py
index ab7c8deb3e..572935f210 100644
--- a/benchmarks/benchmark_dataset.py
+++ b/benchmarks/benchmark_dataset.py
@@ -818,3 +818,63 @@ class RandomTextDataset(BenchmarkDataset):
             )
         )
         return samples
+
+
+class RandomTokenDataset(BenchmarkDataset):
+    """
+    Generates prompts made of random token ids for serving benchmarks.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def sample(
+        self,
+        num_requests: int,
+        lora_path: Optional[str] = None,
+        max_loras: Optional[int] = None,
+        random_input_len: Optional[int] = None,
+        random_output_len: Optional[int] = None,
+        random_range_ratio: Optional[float] = None,
+        enable_multimodal_chat: bool = False,
+        **kwargs,
+    ) -> list:
+        samples = []
+
+        def sample_len(base_len: int, ratio: float) -> int:
+            # Sample a length uniformly from
+            # [base_len * (1 - ratio), base_len * (1 + ratio)].
+            if base_len is None:
+                return None
+            if ratio is None or ratio <= 0:
+                return base_len
+            lo = max(1, int(base_len * (1 - ratio)))
+            hi = int(base_len * (1 + ratio))
+            return random.randint(lo, hi)
+
+        # Seed once (NOT inside the loop) so the workload is reproducible
+        # across runs while every request still gets its own lengths/tokens.
+        random.seed(21)
+        for i in range(1, num_requests + 1):
+            sampled_input_len = sample_len(random_input_len, random_range_ratio)
+            sampled_output_len = sample_len(random_output_len, random_range_ratio)
+
+            token_ids = [random.randint(2000, 10000) for _ in range(sampled_input_len)]
+
+            data = {
+                "messages": [{"role": "user", "content": [{"type": "text", "text": ""}]}],
+                "prompt_token_ids": token_ids,
+            }
+
+            samples.append(
+                SampleRequest(
+                    no=i,
+                    json_data=data,
+                    prompt=token_ids,
+                    prompt_len=sampled_input_len,
+                    history_QA=data["messages"],
+                    expected_output_len=sampled_output_len,
+                    random_flag=True,
+                )
+            )
+        return samples
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index c7cb9c5806..0624afe7dc 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -39,7 +39,13 @@ from backend_request_func import (
     RequestFuncInput,
     RequestFuncOutput,
 )
-from benchmark_dataset import EBChatDataset, EBDataset, RandomTextDataset, SampleRequest
+from benchmark_dataset import (
+    EBChatDataset,
+    EBDataset,
+    RandomTextDataset,
+    RandomTokenDataset,
+    SampleRequest,
+)
 from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from tqdm.asyncio import tqdm
 
@@ -1156,6 +1162,12 @@ def main(args: argparse.Namespace):
             random_output_len=args.random_output_len,
             random_range_ratio=args.random_range_ratio,
         ),
+        "random_token_ids": lambda: RandomTokenDataset().sample(
+            num_requests=args.num_prompts,
+            random_input_len=args.random_input_len,
+            random_output_len=args.random_output_len,
+            random_range_ratio=args.random_range_ratio,
+        ),
     }
 
     try:
@@ -1338,6 +1350,7 @@ if __name__ == "__main__":
             "EB",
             "EBChat",
             "random",
+            "random_token_ids",
         ],
         help="Name of the dataset to benchmark on.",
     )