---
# Serving/runtime settings, one key per line so the file parses as a flat
# YAML mapping. (Several `key: value` pairs on a single line are not valid
# block-mapping syntax and are rejected by YAML parsers.)
#
# NOTE(review): most key names look like inference-engine arguments (e.g.
# vLLM); the consumer is not visible from this file, so the per-key notes
# below are assumptions -- confirm against the loader that reads this file.

# Presumably the maximum sequence/context length, in tokens -- TODO confirm.
max_model_len: 16384

# Presumably the cap on concurrently scheduled sequences -- TODO confirm.
max_num_seqs: 256

# Presumably the per-step token budget for batching; currently set to the
# same value as max_model_len -- TODO confirm this is intentional.
max_num_batched_tokens: 16384

# Presumably the number of devices the model is sharded across -- TODO confirm.
tensor_parallel_size: 1

# Presumably a fraction in (0, 1] of device memory to use -- TODO confirm.
gpu_memory_utilization: 0.7

# NOTE(review): meaning depends on the consumer (server processes? data
# loaders?) -- verify before changing.
workers: 4