# Flat mapping of six numeric settings; all values are plain ints/floats.
# NOTE(review): the program that loads this file is not visible here. The key
# names resemble vLLM engine arguments, so the per-key notes below are
# assumptions to confirm against the actual consumer.
#
# Presumably the maximum context length (prompt + output) in tokens
# -- TODO confirm.
max_model_len: 16384
# Presumably the cap on sequences handled concurrently -- TODO confirm.
max_num_seqs: 256
# Presumably a per-step token budget; set to the same value as max_model_len.
max_num_batched_tokens: 16384
# Presumably the number of devices the model is sharded across -- TODO confirm.
tensor_parallel_size: 1
# Presumably a fraction (0-1) of GPU memory the process may claim
# -- TODO confirm.
gpu_memory_utilization: 0.7
# NOTE(review): unlike the keys above, `workers` does not obviously belong to
# the same option family; presumably a worker/process count for the serving
# layer -- verify which component reads it.
workers: 4