#!/usr/bin/env bash
#
# Launch `swift deploy` with the vLLM inference backend for the
# Qwen2.5-VL-3B-Instruct checkpoint.
#
# Every setting keeps its previous hard-coded value as the default and can be
# overridden from the environment, for example:
#
#   DEPLOY_GPU_IDS=0,1 DEPLOY_MODEL=/path/to/model bash <this script>
#
# Overridable variables (default in parentheses):
#   DEPLOY_GPU_IDS        comma-separated GPU list passed to the command as
#                         CUDA_VISIBLE_DEVICES (0,1,2,3,4,5,6,7)
#   DEPLOY_MODEL          --model (/opt/tiger/ms_swift/Qwen2.5-VL-3B-Instruct)
#   DEPLOY_GPU_MEM_UTIL   --vllm_gpu_memory_utilization (0.3)
#   DEPLOY_MAX_MODEL_LEN  --vllm_max_model_len (8192)
#   DEPLOY_MAX_NEW_TOKENS --max_new_tokens (2048)
#   DEPLOY_TEMPERATURE    --temperature (0.0)
#   DEPLOY_DATA_PARALLEL  --vllm_data_parallel_size (number of entries in
#                         DEPLOY_GPU_IDS, i.e. 8 with the default list)
#
# Only POSIX sh features are used so the script keeps working when invoked
# as `sh <this script>`.

# Portable strict-mode subset: stop on errors and on unset variables.
set -eu

gpu_ids=${DEPLOY_GPU_IDS:-0,1,2,3,4,5,6,7}
model=${DEPLOY_MODEL:-/opt/tiger/ms_swift/Qwen2.5-VL-3B-Instruct}
gpu_mem_util=${DEPLOY_GPU_MEM_UTIL:-0.3}
max_model_len=${DEPLOY_MAX_MODEL_LEN:-8192}
max_new_tokens=${DEPLOY_MAX_NEW_TOKENS:-2048}
# NOTE(review): 0.0 presumably selects greedy decoding — confirm in swift docs.
temperature=${DEPLOY_TEMPERATURE:-0.0}

# Count the comma-separated entries so the data-parallel size follows the GPU
# list instead of being a second hard-coded copy of the same number.
gpu_count=$(printf '%s\n' "$gpu_ids" | awk -F, '{ print NF }')
dp_size=${DEPLOY_DATA_PARALLEL:-$gpu_count}

# Reject anything that is not a positive integer before starting the server.
case $dp_size in
  '' | *[!0-9]* | 0)
    printf 'invalid data parallel size: %s\n' "$dp_size" >&2
    exit 2
    ;;
esac

# The inline assignment sets CUDA_VISIBLE_DEVICES for this command only and
# takes precedence over any value already present in the environment.
CUDA_VISIBLE_DEVICES="$gpu_ids" \
swift deploy \
  --model "$model" \
  --infer_backend vllm \
  --vllm_gpu_memory_utilization "$gpu_mem_util" \
  --vllm_max_model_len "$max_model_len" \
  --max_new_tokens "$max_new_tokens" \
  --temperature "$temperature" \
  --vllm_data_parallel_size "$dp_size"