# SPDX-License-Identifier: Apache-2.0
"""Registry of model repositories mirrored to the CI S3 bucket.

``MODELS_ON_S3`` lists HuggingFace-style ``org/name`` repo IDs whose weights
have been copied to ``MODEL_WEIGHTS_S3_BUCKET`` so CI jobs can pull them
without hitting the HuggingFace Hub. Commented-out entries are kept for
reference; they are intentionally excluded from the active list.
"""

MODELS_ON_S3 = [
    "adept/fuyu-8b",
    "ai21labs/AI21-Jamba-1.5-Mini",
    "ai21labs/Jamba-tiny-random",
    "ai21labs/Jamba-tiny-reward-dev",
    "allenai/Molmo-7B-D-0924",
    "allenai/OLMo-1B-hf",
    "allenai/OLMoE-1B-7B-0924-Instruct",
    "amd/Llama-3.1-8B-Instruct-FP8-KV-Quark-test",
    "AMead10/Llama-3.2-1B-Instruct-AWQ",
    "ArthurZ/Ilama-3.2-1B",
    "BAAI/bge-base-en-v1.5",
    "BAAI/bge-multilingual-gemma2",
    "BAAI/bge-reranker-v2-m3",
    "bigcode/starcoder2-3b",
    "cross-encoder/ms-marco-MiniLM-L-6-v2",
    "cross-encoder/quora-roberta-base",
    "deepseek-ai/deepseek-vl2-tiny",
    "distilbert/distilgpt2",
    "facebook/bart-base",
    "facebook/bart-large-cnn",
    # "fixie-ai/ultravox-v0_5-llama-3_2-1b",
    "google/gemma-1.1-2b-it",
    "google/gemma-2-2b-it",
    "google/paligemma-3b-pt-224",
    "h2oai/h2ovl-mississippi-800m",
    "HuggingFaceM4/Idefics3-8B-Llama3",
    "internlm/internlm2-1_8b-reward",
    "intfloat/e5-mistral-7b-instruct",
    "intfloat/multilingual-e5-small",
    "jason9693/Qwen2.5-1.5B-apeach",
    "llava-hf/llava-1.5-7b-hf",
    "llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
    "llava-hf/llava-v1.6-mistral-7b-hf",
    "llava-hf/LLaVA-NeXT-Video-7B-hf",
    # "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "meta-llama/Llama-3.2-1B",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Meta-Llama-3-8B",
    "microsoft/phi-2",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/Phi-3-small-8k-instruct",
    "microsoft/Phi-3-vision-128k-instruct",
    "microsoft/Phi-3.5-MoE-instruct",
    "microsoft/Phi-3.5-vision-instruct",
    # "mistralai/Mistral-7B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Pixtral-12B-2409",
    "mistral-community/Mixtral-8x22B-v0.1-AWQ",
    "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head",
    "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse",
    "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue",
    "ModelCloud/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit-10-25-2024",
    "neuralmagic/Llama-3.2-1B-quantized.w8a8",
    "neuralmagic/Meta-Llama-3-8B-Instruct-FP8",
    "neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV",
    "nm-testing/asym-w8w8-int8-static-per-tensor-tiny-llama",
    "nm-testing/llama2.c-stories42M-pruned2.4-compressed",
    "nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t",
    "nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test",
    "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Dynamic-2of4-testing",
    "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Dynamic-IA-Per-Tensor-Weight-testing",
    "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Static-Per-Tensor-testing",
    "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Static-testing",
    "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dynamic-Asym",
    "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Asym",
    "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Sym",
    "nm-testing/Phi-3-mini-128k-instruct-FP8",
    "nm-testing/Qwen2-0.5B-Instruct-FP8-SkipQKV",
    "nm-testing/Qwen2-1.5B-Instruct-FP8-K-V",
    "nm-testing/tinyllama-oneshot-w4a16-channel-v2",
    "nm-testing/tinyllama-oneshot-w4a16-group128-v2",
    "nm-testing/tinyllama-oneshot-w8-channel-a8-tensor",
    "nm-testing/tinyllama-oneshot-w8a16-per-channel",
    "nm-testing/tinyllama-oneshot-w8a8-channel-dynamic-token-v2",
    "nm-testing/tinyllama-oneshot-w8a8-channel-dynamic-token-v2-asym",
    "nm-testing/tinyllama-oneshot-w8a8-dynamic-token-v2",
    "nm-testing/tinyllama-oneshot-w8a8-dynamic-token-v2-asym",
    "nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-2of4-Sparse-Dense-Compressor",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_int8-BitM",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_tensor_act_fp8-BitM",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_tensor_act_int8-BitM",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_per_tok_dyn_act_fp8-BitM",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_per_tok_dyn_act_int8-BitM",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_tensor_act_fp8-BitM",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_tensor_act_int8-BitM",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Dynamic-IA-Per-Channel-Weight-testing",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Dynamic-IA-Per-Tensor-Weight-testing",
    "nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Static-testing",
    "nm-testing/TinyLlama-1.1B-compressed-tensors-kv-cache-scheme",
    "nvidia/NVLM-D-72B",
    "openai-community/gpt2",
    # "openai/whisper-large-v3",
    "openbmb/MiniCPM-o-2_6",
    "openbmb/MiniCPM-V-2_6",
    "OpenGVLab/InternVL2-1B",
    "parasail-ai/GritLM-7B-vllm",
    "Qwen/Qwen1.5-MoE-A2.7B-Chat",
    "Qwen/Qwen2-7B-Instruct",
    "Qwen/Qwen2-Audio-7B-Instruct",
    "Qwen/Qwen2-VL-2B-Instruct",
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-Math-PRM-7B",
    "Qwen/Qwen2.5-Math-RM-72B",
    "Qwen/Qwen2.5-VL-3B-Instruct",
    "royokong/e5-v",
    "sentence-transformers/all-roberta-large-v1",
    "sentence-transformers/stsb-roberta-base-v2",
    "shanearora/OLMo-7B-1124-hf",
    "shuyuej/Llama-3.2-1B-Instruct-GPTQ",
    "ssmits/Qwen2-7B-Instruct-embed-base",
    "stabilityai/stablelm-3b-4e1t",
    "stabilityai/stablelm-zephyr-3b",
    "state-spaces/mamba-130m-hf",
    "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ",
    "THUDM/glm-4v-9b",
    "TIGER-Lab/Mantis-8B-siglip-llama3",
    "TIGER-Lab/VLM2Vec-Full",
    "tiiuae/falcon-40b",
    "tiiuae/falcon-mamba-7b-instruct",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "upstage/solar-pro-preview-instruct",
]

# Base S3 URI under which the above model weights are stored.
MODEL_WEIGHTS_S3_BUCKET = "s3://vllm-ci-model-weights"