[Model] Remove image mm limit for LLaMa4 (#16365)

Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com>

commit 61de3ef74b (parent ec1f9c8c91)
@@ -22,6 +22,16 @@ QUESTION = "What is the content of each image?"
 IMAGE_URLS = [
     "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg",
     "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/2/26/Ultramarine_Flycatcher_%28Ficedula_superciliaris%29_Naggar%2C_Himachal_Pradesh%2C_2013_%28cropped%29.JPG",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e5/Anim1754_-_Flickr_-_NOAA_Photo_Library_%281%29.jpg/2560px-Anim1754_-_Flickr_-_NOAA_Photo_Library_%281%29.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/d/d4/Starfish%2C_Caswell_Bay_-_geograph.org.uk_-_409413.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/6/69/Grapevinesnail_01.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/0/0b/Texas_invasive_Musk_Thistle_1.jpg/1920px-Texas_invasive_Musk_Thistle_1.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/7/7a/Huskiesatrest.jpg/2880px-Huskiesatrest.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg/1920px-Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/3/30/George_the_amazing_guinea_pig.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/1/1f/Oryctolagus_cuniculus_Rcdo.jpg/1920px-Oryctolagus_cuniculus_Rcdo.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/9/98/Horse-and-pony.jpg",
 ]
 
 
@@ -285,8 +295,7 @@ def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
 
     engine_args = EngineArgs(
         model=model_name,
-        max_model_len=8192,
-        max_num_seqs=4,
+        max_model_len=131072,
         tensor_parallel_size=8,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
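
After this change, load_llama4 builds the engine with a 131072-token context and a per-prompt image limit tied to the number of URLs passed in. For reference, a minimal self-contained sketch of the same construction pattern; the checkpoint name and the two sample URLs are illustrative stand-ins taken from this diff, not the script's exact values:

```python
from dataclasses import asdict

from vllm import LLM, EngineArgs

# Illustrative inputs; the example script derives these from IMAGE_URLS and --num-images.
image_urls = [
    "https://upload.wikimedia.org/wikipedia/commons/6/69/Grapevinesnail_01.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/9/98/Horse-and-pony.jpg",
]

engine_args = EngineArgs(
    # Assumed checkpoint name for a Llama 4 multimodal model.
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    max_model_len=131072,    # raised from 8192 in this change
    tensor_parallel_size=8,  # matches the example; requires 8 GPUs
    # Allow exactly as many images per prompt as we intend to send.
    limit_mm_per_prompt={"image": len(image_urls)},
)

# The multi-image examples construct the LLM from the dataclass fields.
llm = LLM(**asdict(engine_args))
```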
@@ -660,7 +669,7 @@ def run_generate(model, question: str, image_urls: list[str],
         llm.llm_engine.add_lora(lora_request=lora_request)
 
     sampling_params = SamplingParams(temperature=0.0,
-                                     max_tokens=128,
+                                     max_tokens=256,
                                      stop_token_ids=req_data.stop_token_ids)
 
     outputs = llm.generate(
@@ -694,7 +703,7 @@ def run_chat(model: str, question: str, image_urls: list[str],
         llm.llm_engine.add_lora(lora_request=lora_request)
 
     sampling_params = SamplingParams(temperature=0.0,
-                                     max_tokens=128,
+                                     max_tokens=256,
                                      stop_token_ids=req_data.stop_token_ids)
     outputs = llm.chat(
         [{
@@ -729,10 +738,12 @@ def main(args: Namespace):
     method = args.method
     seed = args.seed
 
+    image_urls = IMAGE_URLS[:args.num_images]
+
     if method == "generate":
-        run_generate(model, QUESTION, IMAGE_URLS, seed)
+        run_generate(model, QUESTION, image_urls, seed)
     elif method == "chat":
-        run_chat(model, QUESTION, IMAGE_URLS, seed)
+        run_chat(model, QUESTION, image_urls, seed)
     else:
         raise ValueError(f"Invalid method: {method}")
 
@@ -757,6 +768,12 @@ if __name__ == "__main__":
                         type=int,
                         default=None,
                         help="Set the seed when initializing `vllm.LLM`.")
+    parser.add_argument(
+        "--num-images",
+        "-n",
+        choices=list(range(1, 13)),  # 12 is the max number of images
+        default=2,
+        help="Number of images to use for the demo.")
 
     args = parser.parse_args()
     main(args)
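
The new --num-images flag selects how many of the 12 URLs the demo sends, and main() slices IMAGE_URLS accordingly. A small sketch of the expected parsing behavior; note that type=int is assumed here so the integer choices check applies to the parsed value, and the URL list is a stand-in:

```python
import argparse

# Stand-in for the 12-entry IMAGE_URLS list defined at the top of the example.
IMAGE_URLS = [f"https://example.invalid/image_{i}.jpg" for i in range(12)]

parser = argparse.ArgumentParser()
parser.add_argument(
    "--num-images",
    "-n",
    type=int,  # assumption: convert to int so the choices check matches
    choices=list(range(1, 13)),  # 12 is the max number of images
    default=2,
    help="Number of images to use for the demo.")

args = parser.parse_args(["--num-images", "4"])
image_urls = IMAGE_URLS[:args.num_images]  # mirrors main(): take the first N URLs
assert len(image_urls) == 4
```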
@@ -477,7 +477,9 @@ class Mllama4ProcessingInfo(BaseProcessingInfo):
                                **kwargs)
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
-        return {"image": 10}
+        # Although vLLM can support more images from an infra capability
+        # perspective, we do not recommend using >10 images in practice.
+        return {"image": None}
 
     @staticmethod
    def get_patch_per_chunk(vision_config: Llama4VisionConfig) -> int: