[Doc] Update vlm.rst to include an example on videos (#9155)
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
parent
9a94ca4a5d
commit
1874c6a1b0
@ -135,6 +135,33 @@ Instead of passing in a single image, you can pass in a list of images.
|
|||||||
|
|
||||||
A code example can be found in `examples/offline_inference_vision_language_multi_image.py <https://github.com/vllm-project/vllm/blob/main/examples/offline_inference_vision_language_multi_image.py>`_.
|
A code example can be found in `examples/offline_inference_vision_language_multi_image.py <https://github.com/vllm-project/vllm/blob/main/examples/offline_inference_vision_language_multi_image.py>`_.
|
||||||
|
|
||||||
|
Multi-image input can be extended to perform video captioning. We show this with `Qwen2-VL <https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct>`_ as it supports videos:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
# Allow at most 4 images per prompt, i.e. at most 4 frames per video. This value can be changed.
|
||||||
|
llm = LLM("Qwen/Qwen2-VL-2B-Instruct", limit_mm_per_prompt={"image": 4})
|
||||||
|
|
||||||
|
# Create the request payload.
|
||||||
|
video_frames = ... # Load your video, making sure it has no more frames than the limit specified above.
|
||||||
|
message = {
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": "Describe this set of frames. Consider the frames to be a part of the same video."},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
for i in range(len(video_frames)):
|
||||||
|
base64_image = encode_image(video_frames[i]) # base64 encoding.
|
||||||
|
new_image = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
|
||||||
|
message["content"].append(new_image)
|
||||||
|
|
||||||
|
# Perform inference and log output.
|
||||||
|
outputs = llm.chat([message])
|
||||||
|
|
||||||
|
for o in outputs:
|
||||||
|
generated_text = o.outputs[0].text
|
||||||
|
print(generated_text)
|
||||||
|
|
||||||
Online Inference
|
Online Inference
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user