[VLM][Doc] Add stop_token_ids to InternVL example (#7354)

commit 67abdbb42f
parent 07ab160741
Author: Isotr0py
Date: 2024-08-09 22:51:04 +08:00 (committed by GitHub)

@@ -124,16 +124,27 @@ def run_minicpmv(question):
 # InternVL
 def run_internvl(question):
-    # Generally, InternVL can use chatml template for conversation
-    TEMPLATE = "<|im_start|>User\n{prompt}<|im_end|>\n<|im_start|>Assistant\n"
-    prompt = f"<image>\n{question}\n"
-    prompt = TEMPLATE.format(prompt=prompt)
+    model_name = "OpenGVLab/InternVL2-2B"
+
     llm = LLM(
-        model="OpenGVLab/InternVL2-4B",
+        model=model_name,
         trust_remote_code=True,
         max_num_seqs=5,
     )
-    stop_token_ids = None
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                              trust_remote_code=True)
+    messages = [{'role': 'user', 'content': f"<image>\n{question}"}]
+    prompt = tokenizer.apply_chat_template(messages,
+                                           tokenize=False,
+                                           add_generation_prompt=True)
+
+    # Stop tokens for InternVL
+    # model variants may have different stop tokens;
+    # please refer to the model card for the correct "stop words":
+    # https://huggingface.co/OpenGVLab/InternVL2-2B#service
+    stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
+    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
     return llm, prompt, stop_token_ids
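
For context, AutoTokenizer above is transformers.AutoTokenizer, imported at the top of the example script, and the returned stop_token_ids are meant to be passed to SamplingParams so generation halts at InternVL's chat-template terminators rather than running to max_tokens. Below is a minimal usage sketch under those assumptions; the question string, sampling settings, and "example.jpg" path are placeholders, not part of this commit.

    from PIL import Image
    from vllm import SamplingParams

    llm, prompt, stop_token_ids = run_internvl("What is in this image?")

    # Pass the InternVL stop tokens explicitly so generation stops at the
    # chat-template terminators (e.g. <|im_end|>, <|end|>).
    sampling_params = SamplingParams(temperature=0.2,
                                     max_tokens=64,
                                     stop_token_ids=stop_token_ids)

    image = Image.open("example.jpg")  # placeholder image path
    outputs = llm.generate(
        {
            "prompt": prompt,
            "multi_modal_data": {"image": image},
        },
        sampling_params=sampling_params,
    )
    print(outputs[0].outputs[0].text)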