vllm/examples/offline_inference_embedding.py

from vllm import LLM
# Sample prompts.
prompts = [
"Hello, my name is",
"The president of the United States is",
"The capital of France is",
"The future of AI is",
]
# Create an LLM.
model = LLM(
model="intfloat/e5-mistral-7b-instruct",
task="embed", # You should pass task="embed" for embedding models
enforce_eager=True,
)
# Generate embeddings. The output is a list of PoolingRequestOutputs.
outputs = model.encode(prompts)
# Print the outputs.
for output in outputs:
    print(output.outputs.embedding)  # list of 4096 floats
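
# A minimal follow-up sketch (not part of the original example): compare two
# of the returned embeddings with cosine similarity. Assumes numpy is
# available; any vector math library would work equally well.
import numpy as np


def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Cosine similarity between two embedding vectors."""
    a_arr, b_arr = np.asarray(a), np.asarray(b)
    return float(np.dot(a_arr, b_arr) / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr)))


# Example: how similar are the first two prompts' embeddings?
print(cosine_similarity(outputs[0].outputs.embedding,
                        outputs[1].outputs.embedding))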