[Bugfix] Fix phi3v batch inference when images have different aspect ratios (#7392)
This commit is contained in:
parent
baa240252e
commit
4c5d8e8ea9
@ -81,7 +81,10 @@ def run_test(
|
|||||||
|
|
||||||
inputs_per_image = [(
|
inputs_per_image = [(
|
||||||
[prompt for _ in size_factors],
|
[prompt for _ in size_factors],
|
||||||
[rescale_image_size(image, factor) for factor in size_factors],
|
[
|
||||||
|
rescale_image_size(image, factor, transpose=idx)
|
||||||
|
for idx, factor in enumerate(size_factors)
|
||||||
|
],
|
||||||
) for image, prompt in zip(images, HF_IMAGE_PROMPTS)]
|
) for image, prompt in zip(images, HF_IMAGE_PROMPTS)]
|
||||||
|
|
||||||
# NOTE: take care of the order. run vLLM first, and then run HF.
|
# NOTE: take care of the order. run vLLM first, and then run HF.
|
||||||
|
@ -114,5 +114,5 @@ def test_traces(trace_service):
|
|||||||
SpanAttributes.LLM_LATENCY_TIME_TO_FIRST_TOKEN) == ttft
|
SpanAttributes.LLM_LATENCY_TIME_TO_FIRST_TOKEN) == ttft
|
||||||
e2e_time = metrics.finished_time - metrics.arrival_time
|
e2e_time = metrics.finished_time - metrics.arrival_time
|
||||||
assert attributes.get(SpanAttributes.LLM_LATENCY_E2E) == e2e_time
|
assert attributes.get(SpanAttributes.LLM_LATENCY_E2E) == e2e_time
|
||||||
assert attributes.get(SpanAttributes.LLM_LATENCY_TIME_IN_SCHEDULER
|
assert attributes.get(
|
||||||
) == metrics.scheduler_time
|
SpanAttributes.LLM_LATENCY_TIME_IN_SCHEDULER) == metrics.scheduler_time
|
||||||
|
@ -189,7 +189,7 @@ class Phi3HDImageEmbedding(Phi3ImageEmbeddingBase):
|
|||||||
global_image_features_hd_newline = self.add_image_newline(
|
global_image_features_hd_newline = self.add_image_newline(
|
||||||
global_image_features_hd)
|
global_image_features_hd)
|
||||||
|
|
||||||
all_image_embeddings = []
|
batch_image_features_proj = []
|
||||||
# need a for loop to process each image because of different image sizes
|
# need a for loop to process each image because of different image sizes
|
||||||
# (patch arrangement is different for each image)
|
# (patch arrangement is different for each image)
|
||||||
for i, img_size in enumerate(image_sizes):
|
for i, img_size in enumerate(image_sizes):
|
||||||
@ -207,19 +207,17 @@ class Phi3HDImageEmbedding(Phi3ImageEmbeddingBase):
|
|||||||
sub_image_features_hd)
|
sub_image_features_hd)
|
||||||
|
|
||||||
# [sub features, separator, global features]
|
# [sub features, separator, global features]
|
||||||
all_image_embeddings.append(
|
image_embeddings = torch.cat([
|
||||||
torch.cat([
|
|
||||||
sub_image_features_hd_newline.squeeze(
|
sub_image_features_hd_newline.squeeze(
|
||||||
0), # (h_crop*12*(w_crop*12+1), 4096)
|
0), # (h_crop*12*(w_crop*12+1), 4096)
|
||||||
self.glb_GN.squeeze(0),
|
self.glb_GN.squeeze(0),
|
||||||
global_image_features_hd_newline[i],
|
global_image_features_hd_newline[i],
|
||||||
]))
|
])
|
||||||
|
img_proj = self.img_projection(
|
||||||
|
image_embeddings.to(target_device, target_dtype))
|
||||||
|
batch_image_features_proj.append(img_proj)
|
||||||
|
|
||||||
image_features_proj = self.img_projection(
|
return batch_image_features_proj
|
||||||
torch.stack(all_image_embeddings).to(target_device, target_dtype)
|
|
||||||
) # (num_images, (h_crop*12*(w_crop*12+1)+1), hidden_size)
|
|
||||||
|
|
||||||
return image_features_proj
|
|
||||||
|
|
||||||
def reshape_hd_patches_2x2merge(self, image_features, h_crop, w_crop):
|
def reshape_hd_patches_2x2merge(self, image_features, h_crop, w_crop):
|
||||||
"""
|
"""
|
||||||
|
@ -90,8 +90,13 @@ def load_image_from_base64(image: Union[bytes, str]) -> Image.Image:
|
|||||||
return _load_image_from_bytes(base64.b64decode(image))
|
return _load_image_from_bytes(base64.b64decode(image))
|
||||||
|
|
||||||
|
|
||||||
def rescale_image_size(image: Image.Image, size_factor: float) -> Image.Image:
|
def rescale_image_size(image: Image.Image,
|
||||||
|
size_factor: float,
|
||||||
|
transpose: int = -1) -> Image.Image:
|
||||||
"""Rescale the dimensions of an image by a constant factor."""
|
"""Rescale the dimensions of an image by a constant factor."""
|
||||||
new_width = int(image.width * size_factor)
|
new_width = int(image.width * size_factor)
|
||||||
new_height = int(image.height * size_factor)
|
new_height = int(image.height * size_factor)
|
||||||
return image.resize((new_width, new_height))
|
image = image.resize((new_width, new_height))
|
||||||
|
if transpose >= 0:
|
||||||
|
image = image.transpose(Image.Transpose(transpose))
|
||||||
|
return image
|
||||||
|
Loading…
x
Reference in New Issue
Block a user