[Bugfix] Fix precommit - line too long in pixtral.py (#14960)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com> Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
89fca671fb
commit
e1eb45d397
@@ -235,7 +235,7 @@ mbstrdecoder==1.1.3
|
|||||||
# typepy
|
# typepy
|
||||||
mdurl==0.1.2
|
mdurl==0.1.2
|
||||||
# via markdown-it-py
|
# via markdown-it-py
|
||||||
mistral-common==1.5.1
|
mistral-common==1.5.4
|
||||||
# via -r requirements/test.in
|
# via -r requirements/test.in
|
||||||
more-itertools==10.5.0
|
more-itertools==10.5.0
|
||||||
# via lm-eval
|
# via lm-eval
|
||||||
|
@@ -73,7 +73,7 @@ class PixtralImagePixelInputs(TypedDict):
|
|||||||
"""
|
"""
|
||||||
A boolean mask indicating which image embeddings correspond
|
A boolean mask indicating which image embeddings correspond
|
||||||
to patch tokens.
|
to patch tokens.
|
||||||
|
|
||||||
Shape: `(batch_size, num_images, num_embeds)`
|
Shape: `(batch_size, num_images, num_embeds)`
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -849,10 +849,10 @@ class VisionTransformer(nn.Module):
|
|||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
images: list of N_img images of variable sizes,
|
images: list of N_img images of variable sizes,
|
||||||
each of shape (C, H, W)
|
each of shape (C, H, W)
|
||||||
Returns:
|
Returns:
|
||||||
image_features: tensor of token features for
|
image_features: tensor of token features for
|
||||||
all tokens of all images of shape (N_toks, D)
|
all tokens of all images of shape (N_toks, D)
|
||||||
"""
|
"""
|
||||||
# pass images through initial convolution independently
|
# pass images through initial convolution independently
|
||||||
@@ -935,7 +935,8 @@ class PatchMerger(nn.Module):
|
|||||||
# x is (N, vision_encoder_dim)
|
# x is (N, vision_encoder_dim)
|
||||||
x = self.permute(x, image_sizes)
|
x = self.permute(x, image_sizes)
|
||||||
|
|
||||||
# x is (N / spatial_merge_size ** 2, vision_encoder_dim * spatial_merge_size ** 2)
|
# x is (N / spatial_merge_size ** 2,
|
||||||
|
# vision_encoder_dim * spatial_merge_size ** 2)
|
||||||
x = self.merging_layer(x)
|
x = self.merging_layer(x)
|
||||||
|
|
||||||
# x is (N / spatial_merge_size ** 2, vision_encoder_dim)
|
# x is (N / spatial_merge_size ** 2, vision_encoder_dim)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user