2025-02-02 14:58:18 -05:00
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
2025-03-03 01:34:51 +00:00
|
|
|
from collections.abc import Sequence
|
2024-10-16 14:31:00 +08:00
|
|
|
|
|
|
|
import torch
|
|
|
|
import torch.nn.functional as F
|
|
|
|
|
|
|
|
|
|
|
|
def check_embeddings_close(
    *,
    embeddings_0_lst: Sequence[list[float]],
    embeddings_1_lst: Sequence[list[float]],
    name_0: str,
    name_1: str,
    tol: float = 1e-3,
) -> None:
    """Assert that two lists of embeddings agree pairwise.

    Each pair of embeddings is converted to tensors and compared by cosine
    similarity along dimension 0; a pair passes when the similarity is at
    least ``1 - tol``.

    Args:
        embeddings_0_lst: Embeddings produced by the first implementation,
            one entry per prompt.
        embeddings_1_lst: Embeddings produced by the second implementation,
            in the same order as ``embeddings_0_lst``.
        name_0: Label for the first implementation, used in failure output.
        name_1: Label for the second implementation, used in failure output.
        tol: Maximum allowed shortfall of the cosine similarity from 1.

    Raises:
        AssertionError: If the two lists differ in length, if a pair of
            embeddings differs in length, or if a pair's cosine similarity
            falls below ``1 - tol``.
    """
    assert len(embeddings_0_lst) == len(embeddings_1_lst), (
        f"Number of embeddings mismatch: {name_0} has "
        f"{len(embeddings_0_lst)} vs. {name_1} has {len(embeddings_1_lst)}")

    for prompt_idx, (embeddings_0, embeddings_1) in enumerate(
            zip(embeddings_0_lst, embeddings_1_lst)):
        assert len(embeddings_0) == len(embeddings_1), (
            f"Length mismatch: {len(embeddings_0)} vs. {len(embeddings_1)}")

        sim = F.cosine_similarity(torch.tensor(embeddings_0),
                                  torch.tensor(embeddings_1),
                                  dim=0)

        # Report the measured similarity and the threshold it was checked
        # against; the truncated vectors alone rarely show why a pair failed.
        fail_msg = (f"Test{prompt_idx}:"
                    f"\nCosine similarity:\t{sim.item():.6f} "
                    f"(required >= {1 - tol:.6f})"
                    f"\n{name_0}:\t{embeddings_0[:16]!r}"
                    f"\n{name_1}:\t{embeddings_1[:16]!r}")

        assert sim >= 1 - tol, fail_msg
|