"""Score an embedding model on the ``jinaai/negation-dataset`` test split.

Each dataset item supplies three texts under the keys ``anchor``,
``entailment`` and ``negative``.  All three are embedded with the same model,
and two ratios are printed:

* ``entailment_score``: fraction of items for which
  sim(anchor, entailment) > sim(negative, entailment).
* ``anchor_score``: fraction of items for which
  sim(anchor, entailment) > sim(anchor, negative).

Both ratios lie in [0, 1].
"""
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from torch.nn.functional import cosine_similarity as cos_sim

model_name = "jinaai/jina-embedding-b-en-v1"
model = SentenceTransformer(model_name)
dataset = load_dataset('jinaai/negation-dataset', split='test')

# Embed every column in the same item order so that row i of each tensor
# belongs to the same dataset item.
anchor_embeddings = model.encode([item['anchor'] for item in dataset], convert_to_tensor=True)
entailment_embeddings = model.encode([item['entailment'] for item in dataset], convert_to_tensor=True)
negative_embeddings = model.encode([item['negative'] for item in dataset], convert_to_tensor=True)

# torch's cosine_similarity is called with its default ``dim`` here, so each
# call compares row i of the first tensor with row i of the second and
# yields one similarity per dataset item (not an all-pairs matrix).
positive_similarities = cos_sim(anchor_embeddings, entailment_embeddings)
entailment_negatives = cos_sim(negative_embeddings, entailment_embeddings)
anchor_negatives = cos_sim(anchor_embeddings, negative_embeddings)

# Count the "wins" with Tensor.sum(): a single vectorised reduction, instead
# of the builtin sum() which would walk the tensor one element at a time.
entailment_score = (positive_similarities > entailment_negatives).sum().item() / len(anchor_embeddings)
anchor_score = (positive_similarities > anchor_negatives).sum().item() / len(anchor_embeddings)

print('entailment_score: ', entailment_score)
print('anchor_score: ', anchor_score)