Spaces:
Sleeping
Sleeping
File size: 1,046 Bytes
c891946 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 19 14:45:37 2023
@author: Hua
"""
import pandas as pd
import json
from sentence_transformers import SentenceTransformer
from sentence_transformers import util
import numpy as np
# Table of items together with their precomputed embeddings.
df = pd.read_csv('RAMEmbeddings.csv')

# The GPT2Embeddings column holds one JSON text per row (a pd.Series).
gpt2bds = df.GPT2Embeddings

# Decode every JSON text into a float32 NumPy array, preserving row order so
# that position i in gpt2list corresponds to row i of df.
gpt2list = [
    np.float32(np.asarray(json.loads(encoded)))
    for encoded in gpt2bds
]
# define the search function
# Models already loaded by _load_model, keyed by model path, so repeated
# searches reuse the same instance instead of reloading it on every call.
_MODEL_CACHE = {}


def _load_model(model_path):
    """Return the SentenceTransformer at *model_path*, loading it only once."""
    model = _MODEL_CACHE.get(model_path)
    if model is None:
        model = SentenceTransformer(model_path)
        _MODEL_CACHE[model_path] = model
    return model


def search(inputs, top_k: int = 5) -> pd.DataFrame:
    """Return the rows of ``df`` most similar to *inputs*.

    The query is embedded with the 'sembeddings/model_gpt_trained' model and
    compared, by cosine similarity, against every embedding in ``gpt2list``.

    Args:
        inputs: The query to embed. It must encode to exactly one embedding
            (e.g. a single string or a one-element list).
        top_k: Maximum number of rows to return. Defaults to 5. If fewer
            embeddings are available, all of them are returned.

    Returns:
        A DataFrame with at most ``top_k`` rows of ``df``, ordered from most
        to least similar, with a fresh 0..k-1 index.

    Raises:
        ValueError: If *inputs* encodes to more than one embedding.
    """
    k = min(int(top_k), len(gpt2list))
    if k <= 0:
        # Nothing to rank (no embeddings, or a non-positive top_k): return an
        # empty frame with the same columns instead of failing in argpartition.
        return df.iloc[0:0].reset_index(drop=True)

    # GPT2 embedding of the query
    gpt2_model = _load_model('sembeddings/model_gpt_trained')
    embeddings = gpt2_model.encode(inputs)

    # One batched similarity call against all stored embeddings; the result
    # has one row per query embedding and one column per stored embedding.
    sims = util.pytorch_cos_sim(embeddings, np.stack(gpt2list))
    if sims.shape[0] != 1:
        raise ValueError(
            "search() expects a single query, got "
            f"{sims.shape[0]} embeddings"
        )
    scores = sims[0].detach().cpu().numpy()

    # argpartition picks the k best in linear time but leaves them unordered,
    # so sort just those k by descending similarity.
    top = np.argpartition(scores, -k)[-k:]
    top = top[np.argsort(scores[top])[::-1]]

    # Positions in gpt2list are row positions in df, hence iloc (not loc).
    return df.iloc[top].reset_index(drop=True)
|