| import pandas as pd | |
| import streamlit as st | |
| import torch | |
| from sentence_transformers import SentenceTransformer | |
def load_model(model_name):
    """Build and return a SentenceTransformer for ``model_name``.

    Args:
        model_name: Identifier handed straight to ``SentenceTransformer``.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    # Per the original note, the download happens lazily: nothing is fetched
    # until this function is actually called.
    return SentenceTransformer(model_name)
def load_embeddings(
    embeddings_path="./embeddings/descriptions_emb_200000_examples_github.pt",
):
    """Load the pre-generated corpus embeddings onto the CPU.

    Args:
        embeddings_path: Location of the serialized embeddings file.
            Defaults to the pre-generated file shipped with the app, so
            calling ``load_embeddings()`` with no arguments behaves as before.

    Returns:
        Whatever object was serialized in the file (presumably a tensor of
        description embeddings -- confirm against the generation script),
        with any tensor storage remapped to the CPU.
    """
    # NOTE: torch.load unpickles the file, so only point this at trusted,
    # locally generated artifacts.
    # map_location forces CPU placement so the file loads on machines
    # without the device it was saved from.
    corpus_emb = torch.load(
        embeddings_path,
        map_location=torch.device("cpu"),
    )
    return corpus_emb
def load_texts(
    csv_path="./data/codesearchnet_200000_python_examples_github.csv",
) -> pd.DataFrame:
    """Load the pre-generated texts database from a CSV file.

    Args:
        csv_path: Location of the CSV file to read. Defaults to the
            pre-generated dataset shipped with the app, so calling
            ``load_texts()`` with no arguments behaves as before.

    Returns:
        A DataFrame with the CSV contents, parsed with pandas' default
        ``read_csv`` settings.
    """
    corpus_texts = pd.read_csv(csv_path)
    return corpus_texts