Commit
·
ff4ec71
1
Parent(s):
a6e7feb
add backend code
Browse files
- backend/__init__.py +0 -0
- backend/__pycache__/__init__.cpython-36.pyc +0 -0
- backend/__pycache__/__init__.cpython-38.pyc +0 -0
- backend/__pycache__/config.cpython-36.pyc +0 -0
- backend/__pycache__/config.cpython-38.pyc +0 -0
- backend/__pycache__/inference.cpython-36.pyc +0 -0
- backend/__pycache__/inference.cpython-38.pyc +0 -0
- backend/__pycache__/utils.cpython-36.pyc +0 -0
- backend/__pycache__/utils.cpython-38.pyc +0 -0
- backend/inference.py +25 -0
- backend/utils.py +28 -0
backend/__init__.py
ADDED
|
File without changes
|
backend/__pycache__/__init__.cpython-36.pyc
ADDED
|
Binary file (159 Bytes). View file
|
|
|
backend/__pycache__/__init__.cpython-38.pyc
ADDED
|
Binary file (181 Bytes). View file
|
|
|
backend/__pycache__/config.cpython-36.pyc
ADDED
|
Binary file (737 Bytes). View file
|
|
|
backend/__pycache__/config.cpython-38.pyc
ADDED
|
Binary file (767 Bytes). View file
|
|
|
backend/__pycache__/inference.cpython-36.pyc
ADDED
|
Binary file (2.2 kB). View file
|
|
|
backend/__pycache__/inference.cpython-38.pyc
ADDED
|
Binary file (887 Bytes). View file
|
|
|
backend/__pycache__/utils.cpython-36.pyc
ADDED
|
Binary file (1.54 kB). View file
|
|
|
backend/__pycache__/utils.cpython-38.pyc
ADDED
|
Binary file (1.91 kB). View file
|
|
|
backend/inference.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from backend.utils import load_model, load_embeddings, load_texts
|
| 3 |
+
|
| 4 |
+
# Search
def query_search(query: str, n_answers: int, model_name: str):
    """Return the *n_answers* corpus rows most similar to *query*.

    Encodes the query with the named SentenceTransformer model, scores it
    against the pre-computed corpus embeddings with cosine similarity, and
    returns the best-matching rows of the corpus DataFrame.

    Parameters
    ----------
    query : str
        Free-text search query.
    n_answers : int
        Number of top results to return.
    model_name : str
        SentenceTransformer model identifier passed to ``load_model``.

    Returns
    -------
    pandas.DataFrame
        The top rows sorted by similarity, restricted to the
        "Description" and "Code" columns.
    """
    model = load_model(model_name)

    # Encode the single query; shape is (dim,), so a batch axis is added
    # below when comparing against the (n, dim) corpus embeddings.
    query_emb = model.encode(query, convert_to_tensor=True)

    corpus_emb = load_embeddings()
    corpus_texts = load_texts()

    # Cosine similarity of the one query against every corpus row.
    hits = torch.nn.functional.cosine_similarity(
        query_emb[None, :], corpus_emb, dim=1, eps=1e-8
    )

    # NOTE(review): this writes into the DataFrame returned by the cached
    # load_texts(); allowed by allow_output_mutation=True, and each call
    # simply overwrites the column — confirm this is intentional.
    corpus_texts["Similarity"] = hits.tolist()

    return corpus_texts.sort_values(by="Similarity", ascending=False).head(n_answers)[
        ["Description", "Code"]
    ]
|
backend/utils.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from sentence_transformers import SentenceTransformer
|
| 3 |
+
import streamlit as st
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@st.cache(allow_output_mutation=True)
def load_model(model_name):
    """Return a SentenceTransformer for *model_name*.

    The model weights are downloaded lazily on first use; Streamlit
    caches the constructed model across reruns.
    """
    return SentenceTransformer(model_name)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@st.cache(allow_output_mutation=True)
def load_embeddings(path="./embeddings/descriptions_emb_100000_examples.pt"):
    """Return the pre-generated corpus embedding tensor.

    Parameters
    ----------
    path : str, optional
        Location of the serialized tensor. Defaults to the bundled
        100 000-example descriptions embedding file, preserving the
        original behavior for existing callers.

    Returns
    -------
    torch.Tensor
        The corpus embeddings, mapped to CPU so loading works on
        machines without a GPU. Cached by Streamlit across reruns.
    """
    return torch.load(path, map_location=torch.device("cpu"))
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@st.cache(allow_output_mutation=True)
def load_texts(path="./data/codesearchnet_100000_examples.csv"):
    """Return the pre-generated corpus texts as a DataFrame.

    Parameters
    ----------
    path : str, optional
        CSV file holding the corpus rows. Defaults to the bundled
        100 000-example CodeSearchNet extract, preserving the original
        behavior for existing callers.

    Returns
    -------
    pandas.DataFrame
        The corpus texts table. Cached by Streamlit across reruns.
    """
    return pd.read_csv(path)
|