Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import duckdb | |
| from annoy import AnnoyIndex | |
| from sentence_transformers import SentenceTransformer | |
# Sentence-embedding model used by search_query to encode the user's query.
model = SentenceTransformer("sentence-transformers/LaBSE")
# Nearest-neighbour index over 768-dimensional vectors using the angular metric,
# loaded from a file on disk rather than built here.
# NOTE(review): 768 presumably matches the embedding size of the model above
# and of the vectors stored in definitions.ann — confirm.
annoy_index = AnnoyIndex(768, "angular")
annoy_index.load("definitions.ann")
# DuckDB database containing the `definitions` and `words` tables that
# search_query joins to turn index ids into displayable rows.
conn = duckdb.connect("sonajaht.db")
def search_query(query, top_k=10):
    """Return the definitions most similar in meaning to *query*.

    The query is embedded with the module-level ``model``, its nearest
    neighbours are looked up in ``annoy_index``, and the matching rows are
    fetched from the ``definitions``/``words`` tables through ``conn``.
    The ids returned by the index are matched against ``definitions.entry_id``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A DataFrame with the columns ``#`` (1-based rank), ``sõna`` and
        ``definitsioon``, ordered from most to least similar. The frame is
        empty when *query* is blank or the index returns no neighbours.
    """
    # Local import: the file does not import pandas at module level, but
    # fetchdf() below already depends on it being installed.
    import pandas as pd

    columns = ["#", "sõna", "definitsioon"]

    # A blank query has no meaning to search for; skip the model entirely.
    if not query or not query.strip():
        return pd.DataFrame(columns=columns)

    query_vector = model.encode(query)
    similar_item_ids = annoy_index.get_nns_by_vector(query_vector, top_k)

    # Without ids the SQL below would contain "IN ()" and a CASE with no
    # WHEN branches, both of which are syntax errors.
    if not similar_item_ids:
        return pd.DataFrame(columns=columns)

    # Coerce to int so that only numeric literals are ever spliced into SQL.
    item_ids = [int(item_id) for item_id in similar_item_ids]
    id_list = ", ".join(map(str, item_ids))
    # IN (...) does not preserve order, so map each id to its rank explicitly.
    rank_cases = " ".join(
        f"WHEN {item_id} THEN {rank}" for rank, item_id in enumerate(item_ids)
    )
    sql_query = f"""
        SELECT w.value AS sõna, d.value AS definitsioon
        FROM definitions d
        JOIN words w ON d.word_id = w.word_id
        WHERE d.entry_id IN ({id_list})
        ORDER BY CASE d.entry_id {rank_cases} END
    """
    results = conn.execute(sql_query).fetchdf()
    results["#"] = list(range(1, len(results) + 1))
    return results[columns]
# Sample queries offered in the UI; they mix several languages and range from
# short phrases to full descriptive sentences.
examples = [
    # Estonian
    "väga vana mees",
    # Russian
    "очень старый дед",
    # French
    "un très vieil homme",
    # English
    "a clear material that you can see through used to make windows",
    "to have a rule that you need a specific object or thing in some situation",
    "something that makes you happy or makes you laugh",
    "when an event happens or takes place",
    # Russian
    "часть стерео системы, из которой исходит музыка",
    "кто-то, кто использует что-то",
]
def handle_example(example):
    """Run a search for *example* and return it together with the results.

    Returns:
        A ``(example, results)`` tuple, where ``results`` is whatever
        ``search_query(example)`` returns.
    """
    results = search_query(example)
    return example, results
# Build the search page: title, query box, search button, example queries and
# the results table, created in that order.
with gr.Blocks() as demo:
    gr.Markdown("# Sõnajaht Demo")

    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button("Otsi")

    # The examples are bound to the textbox only (no fn/outputs are given).
    gr.Examples(examples=examples, inputs=query_input, label="Otsi päringunäiteid")

    results_output = gr.Dataframe(label="Otsingutulemused")

    # Pressing the button runs the search and shows the resulting table.
    search_button.click(
        fn=search_query,
        inputs=query_input,
        outputs=results_output,
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()