Output results in either human-friendly or RAG-friendly format
Browse files
app.py
CHANGED
|
@@ -16,7 +16,9 @@ meilisearch_client = meilisearch.Client("https://edge.meilisearch.com", os.envir
|
|
| 16 |
meilisearch_index_name = "docs-embed"
|
| 17 |
meilisearch_index = meilisearch_client.index(meilisearch_index_name)
|
| 18 |
|
| 19 |
-
|
|
|
|
|
|
|
| 20 |
start_time_embedding = time.time()
|
| 21 |
query_prefix = 'Represent this sentence for searching code documentation: '
|
| 22 |
query_tokens = tokenizer(query_prefix + query_text, padding=True, truncation=True, return_tensors='pt', max_length=512)
|
|
@@ -39,18 +41,23 @@ def search_embeddings(query_text):
|
|
| 39 |
hits = response["hits"]
|
| 40 |
|
| 41 |
# step3: present the results in markdown
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
|
| 51 |
demo = gr.Interface(
|
| 52 |
fn=search_embeddings,
|
| 53 |
-
inputs=gr.Textbox(label="enter your query", placeholder="Type Markdown here...", lines=10),
|
| 54 |
outputs=gr.Markdown(),
|
| 55 |
title="HF Docs Emebddings Explorer",
|
| 56 |
allow_flagging="never"
|
|
|
|
| 16 |
meilisearch_index_name = "docs-embed"
|
| 17 |
meilisearch_index = meilisearch_client.index(meilisearch_index_name)
|
| 18 |
|
| 19 |
+
output_options = ["RAG-friendly", "human-friendly"]
|
| 20 |
+
|
| 21 |
+
def search_embeddings(query_text, output_option):
|
| 22 |
start_time_embedding = time.time()
|
| 23 |
query_prefix = 'Represent this sentence for searching code documentation: '
|
| 24 |
query_tokens = tokenizer(query_prefix + query_text, padding=True, truncation=True, return_tensors='pt', max_length=512)
|
|
|
|
| 41 |
hits = response["hits"]
|
| 42 |
|
| 43 |
# step3: present the results in markdown
|
| 44 |
+
if output_option == "human-friendly":
|
| 45 |
+
md = f"Stats:\n\nembedding time: {elapsed_time_embedding:.2f}s\n\nmeilisearch time: {elapsed_time_meilisearch:.2f}s\n\n---\n\n"
|
| 46 |
+
for hit in hits:
|
| 47 |
+
text, source, library = hit["text"], hit["source"], hit["library"]
|
| 48 |
+
source = f"[source](https://huggingface.co/docs/{library}/{source})"
|
| 49 |
+
md += text + f"\n\n{source}\n\n---\n\n"
|
| 50 |
+
return md
|
| 51 |
+
elif output_option == "RAG-friendly":
|
| 52 |
+
hit_texts = [hit["text"] for hit in hits]
|
| 53 |
+
hit_text_str = "\n\n".join(hit_texts)
|
| 54 |
+
md = f'Here are some relevant docs regarding to the user query "{query_text}":\n\n' + hit_text_str
|
| 55 |
+
return md
|
| 56 |
|
| 57 |
|
| 58 |
demo = gr.Interface(
|
| 59 |
fn=search_embeddings,
|
| 60 |
+
inputs=[gr.Textbox(label="enter your query", placeholder="Type Markdown here...", lines=10), gr.Radio(label="Select an output option", choices=output_options, value="RAG-friendly")],
|
| 61 |
outputs=gr.Markdown(),
|
| 62 |
title="HF Docs Emebddings Explorer",
|
| 63 |
allow_flagging="never"
|