Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -155,18 +155,8 @@ def calculate_statistics(results, search_time):
|
|
| 155 |
"search_time": search_time
|
| 156 |
}
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
{
|
| 161 |
-
"Content": doc.page_content,
|
| 162 |
-
"Source": doc.metadata.get("source", "Unknown"),
|
| 163 |
-
"Relevance Score": doc.metadata.get("score", "N/A")
|
| 164 |
-
} for doc in results
|
| 165 |
-
])
|
| 166 |
-
|
| 167 |
-
formatted_stats = pd.DataFrame([stats])
|
| 168 |
-
|
| 169 |
-
return gr.DataFrame(df), gr.DataFrame(formatted_stats)
|
| 170 |
|
| 171 |
def compare_embeddings(file, query, model_types, model_names, split_strategy, chunk_size, overlap_size, custom_separators, vector_store_type, search_type, top_k):
|
| 172 |
all_results = []
|
|
@@ -195,11 +185,24 @@ def compare_embeddings(file, query, model_types, model_names, split_strategy, ch
|
|
| 195 |
stats = calculate_statistics(results, search_time)
|
| 196 |
stats["model"] = f"{model_type} - {model_name}"
|
| 197 |
|
| 198 |
-
|
| 199 |
-
|
|
|
|
| 200 |
|
| 201 |
-
|
| 202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
# Gradio interface
|
| 205 |
iface = gr.Interface(
|
|
@@ -217,9 +220,10 @@ iface = gr.Interface(
|
|
| 217 |
gr.Radio(choices=["similarity", "mmr"], label="Search Type", value="similarity"),
|
| 218 |
gr.Slider(1, 10, step=1, value=5, label="Top K")
|
| 219 |
],
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
|
|
|
| 223 |
title="Embedding Comparison Tool",
|
| 224 |
description="Compare different embedding models and retrieval strategies"
|
| 225 |
)
|
|
|
|
| 155 |
"search_time": search_time
|
| 156 |
}
|
| 157 |
|
| 158 |
+
import gradio as gr
|
| 159 |
+
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
def compare_embeddings(file, query, model_types, model_names, split_strategy, chunk_size, overlap_size, custom_separators, vector_store_type, search_type, top_k):
|
| 162 |
all_results = []
|
|
|
|
| 185 |
stats = calculate_statistics(results, search_time)
|
| 186 |
stats["model"] = f"{model_type} - {model_name}"
|
| 187 |
|
| 188 |
+
formatted_results, formatted_stats = format_results(results, stats)
|
| 189 |
+
all_results.append(formatted_results)
|
| 190 |
+
all_stats.append(formatted_stats)
|
| 191 |
|
| 192 |
+
return all_results + all_stats
|
| 193 |
+
|
| 194 |
+
def format_results(results, stats):
    """Convert retrieved documents and their run statistics into DataFrames.

    Args:
        results: Iterable of document-like objects exposing a
            ``page_content`` attribute and a ``metadata`` mapping.
        stats: Mapping of statistic name to value for a single run.

    Returns:
        A ``(results_df, stats_df)`` tuple. ``results_df`` has one row per
        document with the columns "Content", "Source" and "Relevance Score"
        (missing metadata falls back to "Unknown" / "N/A"); ``stats_df`` is a
        single-row frame built from ``stats``.
    """
    # Pin the column set so an empty result list still produces a table with
    # headers instead of a column-less DataFrame.
    columns = ["Content", "Source", "Relevance Score"]
    rows = [
        {
            "Content": doc.page_content,
            "Source": doc.metadata.get("source", "Unknown"),
            "Relevance Score": doc.metadata.get("score", "N/A"),
        }
        for doc in results
    ]
    df = pd.DataFrame(rows, columns=columns)

    formatted_stats = pd.DataFrame([stats])

    return df, formatted_stats
|
| 206 |
|
| 207 |
# Gradio interface
|
| 208 |
iface = gr.Interface(
|
|
|
|
| 220 |
gr.Radio(choices=["similarity", "mmr"], label="Search Type", value="similarity"),
|
| 221 |
gr.Slider(1, 10, step=1, value=5, label="Top K")
|
| 222 |
],
|
| 223 |
+
outputs=[
|
| 224 |
+
gr.Dataframe(label="Results"),
|
| 225 |
+
gr.Dataframe(label="Statistics")
|
| 226 |
+
],
|
| 227 |
title="Embedding Comparison Tool",
|
| 228 |
description="Compare different embedding models and retrieval strategies"
|
| 229 |
)
|