Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -49,6 +49,8 @@ def download_nltk_resources():
|
|
| 49 |
|
| 50 |
download_nltk_resources()
|
| 51 |
|
|
|
|
|
|
|
| 52 |
FILES_DIR = './files'
|
| 53 |
|
| 54 |
# Model Management
|
|
@@ -159,9 +161,9 @@ def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=Tr
|
|
| 159 |
|
| 160 |
def optimize_query(query, llm_model):
|
| 161 |
llm = HuggingFacePipeline.from_model_id(
|
| 162 |
-
model_id=
|
| 163 |
task="text2text-generation",
|
| 164 |
-
model_kwargs={"temperature": 0, "
|
| 165 |
)
|
| 166 |
multi_query_retriever = MultiQueryRetriever.from_llm(
|
| 167 |
retriever=get_retriever(vector_store, search_type, search_kwargs),
|
|
@@ -391,6 +393,8 @@ def visualize_results(results_df, stats_df):
|
|
| 391 |
|
| 392 |
sns.barplot(x='model', y='search_time', data=stats_df, ax=axs[0, 0])
|
| 393 |
axs[0, 0].set_title('Search Time by Model')
|
|
|
|
|
|
|
| 394 |
axs[0, 0].set_xticklabels(axs[0, 0].get_xticklabels(), rotation=45, ha='right')
|
| 395 |
|
| 396 |
sns.scatterplot(x='result_diversity', y='rank_correlation', hue='model', data=stats_df, ax=axs[0, 1])
|
|
@@ -398,6 +402,7 @@ def visualize_results(results_df, stats_df):
|
|
| 398 |
|
| 399 |
sns.boxplot(x='model', y='avg_content_length', data=stats_df, ax=axs[1, 0])
|
| 400 |
axs[1, 0].set_title('Distribution of Result Content Lengths')
|
|
|
|
| 401 |
axs[1, 0].set_xticklabels(axs[1, 0].get_xticklabels(), rotation=45, ha='right')
|
| 402 |
|
| 403 |
embeddings = np.array([embedding for embedding in results_df['embedding'] if isinstance(embedding, np.ndarray)])
|
|
@@ -514,6 +519,8 @@ def compare_embeddings(file, query, embedding_models, custom_embedding_model, sp
|
|
| 514 |
|
| 515 |
stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, top_k, expected_result)
|
| 516 |
stats["model"] = f"{model_type} - {model_name}"
|
|
|
|
|
|
|
| 517 |
stats.update(settings)
|
| 518 |
|
| 519 |
formatted_results = format_results(results_raw, stats)
|
|
@@ -605,6 +612,8 @@ def automated_testing(file, query, test_params, expected_result=None):
|
|
| 605 |
|
| 606 |
stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, params['top_k'], expected_result)
|
| 607 |
stats["model"] = f"{params['model_type']} - {params['model_name']}"
|
|
|
|
|
|
|
| 608 |
stats.update(params)
|
| 609 |
|
| 610 |
all_results.extend(format_results(results_raw, stats))
|
|
@@ -705,7 +714,7 @@ Provide your suggestions in a Python dictionary format."""
|
|
| 705 |
llm = HuggingFacePipeline.from_model_id(
|
| 706 |
model_id="google/flan-t5-large",
|
| 707 |
task="text2text-generation",
|
| 708 |
-
model_kwargs={"temperature": 0.7, "
|
| 709 |
)
|
| 710 |
|
| 711 |
# Generate suggestions
|
|
|
|
| 49 |
|
| 50 |
download_nltk_resources()
|
| 51 |
|
| 52 |
+
nltk.download('punkt')
|
| 53 |
+
|
| 54 |
FILES_DIR = './files'
|
| 55 |
|
| 56 |
# Model Management
|
|
|
|
| 161 |
|
| 162 |
def optimize_query(query, llm_model):
|
| 163 |
llm = HuggingFacePipeline.from_model_id(
|
| 164 |
+
model_id="google/flan-t5-large",
|
| 165 |
task="text2text-generation",
|
| 166 |
+
model_kwargs={"do_sample": True, "temperature": 0.7, "max_new_tokens": 512},
|
| 167 |
)
|
| 168 |
multi_query_retriever = MultiQueryRetriever.from_llm(
|
| 169 |
retriever=get_retriever(vector_store, search_type, search_kwargs),
|
|
|
|
| 393 |
|
| 394 |
sns.barplot(x='model', y='search_time', data=stats_df, ax=axs[0, 0])
|
| 395 |
axs[0, 0].set_title('Search Time by Model')
|
| 396 |
+
axs[0, 0].set_xticks(range(len(axs[0, 0].get_xticklabels())))
|
| 397 |
+
|
| 398 |
axs[0, 0].set_xticklabels(axs[0, 0].get_xticklabels(), rotation=45, ha='right')
|
| 399 |
|
| 400 |
sns.scatterplot(x='result_diversity', y='rank_correlation', hue='model', data=stats_df, ax=axs[0, 1])
|
|
|
|
| 402 |
|
| 403 |
sns.boxplot(x='model', y='avg_content_length', data=stats_df, ax=axs[1, 0])
|
| 404 |
axs[1, 0].set_title('Distribution of Result Content Lengths')
|
| 405 |
+
axs[1, 0].set_xticks(range(len(axs[0, 0].get_xticklabels())))
|
| 406 |
axs[1, 0].set_xticklabels(axs[1, 0].get_xticklabels(), rotation=45, ha='right')
|
| 407 |
|
| 408 |
embeddings = np.array([embedding for embedding in results_df['embedding'] if isinstance(embedding, np.ndarray)])
|
|
|
|
| 519 |
|
| 520 |
stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, top_k, expected_result)
|
| 521 |
stats["model"] = f"{model_type} - {model_name}"
|
| 522 |
+
stats["model_type"] = model_type
|
| 523 |
+
stats["model_name"] = model_name
|
| 524 |
stats.update(settings)
|
| 525 |
|
| 526 |
formatted_results = format_results(results_raw, stats)
|
|
|
|
| 612 |
|
| 613 |
stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, params['top_k'], expected_result)
|
| 614 |
stats["model"] = f"{params['model_type']} - {params['model_name']}"
|
| 615 |
+
stats["model_type"] = model_type
|
| 616 |
+
stats["model_name"] = model_name
|
| 617 |
stats.update(params)
|
| 618 |
|
| 619 |
all_results.extend(format_results(results_raw, stats))
|
|
|
|
| 714 |
llm = HuggingFacePipeline.from_model_id(
|
| 715 |
model_id="google/flan-t5-large",
|
| 716 |
task="text2text-generation",
|
| 717 |
+
model_kwargs={"do_sample": True, "temperature": 0.7, "max_new_tokens": 512},
|
| 718 |
)
|
| 719 |
|
| 720 |
# Generate suggestions
|