Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -161,11 +161,17 @@ def phonetic_match(text, query, method='levenshtein_distance', apply_phonetic=Tr
 
 #def optimize_query(query, llm_model):
 def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type, search_type, top_k):
-
-
-
-
-
+    # Use a HuggingFace model for text generation
+    #model_id = "google/flan-t5-large"
+    #tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #model = AutoModelForCausalLM.from_pretrained(model_id)
+    #pipe = pipeline(
+    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
+    #)
+    #llm = HuggingFacePipeline(pipeline=pipe)
+
+    llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
+
 
     # Create a temporary vector store for query optimization
     temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
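Note on the hunk above: the new line hands the transformers pipeline to LangChain's HuggingFacePipeline as a positional argument, while the commented-out block (and the wrapper's documented usage) passes it through the pipeline= keyword. A minimal sketch of the keyword form for the new model follows; the explicit "text-generation" task and max_new_tokens=512 are assumptions carried over from the commented-out flan-t5 setup, not values taken from this commit, and the import path depends on the installed langchain version.

# Sketch only: keyword-argument form of the LangChain wrapper around a
# transformers pipeline (task and generation settings are assumed).
from transformers import pipeline
from langchain.llms import HuggingFacePipeline  # or langchain_community.llms, depending on version

pipe = pipeline(
    "text-generation",                      # zephyr-7b-beta is a causal (decoder-only) model
    model="HuggingFaceH4/zephyr-7b-beta",
    max_new_tokens=512,                     # assumed; mirrors the commented-out flan-t5 setup
)
llm = HuggingFacePipeline(pipeline=pipe)    # pipeline passed by keyword, as in the old code

Building a 7B pipeline inside optimize_query on every call is also heavy; caching it at module level may be worth considering.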
@@ -443,11 +449,17 @@ def optimize_vocabulary(texts, vocab_size=10000, min_frequency=2):
 
 # New preprocessing function
 def optimize_query(query, llm_model, chunks, embedding_model, vector_store_type, search_type, top_k):
-
-
-
-
-
+    # Use a HuggingFace model for text generation
+    #model_id = "google/flan-t5-large"
+    #tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #model = AutoModelForCausalLM.from_pretrained(model_id)
+    #pipe = pipeline(
+    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
+    #)
+    #llm = HuggingFacePipeline(pipeline=pipe)
+
+    llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
+
 
     # Create a temporary vector store for query optimization
     temp_vector_store = get_vector_store(vector_store_type, chunks, embedding_model)
@@ -633,8 +645,8 @@ def automated_testing(file, query, test_params, expected_result=None):
 
     stats = calculate_statistics(results_raw, search_time, vector_store, num_tokens, embedding_model, query, params['top_k'], expected_result)
     stats["model"] = f"{params['model_type']} - {params['model_name']}"
-    stats["model_type"] = model_type
-    stats["model_name"] = model_name
+    stats["model_type"] = params['model_type']
+    stats["model_name"] = params['model_name']
     stats.update(params)
 
     all_results.extend(format_results(results_raw, stats))
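For the change above: model_type and model_name appear to exist only as keys of the per-run params dict inside automated_testing, so reading them from params avoids referencing undefined names. A tiny illustration with made-up values (the dict contents are hypothetical):

# Illustration only; params is a made-up stand-in for one test configuration.
params = {"model_type": "HuggingFace", "model_name": "HuggingFaceH4/zephyr-7b-beta", "top_k": 5}
stats = {"model": f"{params['model_type']} - {params['model_name']}"}
stats["model_type"] = params['model_type']   # old code read bare model_type, risking a NameError
stats["model_name"] = params['model_name']
stats.update(params)                         # merge the full configuration into the stats row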
@@ -732,15 +744,16 @@ Text chunks:
 Provide your suggestions in a Python dictionary format."""
 
     # Use a HuggingFace model for text generation
-    model_id = "google/flan-t5-large"
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    pipe = pipeline(
-        "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
-    )
-    llm = HuggingFacePipeline(pipeline=pipe)
-
+    #model_id = "google/flan-t5-large"
+    #tokenizer = AutoTokenizer.from_pretrained(model_id)
+    #model = AutoModelForCausalLM.from_pretrained(model_id)
+    #pipe = pipeline(
+    #    "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512
+    #)
+    #llm = HuggingFacePipeline(pipeline=pipe)
 
+    llm = HuggingFacePipeline(pipeline(model="HuggingFaceH4/zephyr-7b-beta"))
+
 
     #llm = HuggingFacePipeline.from_model_id(
     #    model_id="google/flan-t5-large",
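The commented-out trailing context points at HuggingFacePipeline.from_model_id as an alternative way to build the same LLM without constructing the transformers pipeline by hand. A sketch of that form with the flan-t5 model from the commented code; the task string and pipeline_kwargs are assumptions (flan-t5 is an encoder-decoder model, so it needs a text2text task rather than plain text-generation):

# Sketch only: letting LangChain build the pipeline from a model id.
from langchain.llms import HuggingFacePipeline  # or langchain_community.llms, depending on version

llm = HuggingFacePipeline.from_model_id(
    model_id="google/flan-t5-large",
    task="text2text-generation",              # flan-t5 is seq2seq, not causal text-generation
    pipeline_kwargs={"max_new_tokens": 512},  # assumed generation setting
)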