Spaces:
Sleeping
Sleeping
| import argparse | |
| import logging | |
| from document_handler import load_documents_from_disk, load_documents_from_sitemap, save_documents_to_disk | |
| from vectorstore_handler import load_or_create_vectorstore, get_embeddings | |
| from query_executor import QuestionAnsweringAssistant | |
| import re | |
# Logging setup: records at INFO and above are written both to the
# "query_executor.log" file and to a stream handler, using one shared format
# (timestamp - level - message).
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[
        logging.FileHandler("query_executor.log"),
        logging.StreamHandler(),
    ],
)

# Module-level logger; it is also handed to QuestionAnsweringAssistant in main().
logger = logging.getLogger(__name__)
def main(messages, query, *, sitemap_url="https://www.originws.it/page-sitemap.xml"):
    """Answer ``query`` using documents gathered from a sitemap.

    The documents are cached on disk in a ``.pkl`` file whose name is derived
    from the sitemap URL. When that cache is missing, the documents are loaded
    from the sitemap and saved for the next call. A vectorstore is then loaded
    or created from the documents, the query is condensed together with
    ``messages``, and the condensed query is executed against the vectorstore.

    Args:
        messages: Passed as the first argument to
            ``QuestionAnsweringAssistant.condense_query`` (presumably the prior
            conversation -- confirm against ``query_executor``).
        query: The question to answer.
        sitemap_url: Sitemap to load documents from when no cached copy
            exists. Keyword-only; defaults to the URL that was previously
            hard-coded, so existing callers are unaffected.

    Returns:
        Whatever ``QuestionAnsweringAssistant.execute_query`` returns for the
        condensed query.
    """
    # Replace every non-alphanumeric character so the URL can double as a
    # file name and as the vectorstore identifier.
    sitemap_str = re.sub(r'[^a-zA-Z0-9]', '_', sitemap_url)
    docs_file_path = sitemap_str + '.pkl'

    qaa = QuestionAnsweringAssistant(logger)

    # A None result is treated as "no cached documents on disk".
    docs = load_documents_from_disk(docs_file_path)
    if docs is None:
        logger.info("Documents not found on disk, loading from sitemap...")
        docs = load_documents_from_sitemap(sitemap_url)
        save_documents_to_disk(docs, docs_file_path)
        logger.info("Documents saved to disk.")
    else:
        logger.info("Documents loaded from disk.")

    # Get embeddings and load/create the vectorstore keyed by the sitemap name.
    embeddings = get_embeddings()
    vectorstore = load_or_create_vectorstore(docs, embeddings, sitemap_str)

    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logger.info("Executing query: %s", query)
    condensed = qaa.condense_query(messages, query)
    response = qaa.execute_query(condensed, vectorstore)

    logger.info("Query response: %s", response)
    return response