import json
import os

import pandas as pd

QUERY_LOG_JSONL = "logs/query_log.jsonl"
QUERY_LOG_CSV = "logs/query_log.csv"
FAILED_URLS_LOG = "logs/failed_urls.txt"
GLOSSARY_OVERFLOW_LOG = "logs/glossary_overflow.txt"


# ----------------------------
# Shared helpers
# ----------------------------
def _ensure_parent_dir(path: str) -> None:
    """Create the directory that will contain *path*, if it names one."""
    parent = os.path.dirname(path)
    # os.makedirs("") raises, so a bare filename (no directory part) is skipped.
    if parent:
        os.makedirs(parent, exist_ok=True)


def _read_stripped_lines(path: str, *, skip_blank: bool = False) -> list:
    """
    Return the whitespace-stripped lines of the UTF-8 text file at *path*.

    A missing file yields an empty list. When *skip_blank* is true, lines
    that are empty after stripping are left out of the result.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            stripped = [line.strip() for line in f]
    except FileNotFoundError:
        return []
    if skip_blank:
        return [line for line in stripped if line]
    return stripped


# ----------------------------
# Query Logs
# ----------------------------
def log_query(query: str, answer: str, source: str = "unknown") -> None:
    """
    Append query + answer to JSONL and CSV logs.

    The JSONL write is authoritative and any error from it propagates to the
    caller. The CSV write is best-effort: a failure there is printed and
    otherwise ignored.

    Args:
        query: The query text to record.
        answer: The answer text to record.
        source: Label stored alongside the entry; defaults to "unknown".
    """
    entry = {"query": query, "answer": answer, "source": source}

    _ensure_parent_dir(QUERY_LOG_JSONL)
    with open(QUERY_LOG_JSONL, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")

    # Append to CSV
    try:
        _ensure_parent_dir(QUERY_LOG_CSV)
        # Write the header for a brand-new file and also for one that exists
        # but is empty; otherwise the CSV would never get its column names.
        needs_header = (
            not os.path.exists(QUERY_LOG_CSV)
            or os.path.getsize(QUERY_LOG_CSV) == 0
        )
        pd.DataFrame([entry]).to_csv(
            QUERY_LOG_CSV, mode="a", index=False, header=needs_header
        )
    except Exception as e:
        # Deliberately broad: the CSV copy must never break query logging.
        print(f"⚠️ Failed to write CSV log: {e}")


def load_queries(limit: int = 20) -> list:
    """
    Load last N query log entries.

    Args:
        limit: Maximum number of most-recent entries to return. Zero or a
            negative value returns an empty list.

    Returns:
        A list of decoded JSON entries, oldest first. Blank lines are
        ignored and lines that are not valid JSON are skipped with a
        printed warning. A missing log file yields an empty list.
    """
    # entries[-0:] would be the *whole* list, so handle non-positive limits
    # explicitly instead of relying on the slice.
    if limit <= 0:
        return []

    try:
        with open(QUERY_LOG_JSONL, "r", encoding="utf-8") as f:
            raw_lines = [line for line in f if line.strip()]
    except FileNotFoundError:
        return []

    entries = []
    for raw in raw_lines:
        try:
            entries.append(json.loads(raw))
        except json.JSONDecodeError:
            # A truncated or hand-edited line should not make the rest of
            # the log unreadable.
            print(f"⚠️ Skipping malformed query log line: {raw.strip()[:80]!r}")
    return entries[-limit:]


# ----------------------------
# Failed URLs
# ----------------------------
def log_failed_url(url: str) -> None:
    """Log a failed URL fetch by appending *url* as one line."""
    _ensure_parent_dir(FAILED_URLS_LOG)
    with open(FAILED_URLS_LOG, "a", encoding="utf-8") as f:
        f.write(url + "\n")


def load_failed_urls() -> list:
    """
    Load failed URLs.

    Returns:
        The stripped, non-empty lines of the failed-URL log, or an empty
        list when the log does not exist.
    """
    return _read_stripped_lines(FAILED_URLS_LOG, skip_blank=True)


# ----------------------------
# Glossary Overflow
# ----------------------------
def log_glossary_overflow(term: str, definition: str) -> None:
    """
    Save glossary entries that could not be parsed properly.

    Each call appends a single "term: definition" line to the overflow log.
    """
    _ensure_parent_dir(GLOSSARY_OVERFLOW_LOG)
    with open(GLOSSARY_OVERFLOW_LOG, "a", encoding="utf-8") as f:
        f.write(f"{term}: {definition}\n")


def load_glossary_overflow() -> list:
    """
    Load glossary overflow terms.

    Returns:
        Every line of the overflow log with surrounding whitespace stripped
        (blank lines are kept as empty strings), or an empty list when the
        log does not exist.
    """
    return _read_stripped_lines(GLOSSARY_OVERFLOW_LOG)