Spaces:
Running
Running
File size: 2,498 Bytes
3304684 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import os
import json
import pandas as pd
# Log file locations. All paths are relative, so they resolve against the
# process's current working directory.

# Query/answer log, one JSON object per line.
QUERY_LOG_JSONL = "logs/query_log.jsonl"
# The same query/answer records, mirrored as CSV rows.
QUERY_LOG_CSV = "logs/query_log.csv"
# URLs whose fetch failed, one per line.
FAILED_URLS_LOG = "logs/failed_urls.txt"
# Glossary entries that could not be parsed, one "term: definition" per line.
GLOSSARY_OVERFLOW_LOG = "logs/glossary_overflow.txt"
# ----------------------------
# Query Logs
# ----------------------------
def log_query(query: str, answer: str, source: str = "unknown"):
    """Append one query/answer record to the JSONL log and mirror it to CSV.

    Args:
        query: The query text.
        answer: The answer recorded for the query.
        source: Label stored alongside the record; defaults to "unknown".

    The JSONL write is not guarded: any error it raises propagates to the
    caller. The CSV write is best-effort: a failure is printed and swallowed
    so that it never blocks the JSONL record from being kept.
    """
    entry = {"query": query, "answer": answer, "source": source}

    # dirname() is "" for a bare filename, and os.makedirs("") raises.
    jsonl_dir = os.path.dirname(QUERY_LOG_JSONL)
    if jsonl_dir:
        os.makedirs(jsonl_dir, exist_ok=True)
    with open(QUERY_LOG_JSONL, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")

    # Append to CSV
    try:
        # The CSV may live in a different directory than the JSONL file.
        csv_dir = os.path.dirname(QUERY_LOG_CSV)
        if csv_dir:
            os.makedirs(csv_dir, exist_ok=True)
        # A header is needed when the file is missing OR present but empty;
        # checking existence alone leaves an empty file without a header row.
        needs_header = (
            not os.path.exists(QUERY_LOG_CSV)
            or os.path.getsize(QUERY_LOG_CSV) == 0
        )
        pd.DataFrame([entry]).to_csv(
            QUERY_LOG_CSV,
            mode="a",
            index=False,
            header=needs_header,
            encoding="utf-8",
        )
    except Exception as e:
        # Deliberately broad: the CSV copy is a convenience mirror only.
        print(f"⚠️ Failed to write CSV log: {e}")
def load_queries(limit: int = 20):
    """Return the most recent ``limit`` entries from the JSONL query log.

    Args:
        limit: Maximum number of entries to return. Zero or a negative
            value yields an empty list.

    Returns:
        A list of decoded JSON values, one per non-blank log line, in file
        order (oldest first). Empty if the log file does not exist.

    Raises:
        json.JSONDecodeError: If one of the selected lines is not valid JSON.
    """
    from collections import deque

    # entries[-0:] would be the WHOLE list and a negative limit would slice
    # from the front, so a non-positive limit needs its own guard.
    if limit <= 0 or not os.path.exists(QUERY_LOG_JSONL):
        return []

    with open(QUERY_LOG_JSONL, "r", encoding="utf-8") as f:
        # A bounded deque keeps only the newest `limit` lines while streaming
        # the file; blank lines are dropped because json.loads rejects them.
        recent = deque((line for line in f if line.strip()), maxlen=limit)

    # Only the retained tail is parsed, not the entire log.
    return [json.loads(line) for line in recent]
# ----------------------------
# Failed URLs
# ----------------------------
def log_failed_url(url: str):
    """Record a URL whose fetch failed.

    The log's parent directory is created on demand, then the URL is
    appended to the failed-URL log on a line of its own.
    """
    log_dir = os.path.dirname(FAILED_URLS_LOG)
    os.makedirs(log_dir, exist_ok=True)
    with open(FAILED_URLS_LOG, "a", encoding="utf-8") as out:
        record = url + "\n"
        out.write(record)
def load_failed_urls():
    """Return every line of the failed-URL log, whitespace-stripped.

    Yields an empty list when the log file has not been created yet.
    """
    if os.path.exists(FAILED_URLS_LOG):
        with open(FAILED_URLS_LOG, "r", encoding="utf-8") as src:
            return list(map(str.strip, src))
    return []
# ----------------------------
# Glossary Overflow
# ----------------------------
def log_glossary_overflow(term: str, definition: str):
    """Append a glossary entry that could not be parsed to the overflow log.

    The entry is stored as a single ``term: definition`` line; the log's
    parent directory is created first if it is missing.
    """
    target_dir = os.path.dirname(GLOSSARY_OVERFLOW_LOG)
    os.makedirs(target_dir, exist_ok=True)
    with open(GLOSSARY_OVERFLOW_LOG, "a", encoding="utf-8") as out:
        record = f"{term}: {definition}\n"
        out.write(record)
def load_glossary_overflow():
    """Return every line of the glossary overflow log, whitespace-stripped.

    The result is an empty list when the log file does not exist.
    """
    overflow = []
    if os.path.exists(GLOSSARY_OVERFLOW_LOG):
        with open(GLOSSARY_OVERFLOW_LOG, "r", encoding="utf-8") as src:
            for raw in src:
                overflow.append(raw.strip())
    return overflow
|