Update app.py
app.py CHANGED
@@ -1,10 +1,9 @@
-# JusticeAI Backend —
-# -
-#
-#
-#
-#
-# If language.bin requires insecure torch.load, set LANGUAGE_LOAD_ALLOW_INSECURE=1 (only if you trust the file).
+# JusticeAI Backend — Fixed full app.py
+# - Bugfix: ensure infer_topic_from_message is defined before use and other helpers available.
+# - Robust language loader, translation wrapper, embedding and retrieval with topic-scoped and global fallback.
+# - /add and /add-bulk store language metadata and don't fail on embedding; caches invalidated.
+# - Final reply attempted in user's language via language module first, then Helsinki fallback.
+# - Full set of endpoints included.
 
 from sqlalchemy.pool import NullPool
 import os
@@ -119,7 +118,7 @@ def load_local_language_module():
         _language_load_errors.append(f"safetensors load failed: {e}")
         logger.debug(f"safetensors load failed: {e}")
 
-    # 2b) torch.load (
+    # 2b) torch.load (default)
     try:
         language_module = torch.load(str(p), map_location="cpu")
         logger.info("[JusticeAI] torch.load(language.bin) succeeded")
@@ -246,7 +245,6 @@ def ensure_tables():
 
 ensure_tables()
 
-# ensure columns exist for older DBs
 def ensure_column_exists(table: str, column: str, col_def_sql: str):
     dialect = engine.dialect.name
     try:
@@ -566,14 +564,181 @@ def translate_from_english(text: str, tgt_lang: str) -> str:
         return text
     return translate_text(text, "en", tgt_code)
 
-
+def embed_text(text_data: str) -> bytes:
+    global embed_model
+    if embed_model is None:
+        raise RuntimeError("Embedding model not available.")
+    try:
+        emb = embed_model.encode(text_data, convert_to_tensor=True)
+        return emb.cpu().numpy().tobytes()
+    except Exception as e:
+        logger.warning(f"Embedding error: {e}")
+        raise
+
+def is_boilerplate_candidate(s: str) -> bool:
+    s_low = (s or "").strip().lower()
+    generic_phrases = [
+        "justiceai is a unified intelligence dashboard providing chat, knowledge, and live metrics.",
+        "justiceai es un panel de inteligencia unificado que proporciona chat, conocimiento y métricas en vivo."
+    ]
+    for g in generic_phrases:
+        if s_low == g.strip().lower():
+            return True
+        if g.split(" ")[0].lower() in s_low and len(s_low) < 90:
+            return True
+    return False
+
+def generate_creative_reply(matches: List[str]) -> str:
+    clean = []
+    seen = set()
+    for m in matches:
+        s = sanitize_knowledge_text(m)
+        if not s or s in seen or is_boilerplate_candidate(s):
+            continue
+        seen.add(s)
+        s = dedupe_sentences(s)
+        clean.append(s)
+    if not clean:
+        return "I’m not sure yet."
+    if len(clean) == 1:
+        return clean[0]
+    return "\n\n".join(clean[:3])
+
+# IMPORTANT: ensure infer_topic_from_message is defined BEFORE /chat uses it
+def infer_topic_from_message(msg: str, known_topics=None) -> str:
+    """
+    Infer a topic name from message using simple keyword match or embedding nearest neighbor.
+    """
+    global embed_model
+    if not msg:
+        return "general"
+    if embed_model is None or not known_topics:
+        msg_low = (msg or "").lower()
+        if known_topics:
+            for t in known_topics:
+                if t and t.lower() in msg_low:
+                    return t
+        return "general"
+    try:
+        msg_emb = embed_model.encode([msg], convert_to_tensor=True)
+        topic_embs = embed_model.encode(known_topics, convert_to_tensor=True)
+        sims = torch.nn.functional.cosine_similarity(msg_emb, topic_embs)
+        best_idx = int(torch.argmax(sims).item())
+        return known_topics[best_idx]
+    except Exception:
+        return "general"
+
+def refine_or_update(matches, new_text, new_reply, confidence, topic="general"):
+    try:
+        if embed_model is None:
+            return
+        with engine.begin() as conn:
+            rows = conn.execute(sql_text("SELECT id, text FROM knowledge WHERE topic = :topic"), {"topic": topic}).fetchall()
+        if not rows:
+            emb = embed_text(new_text)
+            with engine.begin() as conn:
+                conn.execute(
+                    sql_text("INSERT INTO knowledge (text, reply, language, embedding, category, topic) VALUES (:t, :r, :lang, :e, 'learned', :topic)"),
+                    {"t": new_text, "r": new_reply or "", "lang": "en", "e": emb, "topic": topic}
+                )
+            return
+        texts = [r[1] for r in rows]
+        ids = [r[0] for r in rows]
+        embs = embed_model.encode(texts, convert_to_tensor=True)
+        new_emb = embed_model.encode(new_text, convert_to_tensor=True)
+        sims = torch.nn.functional.cosine_similarity(new_emb.unsqueeze(0), embs)
+        best_idx = int(torch.argmax(sims).item())
+        best_score = float(sims[best_idx])
+        if best_score > 0.75:
+            kid = ids[best_idx]
+            with engine.begin() as conn:
+                row = conn.execute(sql_text("SELECT meta FROM knowledge WHERE id = :id"), {"id": kid}).fetchone()
+            is_manual = False
+            try:
+                if row and row[0]:
+                    meta_obj = json.loads(row[0]) if isinstance(row[0], str) else row[0]
+                    is_manual = bool(meta_obj.get("manual"))
+            except Exception:
+                is_manual = False
+            if is_manual and confidence < 0.85:
+                with engine.begin() as conn:
+                    conn.execute(
+                        sql_text("UPDATE knowledge SET reply = :r, updated_at = CURRENT_TIMESTAMP WHERE id = :id"),
+                        {"r": (new_reply or ""), "id": kid}
+                    )
+            else:
+                with engine.begin() as conn:
+                    conn.execute(
+                        sql_text("UPDATE knowledge SET reply = :r, text = :t, language = :lang, updated_at = CURRENT_TIMESTAMP WHERE id = :id"),
+                        {"r": new_reply or "", "t": new_text, "lang": "en", "id": kid}
+                    )
+        else:
+            emb = new_emb.cpu().numpy().tobytes()
+            with engine.begin() as conn:
+                conn.execute(
+                    sql_text("INSERT INTO knowledge (text, reply, language, embedding, category, topic, confidence, meta) VALUES (:t, :r, :lang, :e, 'learned', :topic, :conf, :meta)"),
+                    {"t": new_text, "r": new_reply or "", "lang": "en", "e": emb, "topic": topic, "conf": min(0.7, float(confidence)), "meta": json.dumps({"refined": True})}
+                )
+        global knowledge_version
+        knowledge_version += 1
+    except Exception as e:
+        logger.warning(f"refine_or_update error: {e}")
+
+def detect_mood(text: str) -> str:
+    lower = (text or "").lower()
+    positive = ["great", "thanks", "awesome", "happy", "love", "excellent", "cool", "yes", "good", "success", "helpful", "useful", "thank you"]
+    negative = ["sad", "bad", "problem", "angry", "hate", "fail", "no", "error", "not working", "disadvantage", "issue"]
+    emojis = extract_emojis(text)
+    e_score = emoji_sentiment_score(emojis)
+    if any(w in lower for w in positive) or e_score > 0.3:
+        return "positive"
+    elif any(w in lower for w in negative) or e_score < -0.2:
+        return "negative"
+    return "neutral"
+
+def should_append_emoji(user_text: str, reply_text: str, mood: str, flags: Dict[str, Any]) -> str:
+    if flags.get("toxic"):
+        return ""
+    if extract_emojis(reply_text):
+        return ""
+    user_emojis = extract_emojis(user_text)
+    if user_emojis:
+        user_score = emoji_sentiment_score(user_emojis)
+        if user_score >= 0.2:
+            try:
+                cat = get_category_for_mood("positive")
+                return get_emoji(cat, intensity=min(1.0, 0.5 + user_score))
+            except Exception:
+                return user_emojis[0] if user_emojis else ""
+        elif user_score <= -0.2:
+            return ""
+        else:
+            if len(reply_text) < 200:
+                try:
+                    cat = get_category_for_mood("neutral")
+                    return get_emoji(cat, intensity=0.5)
+                except Exception:
+                    return ""
+            return ""
+    if mood == "neutral":
+        return ""
+    if len(reply_text) > 400:
+        return ""
+    if re.search(r'[\{\}\[\]\(\)]', reply_text):
+        return ""
+    try:
+        cat = get_category_for_mood(mood)
+        return get_emoji(cat, intensity=random.random())
+    except Exception:
+        return ""
+
 def invalidate_knowledge_cache():
     global knowledge_embeddings_cache, knowledge_version
     knowledge_embeddings_cache = None
     knowledge_version += 1
     logger.debug("[index] invalidated knowledge cache")
 
-# ----- Add endpoints
+# ----- Add endpoints -----
 @app.post("/add")
 async def add_knowledge(data: dict = Body(...)):
     if not isinstance(data, dict):
@@ -660,7 +825,7 @@ async def add_bulk(data: List[dict] = Body(...)):
     record_learn_event()
     return {"added": added, "errors": errors}
 
-# ----- Chat endpoint
+# ----- Chat endpoint -----
 @app.post("/chat")
 async def chat(request: Request, data: dict = Body(...)):
     t0 = time.time()
@@ -680,7 +845,7 @@ async def chat(request: Request, data: dict = Body(...)):
     except Exception:
         msg_corrected = raw_msg
 
-    # translate to English for synthesis
+    # translate to English for synthesis
    en_msg = msg_corrected
     if detected_lang and detected_lang.split("-")[0].lower() not in ("en", "eng", "", "und"):
         try:
@@ -752,7 +917,6 @@ async def chat(request: Request, data: dict = Body(...)):
             matches = cand
             confidence = matches[0]["score"] if matches else 0.0
         else:
-            # substring fallback
             for kr in knowledge_rows:
                 if en_msg.lower() in (kr["text"] or "").lower():
                     matches.append({"text": kr["text"], "reply": kr["reply"], "lang": kr["lang"], "score": 0.0})
@@ -761,7 +925,7 @@ async def chat(request: Request, data: dict = Body(...)):
         logger.warning(f"[retrieval] topic-scoped retrieval failure: {e}")
         matches = []
 
-    #
+    # Global fallback if needed
     used_global_fallback = False
     if not matches or confidence < 0.35:
         try:
@@ -794,7 +958,6 @@ async def chat(request: Request, data: dict = Body(...)):
                 confidence = matches[0]["score"]
                 used_global_fallback = True
             else:
-                # simple global substring fallback
                 cand = []
                 for r in all_rows:
                     if en_msg.lower() in (r["text"] or "").lower():
@@ -809,11 +972,10 @@ async def chat(request: Request, data: dict = Body(...)):
     if used_global_fallback:
         logger.info("[retrieval] used global fallback search across topics to find candidates")
 
-    # Compose final reply
+    # Compose final reply
     reply_user_lang = ""
     try:
         if matches:
-            # find best candidate matching user's language
             chosen = None
             for m in matches:
                 m_lang = (m.get("lang") or "und").split("-")[0].lower()
@@ -824,11 +986,9 @@ async def chat(request: Request, data: dict = Body(...)):
                 chosen = matches[0]
             chosen_text = chosen.get("reply") or chosen.get("text") or ""
             chosen_lang = chosen.get("lang") or "und"
-            # if candidate already in user language, use it
             if chosen_lang and reply_lang and chosen_lang.split("-")[0].lower() == reply_lang.split("-")[0].lower():
                 reply_user_lang = chosen_text
             else:
-                # translate chosen_text -> user's language
                 try:
                     reply_user_lang = translate_text(chosen_text, chosen_lang or "und", reply_lang)
                 except Exception:
@@ -837,9 +997,8 @@ async def chat(request: Request, data: dict = Body(...)):
         logger.warning(f"[compose] candidate selection error: {e}")
         reply_user_lang = ""
 
-    # If
+    # If no candidate reply, synthesize
     if not reply_user_lang:
-        # Build scratchpad in English using matches converted to EN
         facts_en = []
         for m in matches[:6]:
             t = m.get("reply") or m.get("text") or ""
@@ -853,11 +1012,9 @@ async def chat(request: Request, data: dict = Body(...)):
                 t_en = t
             if t_en:
                 facts_en.append(dedupe_sentences(t_en))
-        # synthesize simple reply
         if not facts_en:
             reply_en = "I don't have enough context yet — could you give more details or add knowledge with /add?"
         else:
-            # simple synthesis: join top facts
             reply_en = "\n\n".join(facts_en[:3])
             if intent == "solution":
                 parts = []
@@ -868,8 +1025,6 @@ async def chat(request: Request, data: dict = Body(...)):
                 reply_en = "Solutions:\n- " + "\n- ".join(parts[:6]) if parts else reply_en
             if intent == "why":
                 reply_en = reply_en + " It is useful because it provides consolidated information."
-
-        # translate to user language
         if reply_lang and reply_lang.split("-")[0].lower() not in ("en","eng","", "und"):
             try:
                 reply_user_lang = translate_from_english(reply_en, reply_lang)
@@ -878,15 +1033,12 @@ async def chat(request: Request, data: dict = Body(...)):
         else:
             reply_user_lang = reply_en
 
-    # Post-processing (intent formatting) - try to keep in user's language if possible
    def postprocess_final(text: str, intent_label: str, lang_code: str) -> str:
         if not text:
             return text
         if intent_label == "solution":
-            # if it already contains Solutions:, return as is, else try to construct bullets
             if "Solutions:" in text or text.strip().startswith("- "):
                 return text
-            # attempt to create bullets by splitting sentences
             parts = []
             for p in re.split(r'\n+', text):
                 for s in re.split(r'[.?!]\s+', p):
@@ -970,7 +1122,7 @@ async def chat(request: Request, data: dict = Body(...)):
 
     return {"reply": reply_final, "topic": topic, "language": reply_lang, "emoji": emoji, "confidence": round(confidence, 2), "flags": flags}
 
-# ----- Leaderboard, admin and other endpoints
+# ----- Leaderboard, admin and other endpoints -----
 @app.get("/leaderboard")
 async def leaderboard(topic: str = Query("general")):
     topic = str(topic or "general").strip() or "general"
@@ -1175,7 +1327,7 @@ async def frontend_dashboard():
 # ----- Startup behavior: load models and introspect language module -----
 @app.on_event("startup")
 async def startup_event():
-    global embed_model, spell, moderator, llm_tokenizer, llm_model, startup_time, language_module
+    global embed_model, spell, moderator, llm_tokenizer, llm_model, startup_time, language_module, app_start_time
     t0 = time.time()
     logger.info("[JusticeAI] startup: loading components")
 
@@ -1236,12 +1388,7 @@ async def startup_event():
 
     startup_time = round(time.time() - t0, 2)
    logger.info(f"[JusticeAI] startup complete in {startup_time}s")
-
-    try:
-        justice_brain.load_capabilities()
-        justice_brain.warmup()
-    except Exception as e:
-        logger.warning(f"[JusticeBrain] warmup error: {e}")
+    app_start_time = time.time()
 
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
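
The new infer_topic_from_message helper picks a topic by embedding nearest neighbor when embed_model is available, falling back to keyword matching otherwise. A minimal standalone sketch of the same idea, assuming a sentence-transformers model is available (the exact model app.py loads into embed_model is not shown in this diff, so "all-MiniLM-L6-v2" here is an assumption):

# Minimal sketch of the embedding-based topic inference used by
# infer_topic_from_message in this commit. Assumption: the
# sentence-transformers package is installed, and "all-MiniLM-L6-v2"
# stands in for whatever model app.py actually assigns to embed_model.
import torch
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")
topics = ["billing", "legal aid", "general"]  # hypothetical topic list

def infer_topic(msg: str) -> str:
    if not msg:
        return "general"
    msg_emb = model.encode([msg], convert_to_tensor=True)       # shape (1, d)
    topic_embs = model.encode(topics, convert_to_tensor=True)   # shape (n, d)
    # cosine_similarity broadcasts (1, d) against (n, d) -> (n,) scores
    sims = torch.nn.functional.cosine_similarity(msg_emb, topic_embs)
    return topics[int(torch.argmax(sims).item())]

print(infer_topic("How do I appeal a parking fine?"))

The same nearest-neighbor pattern also drives refine_or_update above: a new fact is merged into the closest existing row only when its cosine score exceeds 0.75, otherwise it is inserted as a fresh row.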