Princeaka committed on
Commit 491f92c · verified · 1 parent: 9f95176

Update app.py

Files changed (1):
  1. app.py  +333 −111

app.py CHANGED
@@ -111,6 +111,13 @@ try:
 except Exception:
     detect_lang = None
 
+# Optional fuzzy matching for spell tolerance
+try:
+    from difflib import SequenceMatcher
+    FUZZY_AVAILABLE = True
+except Exception:
+    FUZZY_AVAILABLE = False
+
 # Moderator pipeline (optional)
 moderator = None
 try:
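Note on the new import guard: difflib is part of the Python standard library, so SequenceMatcher should always be importable on a normal CPython install; the try/except mainly protects stripped-down runtimes. For reference, a minimal sketch of what the guard enables (ratio values are approximate):

from difflib import SequenceMatcher

# ratio() = 2*M / (len(a) + len(b)), where M counts the characters in the
# longest matching blocks found by difflib's heuristic.
print(SequenceMatcher(None, "install", "instal").ratio())   # ~0.92
print(SequenceMatcher(None, "billing", "payment").ratio())  # ~0.14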
@@ -643,6 +650,15 @@ def infer_topic_with_ollama(msg: str, topics: List[str], model: str = OLLAMA_MOD
 # -------------------------
 # Simple fallback topic inference (NEW)
 # -------------------------
+def fuzzy_match_score(s1: str, s2: str) -> float:
+    """
+    Calculate fuzzy match score between two strings (0.0 to 1.0).
+    Handles spell errors and variations.
+    """
+    if not FUZZY_AVAILABLE:
+        return 1.0 if s1.lower() == s2.lower() else 0.0
+    return SequenceMatcher(None, s1.lower(), s2.lower()).ratio()
+
 def infer_topic_from_message(msg: str, topics: List[str]) -> Optional[str]:
     """
     Fallback topic inference: tries keyword matching against topic names and
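How fuzzy_match_score behaves in practice, as a quick sketch (assumes FUZZY_AVAILABLE is True; the numbers are approximate SequenceMatcher ratios):

fuzzy_match_score("billing", "biling")     # ~0.92: one dropped letter still matches
fuzzy_match_score("security", "secuirty")  # ~0.88: transposed letters
fuzzy_match_score("billing", "install")    # ~0.43: unrelated words stay well below the 0.7/0.75 thresholds used below

When difflib is unavailable the function degrades to exact case-insensitive equality, so misspellings simply stop matching rather than raising.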
@@ -651,7 +667,8 @@ def infer_topic_from_message(msg: str, topics: List[str]) -> Optional[str]:
     if not msg or not topics:
         return None
     low = msg.lower()
-    # Try exact topic token matches
+
+    # Try exact topic token matches first
     for t in topics:
         if not t:
             continue
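For context, the exact-token pass splits each topic name on whitespace, hyphens, and underscores, then looks for each piece as a whole word in the lowercased message. A standalone illustration (the topic name is hypothetical):

import re

topic = "account-billing"
low = "i have a question about billing"
for w in re.split(r'[\s\-_]+', topic.lower()):
    if w and re.search(r'\b' + re.escape(w) + r'\b', low):
        print("matched token:", w)  # -> matched token: billing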
@@ -662,12 +679,36 @@ def infer_topic_from_message(msg: str, topics: List[str]) -> Optional[str]:
         for w in re.split(r'[\s\-_]+', token):
             if w and re.search(r'\b' + re.escape(w) + r'\b', low):
                 return t
+
+    # Try fuzzy matching for spell tolerance
+    if FUZZY_AVAILABLE:
+        best_match = None
+        best_score = 0.0
+        for t in topics:
+            if not t:
+                continue
+            token = str(t).lower()
+            # Check fuzzy match against whole message
+            score = fuzzy_match_score(token, low)
+            if score > 0.7 and score > best_score:
+                best_score = score
+                best_match = t
+            # Check fuzzy match against individual words
+            for word in low.split():
+                if len(word) > 3:  # Only check meaningful words
+                    score = fuzzy_match_score(token, word)
+                    if score > 0.75 and score > best_score:
+                        best_score = score
+                        best_match = t
+        if best_match:
+            return best_match
+
     # If no direct match, try heuristics: map some keywords to topics
     heuristics = {
-        "security": ["security", "vulnerability", "exploit", "attack", "auth"],
-        "billing": ["bill", "invoice", "payment", "charge"],
-        "installation": ["install", "setup", "deploy", "deployment"],
-        "general": ["help", "question", "how", "what", "why"]
+        "security": ["security", "vulnerability", "exploit", "attack", "auth", "password", "login"],
+        "billing": ["bill", "invoice", "payment", "charge", "price", "cost"],
+        "installation": ["install", "setup", "deploy", "deployment", "configure"],
+        "general": ["help", "question", "how", "what", "why", "issue", "problem"]
     }
     for topic, kws in heuristics.items():
         for kw in kws:
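The fuzzy pass above uses two thresholds: 0.7 against the whole message and a stricter 0.75 against individual words longer than three characters. A self-contained sketch of the per-word branch (topics and message are hypothetical):

topics = ["billing", "security"]
low = "problem with my biling"
best_match, best_score = None, 0.0
for t in topics:
    for word in low.split():
        if len(word) > 3:  # skip short stopwords
            score = fuzzy_match_score(t.lower(), word)
            if score > 0.75 and score > best_score:
                best_match, best_score = t, score
print(best_match, round(best_score, 2))  # -> billing 0.92

so the misspelled "biling" still routes to the billing topic.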
@@ -677,6 +718,70 @@ def infer_topic_from_message(msg: str, topics: List[str]) -> Optional[str]:
                 return topic
     return None
 
+def infer_topic_with_embeddings(msg: str, topics: List[str], knowledge_rows: List[dict]) -> Optional[str]:
+    """
+    Use cosine similarity on embeddings to infer the best matching topic.
+    This provides semantic understanding instead of just keyword matching.
+    """
+    if not embed_model or not topics or not knowledge_rows:
+        return None
+
+    try:
+        # Compute query embedding
+        q_emb = embed_model.encode([msg], convert_to_tensor=True, show_progress_bar=False)[0]
+
+        # Group knowledge by topic and compute average embedding per topic
+        topic_embeddings = {}
+        topic_counts = {}
+
+        for kr in knowledge_rows:
+            t = kr.get("topic", "general")
+            if t not in topics:
+                continue
+            emb_bytes = kr.get("embedding")
+            if emb_bytes is None:
+                continue
+            emb_tensor = bytes_to_tensor(emb_bytes)
+            if emb_tensor is None:
+                continue
+
+            if t not in topic_embeddings:
+                topic_embeddings[t] = emb_tensor
+                topic_counts[t] = 1
+            else:
+                topic_embeddings[t] = topic_embeddings[t] + emb_tensor
+                topic_counts[t] += 1
+
+        # Average the embeddings
+        for t in topic_embeddings:
+            topic_embeddings[t] = topic_embeddings[t] / topic_counts[t]
+
+        if not topic_embeddings:
+            return None
+
+        # Compute cosine similarity with each topic
+        best_topic = None
+        best_score = 0.0
+
+        for t, t_emb in topic_embeddings.items():
+            try:
+                score = float(torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), t_emb.unsqueeze(0), dim=1)[0])
+                if score > best_score:
+                    best_score = score
+                    best_topic = t
+            except Exception:
+                continue
+
+        # Only return if confidence is high enough
+        if best_score > 0.4:
+            logger.info(f"[topic inference] embedding-based: {best_topic} (score={best_score:.2f})")
+            return best_topic
+
+    except Exception as e:
+        logger.debug(f"[topic inference] embedding error: {e}")
+
+    return None
+
 # -------------------------
 # Boilerplate detection & reply helpers
 # -------------------------
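infer_topic_with_embeddings builds one centroid per topic (running sum divided by count over the stored vectors) and picks the topic whose centroid is most cosine-similar to the query, with a 0.4 floor. A self-contained sketch of that centroid-and-compare step with toy 4-dimensional tensors (real vectors come from embed_model and bytes_to_tensor):

import torch
import torch.nn.functional as F

rows = [
    {"topic": "billing",  "emb": torch.tensor([0.9, 0.1, 0.0, 0.0])},
    {"topic": "billing",  "emb": torch.tensor([0.8, 0.2, 0.0, 0.0])},
    {"topic": "security", "emb": torch.tensor([0.0, 0.1, 0.9, 0.2])},
]

# Running sum + count, then divide: the same averaging scheme as the diff.
sums, counts = {}, {}
for r in rows:
    sums[r["topic"]] = sums.get(r["topic"], torch.zeros(4)) + r["emb"]
    counts[r["topic"]] = counts.get(r["topic"], 0) + 1
centroids = {t: sums[t] / counts[t] for t in sums}

q = torch.tensor([0.85, 0.15, 0.0, 0.0])  # hypothetical query embedding
scores = {t: float(F.cosine_similarity(q.unsqueeze(0), c.unsqueeze(0), dim=1)[0])
          for t, c in centroids.items()}
best = max(scores, key=scores.get)
print(best, round(scores[best], 2))  # -> billing ~1.0

One caveat worth noting: averaging raw, unnormalized embeddings lets long or atypical entries dominate a centroid; normalizing each vector before summing is a common refinement.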
@@ -1440,29 +1545,59 @@ async def chat(request: Request, data: dict = Body(...)):
     except Exception:
         en_msg = raw_msg
 
-    # Determine topic: Ollama first, then embeddings, then keyword
+    # Load ALL knowledge entries first (needed for embedding-based topic inference)
+    try:
+        with engine_knowledge.begin() as conn:
+            all_rows = conn.execute(sql_text("SELECT id, text, reply, language, embedding, topic FROM knowledge ORDER BY created_at DESC")).fetchall()
+    except Exception as e:
+        record_request(time.time() - t0)
+        return JSONResponse(status_code=500, content={"error": "failed to read knowledge", "details": str(e)})
+
+    all_knowledge_rows = [{"id": r[0], "text": r[1] or "", "reply": r[2] or "", "lang": r[3] or "und", "embedding": r[4], "topic": r[5] or "general"} for r in all_rows]
+
+    # Get list of known topics
+    known_topics = list(set([kr.get("topic", "general") for kr in all_knowledge_rows if kr.get("topic")]))
+
+    # Determine topic: Embeddings first (best), then Ollama, then keyword matching
     topic = "general"
     try:
         if not topic_hint:
-            with engine_knowledge.begin() as conn:
-                rows = conn.execute(sql_text("SELECT DISTINCT topic FROM knowledge")).fetchall()
-            known_topics = [r[0] for r in rows if r and r[0]]
             chosen = None
-            try:
-                if (ollama_http_available() or ollama_cli_available()) and known_topics:
-                    possible = infer_topic_with_ollama(en_msg, known_topics)
-                    if possible:
-                        chosen = possible
-            except Exception:
-                chosen = None
+
+            # 1. Try embedding-based topic inference (BEST - semantic understanding)
+            if embed_model is not None and all_knowledge_rows:
+                try:
+                    chosen = infer_topic_with_embeddings(en_msg, known_topics, all_knowledge_rows)
+                    if chosen:
+                        logger.info(f"[topic] Selected via embeddings: {chosen}")
+                except Exception as e:
+                    logger.debug(f"[topic] embedding inference failed: {e}")
+
+            # 2. Fallback to Ollama if embeddings didn't work
+            if not chosen:
+                try:
+                    if (ollama_http_available() or ollama_cli_available()) and known_topics:
+                        possible = infer_topic_with_ollama(en_msg, known_topics)
+                        if possible:
+                            chosen = possible
+                            logger.info(f"[topic] Selected via Ollama: {chosen}")
+                except Exception as e:
+                    logger.debug(f"[topic] ollama inference failed: {e}")
+
+            # 3. Final fallback to keyword/fuzzy matching
             if not chosen:
-                # use our local fallback inference
                 chosen = infer_topic_from_message(en_msg, known_topics)
+                if chosen:
+                    logger.info(f"[topic] Selected via keyword/fuzzy: {chosen}")
+
             topic = chosen or "general"
         else:
            topic = topic_hint or "general"
-    except Exception:
+    except Exception as e:
+        logger.warning(f"[topic] inference error: {e}")
        topic = topic_hint or "general"
+
+    logger.info(f"[chat] Final topic: {topic}")
 
     # Moderation
     flags = {}
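The topic selection is now a three-stage fallback chain (embeddings, then Ollama, then keyword/fuzzy) in which the first non-empty answer wins and every stage is allowed to fail silently. The same pattern as a generic helper, with purely illustrative wiring:

from typing import Callable, List, Optional

def first_topic(msg: str, strategies: List[Callable[[str], Optional[str]]],
                default: str = "general") -> str:
    # Try each strategy in priority order; a raised exception or a
    # falsy result simply moves on to the next strategy.
    for strategy in strategies:
        try:
            result = strategy(msg)
            if result:
                return result
        except Exception:
            continue
    return default

# topic = first_topic(en_msg, [
#     lambda m: infer_topic_with_embeddings(m, known_topics, all_knowledge_rows),
#     lambda m: infer_topic_with_ollama(m, known_topics),
#     lambda m: infer_topic_from_message(m, known_topics),
# ])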
@@ -1477,107 +1612,179 @@ async def chat(request: Request, data: dict = Body(...)):
     except Exception:
         pass
 
-    # Load knowledge entries for this topic only
-    try:
-        with engine_knowledge.begin() as conn:
-            rows = conn.execute(sql_text("SELECT id, text, reply, language, embedding FROM knowledge WHERE topic = :topic ORDER BY created_at DESC"), {"topic": topic}).fetchall()
-    except Exception as e:
-        record_request(time.time() - t0)
-        return JSONResponse(status_code=500, content={"error": "failed to read knowledge", "details": str(e)})
-
-    knowledge_rows = [{"id": r[0], "text": r[1] or "", "reply": r[2] or "", "lang": r[3] or "und", "embedding": r[4]} for r in rows]
+    # Filter knowledge entries for this topic only
+    knowledge_rows = [kr for kr in all_knowledge_rows if kr.get("topic") == topic]
 
-    # Retrieval (embedding-first) - Optimized and now uses stored embeddings when available
+    # Retrieval using cosine similarity with spell tolerance
     matches: List[str] = []
     confidence = 0.0
+    match_lang = "en"
+
     try:
-        # If we have an embed model and any stored embeddings, prefer using stored embeddings to avoid recomputing
-        stored_embs = []
-        stored_indices = []
-        for i, kr in enumerate(knowledge_rows):
-            if kr.get("embedding") is not None:
-                t = bytes_to_tensor(kr["embedding"])
-                if t is not None:
-                    stored_embs.append(t)
-                    stored_indices.append(i)
-        # If we have stored embeddings and torch is available, compute similarity using them
-        if torch is not None and stored_embs and embed_model is not None:
-            try:
-                # Stack stored embeddings into a single tensor
-                embs_tensor = torch.stack(stored_embs)
-                # Compute query embedding
-                q_emb = await run_blocking_with_timeout(lambda: embed_model.encode([en_msg], convert_to_tensor=True, show_progress_bar=False)[0], timeout=MODEL_TIMEOUT)
-                if not isinstance(q_emb, torch.Tensor):
-                    try:
-                        q_emb = torch.from_numpy(q_emb.cpu().numpy())
-                    except Exception:
-                        pass
-                # Ensure shapes align: embs_tensor (N, dim), q_emb (dim,) -> unsqueeze
-                try:
-                    scores = torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), embs_tensor, dim=1)
-                except Exception:
-                    # Try alternative orientation
-                    scores = torch.nn.functional.cosine_similarity(embs_tensor, q_emb.unsqueeze(0), dim=1)
-                cand = []
-                for idx, s in enumerate(scores):
-                    i_orig = stored_indices[idx]
-                    kr = knowledge_rows[i_orig]
-                    candidate_text = (kr["reply"] or kr["text"]).strip()
-                    if is_boilerplate_candidate(candidate_text):
-                        continue
-                    s_float = float(s)
-                    if s_float >= 0.30:
-                        cand.append({"text": candidate_text, "lang": kr["lang"], "score": s_float})
-                cand = sorted(cand, key=lambda x: -x["score"])
-                matches = [c["text"] for c in cand]
-                confidence = float(cand[0]["score"]) if cand else 0.0
-            except asyncio.TimeoutError:
-                logger.warning("[retrieval] embedding encode timed out (query)")
-                matches = []
-            except Exception as e:
-                logger.warning(f"[retrieval] embedding (stored) error: {e}")
-                matches = []
-        # If we didn't find matches via stored embeddings, fallback to computing embeddings for all texts if embed_model available
-        if not matches and embed_model is not None and knowledge_rows:
-            texts = [kr["text"] for kr in knowledge_rows]
-            try:
-                # compute embeddings for texts and query
-                embs = await run_blocking_with_timeout(lambda: embed_model.encode(texts, convert_to_tensor=True, show_progress_bar=False), timeout=MODEL_TIMEOUT)
-                q_emb = await run_blocking_with_timeout(lambda: embed_model.encode([en_msg], convert_to_tensor=True, show_progress_bar=False)[0], timeout=MODEL_TIMEOUT)
-                import torch as _torch
-                try:
-                    scores = _torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), embs, dim=1)
-                except Exception:
-                    scores = _torch.nn.functional.cosine_similarity(embs, q_emb.unsqueeze(0), dim=1)
-                cand = []
-                for i in range(scores.shape[0]):
-                    s = float(scores[i])
-                    kr = knowledge_rows[i]
-                    candidate_text = (kr["reply"] or kr["text"]).strip()
-                    if is_boilerplate_candidate(candidate_text):
-                        continue
-                    if s >= 0.30:
-                        cand.append({"text": candidate_text, "lang": kr["lang"], "score": s})
-                cand = sorted(cand, key=lambda x: -x["score"])
-                matches = [c["text"] for c in cand]
-                confidence = float(cand[0]["score"]) if cand else 0.0
-            except asyncio.TimeoutError:
-                logger.warning("[retrieval] embedding encode timed out")
-                matches = []
-            except Exception as e:
-                logger.warning(f"[retrieval] embedding error: {e}")
-                matches = []
-        else:
-            # No embed model: fallback to simple keyword substring matching inside replies/text
-            cand = []
-            for kr in knowledge_rows:
-                txt = (kr["reply"] or kr["text"]) or ""
-                if en_msg.lower() in txt.lower():
-                    if is_boilerplate_candidate(txt):
-                        continue
-                    cand.append({"text": txt, "lang": kr["lang"], "score": 0.0})
-            matches = [c["text"] for c in cand]
-            confidence = 0.0
+        # If we have an embed model, use semantic similarity (BEST approach)
+        if embed_model is not None and knowledge_rows:
+            stored_embs = []
+            stored_indices = []
+
+            # Collect stored embeddings
+            for i, kr in enumerate(knowledge_rows):
+                if kr.get("embedding") is not None:
+                    t = bytes_to_tensor(kr["embedding"])
+                    if t is not None:
+                        stored_embs.append(t)
+                        stored_indices.append(i)
+
+            # Use stored embeddings if available
+            if torch is not None and stored_embs:
+                try:
+                    # Stack stored embeddings
+                    embs_tensor = torch.stack(stored_embs)
+
+                    # Compute query embedding
+                    q_emb = await run_blocking_with_timeout(
+                        lambda: embed_model.encode([en_msg], convert_to_tensor=True, show_progress_bar=False)[0],
+                        timeout=MODEL_TIMEOUT
+                    )
+
+                    if not isinstance(q_emb, torch.Tensor):
+                        q_emb = torch.from_numpy(q_emb.cpu().numpy())
+
+                    # Compute cosine similarity
+                    try:
+                        scores = torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), embs_tensor, dim=1)
+                    except Exception:
+                        scores = torch.nn.functional.cosine_similarity(embs_tensor, q_emb.unsqueeze(0), dim=1)
+
+                    # Collect candidates with scores
+                    cand = []
+                    for idx, s in enumerate(scores):
+                        i_orig = stored_indices[idx]
+                        kr = knowledge_rows[i_orig]
+                        candidate_text = (kr["reply"] or kr["text"]).strip()
+
+                        if is_boilerplate_candidate(candidate_text):
+                            continue
+
+                        s_float = float(s)
+                        # Lower threshold for better recall
+                        if s_float >= 0.25:
+                            cand.append({
+                                "text": candidate_text,
+                                "lang": kr["lang"],
+                                "score": s_float
+                            })
+
+                    # Sort by score
+                    cand = sorted(cand, key=lambda x: -x["score"])
+                    matches = [c["text"] for c in cand[:5]]  # Top 5 matches
+                    confidence = float(cand[0]["score"]) if cand else 0.0
+                    match_lang = cand[0]["lang"] if cand else "en"
+
+                    logger.info(f"[retrieval] Found {len(matches)} matches via embeddings, best score: {confidence:.2f}")
+
+                except asyncio.TimeoutError:
+                    logger.warning("[retrieval] embedding encode timed out")
+                except Exception as e:
+                    logger.warning(f"[retrieval] embedding error: {e}")
+
+            # Fallback: compute embeddings on the fly if no stored embeddings
+            if not matches and knowledge_rows:
+                try:
+                    texts = [kr["text"] for kr in knowledge_rows]
+                    embs = await run_blocking_with_timeout(
+                        lambda: embed_model.encode(texts, convert_to_tensor=True, show_progress_bar=False),
+                        timeout=MODEL_TIMEOUT
+                    )
+                    q_emb = await run_blocking_with_timeout(
+                        lambda: embed_model.encode([en_msg], convert_to_tensor=True, show_progress_bar=False)[0],
+                        timeout=MODEL_TIMEOUT
+                    )
+
+                    try:
+                        scores = torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), embs, dim=1)
+                    except Exception:
+                        scores = torch.nn.functional.cosine_similarity(embs, q_emb.unsqueeze(0), dim=1)
+
+                    cand = []
+                    for i in range(scores.shape[0]):
+                        s = float(scores[i])
+                        kr = knowledge_rows[i]
+                        candidate_text = (kr["reply"] or kr["text"]).strip()
+
+                        if is_boilerplate_candidate(candidate_text):
+                            continue
+
+                        if s >= 0.25:
+                            cand.append({
+                                "text": candidate_text,
+                                "lang": kr["lang"],
+                                "score": s
+                            })
+
+                    cand = sorted(cand, key=lambda x: -x["score"])
+                    matches = [c["text"] for c in cand[:5]]
+                    confidence = float(cand[0]["score"]) if cand else 0.0
+                    match_lang = cand[0]["lang"] if cand else "en"
+
+                    logger.info(f"[retrieval] Found {len(matches)} matches via on-the-fly embeddings, best score: {confidence:.2f}")
+
+                except asyncio.TimeoutError:
+                    logger.warning("[retrieval] embedding encode timed out")
+                except Exception as e:
+                    logger.warning(f"[retrieval] embedding error: {e}")
+
+        # Final fallback: fuzzy keyword matching with spell tolerance
+        if not matches and knowledge_rows:
+            logger.info("[retrieval] Using fuzzy keyword matching fallback")
+            cand = []
+
+            for kr in knowledge_rows:
+                txt = (kr["reply"] or kr["text"]) or ""
+                txt_lower = txt.lower()
+                msg_lower = en_msg.lower()
+
+                # Exact substring match
+                if msg_lower in txt_lower:
+                    if not is_boilerplate_candidate(txt):
+                        cand.append({"text": txt, "lang": kr["lang"], "score": 0.8})
+                    continue
+
+                # Fuzzy matching for spell tolerance
+                if FUZZY_AVAILABLE and len(en_msg) > 3:
+                    # Check fuzzy match against text
+                    fuzzy_score = fuzzy_match_score(en_msg, txt)
+                    if fuzzy_score > 0.6:
+                        if not is_boilerplate_candidate(txt):
+                            cand.append({"text": txt, "lang": kr["lang"], "score": fuzzy_score * 0.7})
+                        continue
+
+                    # Check fuzzy match against individual words
+                    msg_words = [w for w in msg_lower.split() if len(w) > 3]
+                    txt_words = [w for w in txt_lower.split() if len(w) > 3]
+
+                    for msg_word in msg_words:
+                        for txt_word in txt_words:
+                            word_score = fuzzy_match_score(msg_word, txt_word)
+                            if word_score > 0.75:
+                                if not is_boilerplate_candidate(txt):
+                                    cand.append({"text": txt, "lang": kr["lang"], "score": word_score * 0.5})
+                                break
+
+            # Remove duplicates and sort
+            seen = set()
+            unique_cand = []
+            for c in cand:
+                if c["text"] not in seen:
+                    seen.add(c["text"])
+                    unique_cand.append(c)
+
+            cand = sorted(unique_cand, key=lambda x: -x["score"])
+            matches = [c["text"] for c in cand[:5]]
+            confidence = float(cand[0]["score"]) if cand else 0.0
+            match_lang = cand[0]["lang"] if cand else "en"
+
+            logger.info(f"[retrieval] Found {len(matches)} matches via fuzzy matching, best score: {confidence:.2f}")
+
     except Exception as e:
         logger.warning(f"[retrieval] error: {e}")
         matches = []
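In the fuzzy fallback a single knowledge row can be appended more than once (a substring hit at 0.8 plus word-level hits at word_score * 0.5), so the dedupe step matters. The diff deduplicates in append order and then sorts; sorting first instead guarantees the surviving duplicate is the best-scoring one, as in this standalone sketch with hypothetical candidates:

cand = [
    {"text": "Reset your password from the login page.", "score": 0.80},
    {"text": "Invoices are emailed monthly.",            "score": 0.41},
    {"text": "Reset your password from the login page.", "score": 0.38},
]
cand = sorted(cand, key=lambda x: -x["score"])  # rank before deduping
seen, unique_cand = set(), []
for c in cand:
    if c["text"] not in seen:
        seen.add(c["text"])
        unique_cand.append(c)
print([round(c["score"], 2) for c in unique_cand])  # -> [0.8, 0.41]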
@@ -1612,17 +1819,32 @@ async def chat(request: Request, data: dict = Body(...)):
         record_request(time.time() - t0)
         return {"reply": reply_final, "topic": topic, "language": reply_lang, "emoji": "", "confidence": round(confidence,2), "flags": flags}
 
-    # Post-process and translate
+    # Post-process and translate back to user's language
     reply_en = dedupe_sentences(reply_en)
     reply_final = reply_en
-    lang_code = (reply_lang or "und").split("-")[0].lower()
-    if lang_code not in ("en", "eng", "und", ""):
+
+    # Determine target language for translation
+    target_lang = reply_lang if reply_lang and reply_lang not in ("en", "eng", "und", "") else None
+
+    # If match was in a different language, try to use that
+    if match_lang and match_lang not in ("en", "eng", "und", ""):
+        # If user's language matches the match language, use it
+        if target_lang and target_lang.split("-")[0].lower() == match_lang.split("-")[0].lower():
+            target_lang = match_lang
+
+    # Translate to user's language
+    if target_lang:
+        lang_code = target_lang.split("-")[0].lower()
         try:
+            logger.info(f"[translation] Translating reply from en to {lang_code}")
             reply_final = translate_from_english(reply_en, lang_code)
             reply_final = dedupe_sentences(reply_final)
+            logger.info(f"[translation] Translation successful")
         except Exception as exc:
             logger.warning(f"[translation] failed to translate reply_en -> {lang_code}: {exc}")
             reply_final = reply_en
+    else:
+        logger.info("[translation] No translation needed, using English")
 
     # Mood & emoji append
     emoji = ""
 
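One quirk in the new translation guard: target_lang is tested against ("en", "eng", "und", "") before the region subtag is stripped, so a full tag like "en-US" passes the filter and gets routed to an English-to-English translation, whereas the deleted code normalized the tag first. A small sketch of the normalize-first variant:

def needs_translation(tag: str) -> bool:
    # Reduce a BCP-47-style tag to its primary subtag before testing.
    primary = (tag or "und").split("-")[0].lower()
    return primary not in ("en", "eng", "und", "")

print(needs_translation("pt-BR"))  # True
print(needs_translation("en-US"))  # False (the raw-tag check above would say True)
print(needs_translation("und"))    # False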