Update app.py
app.py
CHANGED
@@ -111,6 +111,13 @@ try:
 except Exception:
     detect_lang = None

+# Optional fuzzy matching for spell tolerance
+try:
+    from difflib import SequenceMatcher
+    FUZZY_AVAILABLE = True
+except Exception:
+    FUZZY_AVAILABLE = False
+
 # Moderator pipeline (optional)
 moderator = None
 try:
@@ -643,6 +650,15 @@ def infer_topic_with_ollama(msg: str, topics: List[str], model: str = OLLAMA_MOD
 # -------------------------
 # Simple fallback topic inference (NEW)
 # -------------------------
+def fuzzy_match_score(s1: str, s2: str) -> float:
+    """
+    Calculate fuzzy match score between two strings (0.0 to 1.0).
+    Handles spell errors and variations.
+    """
+    if not FUZZY_AVAILABLE:
+        return 1.0 if s1.lower() == s2.lower() else 0.0
+    return SequenceMatcher(None, s1.lower(), s2.lower()).ratio()
+
 def infer_topic_from_message(msg: str, topics: List[str]) -> Optional[str]:
     """
     Fallback topic inference: tries keyword matching against topic names and
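Note: difflib.SequenceMatcher is part of the Python standard library, so the FUZZY_AVAILABLE guard should only trip in very unusual environments. Its .ratio() is 2*M/T, where M is the number of matched characters and T the combined length of both strings. A standalone sanity check of the kind of scores this helper produces (invented inputs, not from app.py):

    from difflib import SequenceMatcher

    def ratio(a: str, b: str) -> float:
        # Same comparison fuzzy_match_score performs after lowercasing
        return SequenceMatcher(None, a.lower(), b.lower()).ratio()

    print(ratio("install", "install"))  # 1.0  -- identical strings
    print(ratio("instal", "install"))   # ~0.92 -- one dropped letter still clears the thresholds used below
    print(ratio("billing", "payment"))  # ~0.14 -- unrelated words score far below any threshold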
@@ -651,7 +667,8 @@ def infer_topic_from_message(msg: str, topics: List[str]) -> Optional[str]:
     if not msg or not topics:
         return None
     low = msg.lower()
-
+
+    # Try exact topic token matches first
     for t in topics:
         if not t:
             continue
@@ -662,12 +679,36 @@ def infer_topic_from_message(msg: str, topics: List[str]) -> Optional[str]:
         for w in re.split(r'[\s\-_]+', token):
             if w and re.search(r'\b' + re.escape(w) + r'\b', low):
                 return t
+
+    # Try fuzzy matching for spell tolerance
+    if FUZZY_AVAILABLE:
+        best_match = None
+        best_score = 0.0
+        for t in topics:
+            if not t:
+                continue
+            token = str(t).lower()
+            # Check fuzzy match against whole message
+            score = fuzzy_match_score(token, low)
+            if score > 0.7 and score > best_score:
+                best_score = score
+                best_match = t
+            # Check fuzzy match against individual words
+            for word in low.split():
+                if len(word) > 3:  # Only check meaningful words
+                    score = fuzzy_match_score(token, word)
+                    if score > 0.75 and score > best_score:
+                        best_score = score
+                        best_match = t
+        if best_match:
+            return best_match
+
     # If no direct match, try heuristics: map some keywords to topics
     heuristics = {
-        "security": ["security", "vulnerability", "exploit", "attack", "auth"],
-        "billing": ["bill", "invoice", "payment", "charge"],
-        "installation": ["install", "setup", "deploy", "deployment"],
-        "general": ["help", "question", "how", "what", "why"]
+        "security": ["security", "vulnerability", "exploit", "attack", "auth", "password", "login"],
+        "billing": ["bill", "invoice", "payment", "charge", "price", "cost"],
+        "installation": ["install", "setup", "deploy", "deployment", "configure"],
+        "general": ["help", "question", "how", "what", "why", "issue", "problem"]
     }
     for topic, kws in heuristics.items():
         for kw in kws:
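A hypothetical walk-through of the new fuzzy branch (invented message and topics, assuming the hunk above is applied): the exact \b-regex pass misses a misspelling, and the per-word fuzzy pass recovers it.

    topics = ["installation", "billing", "security"]
    msg = "my instalation keeps failing"
    # Exact pass: re.search(r'\binstallation\b', msg.lower()) finds nothing
    # Fuzzy pass, word "instalation" vs token "installation":
    #   SequenceMatcher ratio = 2*11/23 ~ 0.96 > 0.75, so best_match = "installation"
    # infer_topic_from_message(msg, topics) -> "installation"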
@@ -677,6 +718,70 @@ def infer_topic_from_message(msg: str, topics: List[str]) -> Optional[str]:
                 return topic
     return None

+def infer_topic_with_embeddings(msg: str, topics: List[str], knowledge_rows: List[dict]) -> Optional[str]:
+    """
+    Use cosine similarity on embeddings to infer the best matching topic.
+    This provides semantic understanding instead of just keyword matching.
+    """
+    if not embed_model or not topics or not knowledge_rows:
+        return None
+
+    try:
+        # Compute query embedding
+        q_emb = embed_model.encode([msg], convert_to_tensor=True, show_progress_bar=False)[0]
+
+        # Group knowledge by topic and compute average embedding per topic
+        topic_embeddings = {}
+        topic_counts = {}
+
+        for kr in knowledge_rows:
+            t = kr.get("topic", "general")
+            if t not in topics:
+                continue
+            emb_bytes = kr.get("embedding")
+            if emb_bytes is None:
+                continue
+            emb_tensor = bytes_to_tensor(emb_bytes)
+            if emb_tensor is None:
+                continue
+
+            if t not in topic_embeddings:
+                topic_embeddings[t] = emb_tensor
+                topic_counts[t] = 1
+            else:
+                topic_embeddings[t] = topic_embeddings[t] + emb_tensor
+                topic_counts[t] += 1
+
+        # Average the embeddings
+        for t in topic_embeddings:
+            topic_embeddings[t] = topic_embeddings[t] / topic_counts[t]
+
+        if not topic_embeddings:
+            return None
+
+        # Compute cosine similarity with each topic
+        best_topic = None
+        best_score = 0.0
+
+        for t, t_emb in topic_embeddings.items():
+            try:
+                score = float(torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), t_emb.unsqueeze(0), dim=1)[0])
+                if score > best_score:
+                    best_score = score
+                    best_topic = t
+            except Exception:
+                continue
+
+        # Only return if confidence is high enough
+        if best_score > 0.4:
+            logger.info(f"[topic inference] embedding-based: {best_topic} (score={best_score:.2f})")
+            return best_topic
+
+    except Exception as e:
+        logger.debug(f"[topic inference] embedding error: {e}")
+
+    return None
+
 # -------------------------
 # Boilerplate detection & reply helpers
 # -------------------------
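The function above builds a per-topic centroid (running sum divided by count) and scores the query against each centroid with cosine similarity. A toy, self-contained illustration of that math with invented 4-dimensional vectors:

    import torch
    import torch.nn.functional as F

    # Three stored embeddings for one topic (values invented for illustration)
    billing = [torch.tensor([0.9, 0.1, 0.0, 0.0]),
               torch.tensor([0.8, 0.2, 0.0, 0.0]),
               torch.tensor([1.0, 0.0, 0.0, 0.0])]
    centroid = sum(billing) / len(billing)  # same averaging as topic_embeddings[t] / topic_counts[t]
    query = torch.tensor([0.85, 0.15, 0.0, 0.0])
    score = F.cosine_similarity(query.unsqueeze(0), centroid.unsqueeze(0), dim=1)[0]
    print(float(score))  # ~0.998, comfortably above the 0.4 acceptance threshold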
@@ -1440,29 +1545,59 @@ async def chat(request: Request, data: dict = Body(...)):
     except Exception:
         en_msg = raw_msg

-    #
+    # Load ALL knowledge entries first (needed for embedding-based topic inference)
+    try:
+        with engine_knowledge.begin() as conn:
+            all_rows = conn.execute(sql_text("SELECT id, text, reply, language, embedding, topic FROM knowledge ORDER BY created_at DESC")).fetchall()
+    except Exception as e:
+        record_request(time.time() - t0)
+        return JSONResponse(status_code=500, content={"error": "failed to read knowledge", "details": str(e)})
+
+    all_knowledge_rows = [{"id": r[0], "text": r[1] or "", "reply": r[2] or "", "lang": r[3] or "und", "embedding": r[4], "topic": r[5] or "general"} for r in all_rows]
+
+    # Get list of known topics
+    known_topics = list(set([kr.get("topic", "general") for kr in all_knowledge_rows if kr.get("topic")]))
+
+    # Determine topic: Embeddings first (best), then Ollama, then keyword matching
     topic = "general"
     try:
         if not topic_hint:
-            with engine_knowledge.begin() as conn:
-                rows = conn.execute(sql_text("SELECT DISTINCT topic FROM knowledge")).fetchall()
-            known_topics = [r[0] for r in rows if r and r[0]]
             chosen = None
-
-
-
-
-
-
-
+
+            # 1. Try embedding-based topic inference (BEST - semantic understanding)
+            if embed_model is not None and all_knowledge_rows:
+                try:
+                    chosen = infer_topic_with_embeddings(en_msg, known_topics, all_knowledge_rows)
+                    if chosen:
+                        logger.info(f"[topic] Selected via embeddings: {chosen}")
+                except Exception as e:
+                    logger.debug(f"[topic] embedding inference failed: {e}")
+
+            # 2. Fallback to Ollama if embeddings didn't work
+            if not chosen:
+                try:
+                    if (ollama_http_available() or ollama_cli_available()) and known_topics:
+                        possible = infer_topic_with_ollama(en_msg, known_topics)
+                        if possible:
+                            chosen = possible
+                            logger.info(f"[topic] Selected via Ollama: {chosen}")
+                except Exception as e:
+                    logger.debug(f"[topic] ollama inference failed: {e}")
+
+            # 3. Final fallback to keyword/fuzzy matching
             if not chosen:
-                # use our local fallback inference
                 chosen = infer_topic_from_message(en_msg, known_topics)
+                if chosen:
+                    logger.info(f"[topic] Selected via keyword/fuzzy: {chosen}")
+
             topic = chosen or "general"
         else:
             topic = topic_hint or "general"
-    except Exception:
+    except Exception as e:
+        logger.warning(f"[topic] inference error: {e}")
        topic = topic_hint or "general"
+
+    logger.info(f"[chat] Final topic: {topic}")

     # Moderation
     flags = {}
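One detail worth flagging in the hunk above: list(set(...)) deduplicates known_topics but set iteration order can differ between interpreter runs, so the topic list fed to infer_topic_with_ollama is not stable across restarts. If a deterministic prompt matters, sorting is a cheap alternative (a suggestion, not part of this commit):

    rows = ["billing", "security", "billing", "general"]
    list(set(rows))    # deduplicated, but ordering varies across runs
    sorted(set(rows))  # deterministic: ['billing', 'general', 'security']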
@@ -1477,107 +1612,179 @@ async def chat(request: Request, data: dict = Body(...)):
     except Exception:
         pass

-    #
-
-        with engine_knowledge.begin() as conn:
-            rows = conn.execute(sql_text("SELECT id, text, reply, language, embedding FROM knowledge WHERE topic = :topic ORDER BY created_at DESC"), {"topic": topic}).fetchall()
-    except Exception as e:
-        record_request(time.time() - t0)
-        return JSONResponse(status_code=500, content={"error": "failed to read knowledge", "details": str(e)})
-
-    knowledge_rows = [{"id": r[0], "text": r[1] or "", "reply": r[2] or "", "lang": r[3] or "und", "embedding": r[4]} for r in rows]
+    # Filter knowledge entries for this topic only
+    knowledge_rows = [kr for kr in all_knowledge_rows if kr.get("topic") == topic]

-    # Retrieval
+    # Retrieval using cosine similarity with spell tolerance
     matches: List[str] = []
     confidence = 0.0
+    match_lang = "en"
+
     try:
-        # If we have an embed model
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # If we have an embed model, use semantic similarity (BEST approach)
+        if embed_model is not None and knowledge_rows:
+            stored_embs = []
+            stored_indices = []
+
+            # Collect stored embeddings
+            for i, kr in enumerate(knowledge_rows):
+                if kr.get("embedding") is not None:
+                    t = bytes_to_tensor(kr["embedding"])
+                    if t is not None:
+                        stored_embs.append(t)
+                        stored_indices.append(i)
+
+            # Use stored embeddings if available
+            if torch is not None and stored_embs:
+                try:
+                    # Stack stored embeddings
+                    embs_tensor = torch.stack(stored_embs)
+
+                    # Compute query embedding
+                    q_emb = await run_blocking_with_timeout(
+                        lambda: embed_model.encode([en_msg], convert_to_tensor=True, show_progress_bar=False)[0],
+                        timeout=MODEL_TIMEOUT
+                    )
+
+                    if not isinstance(q_emb, torch.Tensor):
                         q_emb = torch.from_numpy(q_emb.cpu().numpy())
+
+                    # Compute cosine similarity
+                    try:
+                        scores = torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), embs_tensor, dim=1)
                     except Exception:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                        scores = torch.nn.functional.cosine_similarity(embs_tensor, q_emb.unsqueeze(0), dim=1)
+
+                    # Collect candidates with scores
+                    cand = []
+                    for idx, s in enumerate(scores):
+                        i_orig = stored_indices[idx]
+                        kr = knowledge_rows[i_orig]
+                        candidate_text = (kr["reply"] or kr["text"]).strip()
+
+                        if is_boilerplate_candidate(candidate_text):
+                            continue
+
+                        s_float = float(s)
+                        # Lower threshold for better recall
+                        if s_float >= 0.25:
+                            cand.append({
+                                "text": candidate_text,
+                                "lang": kr["lang"],
+                                "score": s_float
+                            })
+
+                    # Sort by score
+                    cand = sorted(cand, key=lambda x: -x["score"])
+                    matches = [c["text"] for c in cand[:5]]  # Top 5 matches
+                    confidence = float(cand[0]["score"]) if cand else 0.0
+                    match_lang = cand[0]["lang"] if cand else "en"
+
+                    logger.info(f"[retrieval] Found {len(matches)} matches via embeddings, best score: {confidence:.2f}")
+
+                except asyncio.TimeoutError:
+                    logger.warning("[retrieval] embedding encode timed out")
+                except Exception as e:
+                    logger.warning(f"[retrieval] embedding error: {e}")
+
+            # Fallback: compute embeddings on the fly if no stored embeddings
+            if not matches and knowledge_rows:
                 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    texts = [kr["text"] for kr in knowledge_rows]
+                    embs = await run_blocking_with_timeout(
+                        lambda: embed_model.encode(texts, convert_to_tensor=True, show_progress_bar=False),
+                        timeout=MODEL_TIMEOUT
+                    )
+                    q_emb = await run_blocking_with_timeout(
+                        lambda: embed_model.encode([en_msg], convert_to_tensor=True, show_progress_bar=False)[0],
+                        timeout=MODEL_TIMEOUT
+                    )
+
+                    try:
+                        scores = torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), embs, dim=1)
+                    except Exception:
+                        scores = torch.nn.functional.cosine_similarity(embs, q_emb.unsqueeze(0), dim=1)
+
+                    cand = []
+                    for i in range(scores.shape[0]):
+                        s = float(scores[i])
+                        kr = knowledge_rows[i]
+                        candidate_text = (kr["reply"] or kr["text"]).strip()
+
+                        if is_boilerplate_candidate(candidate_text):
+                            continue
+
+                        if s >= 0.25:
+                            cand.append({
+                                "text": candidate_text,
+                                "lang": kr["lang"],
+                                "score": s
+                            })
+
+                    cand = sorted(cand, key=lambda x: -x["score"])
+                    matches = [c["text"] for c in cand[:5]]
+                    confidence = float(cand[0]["score"]) if cand else 0.0
+                    match_lang = cand[0]["lang"] if cand else "en"
+
+                    logger.info(f"[retrieval] Found {len(matches)} matches via on-the-fly embeddings, best score: {confidence:.2f}")
+
+                except asyncio.TimeoutError:
+                    logger.warning("[retrieval] embedding encode timed out")
+                except Exception as e:
+                    logger.warning(f"[retrieval] embedding error: {e}")
+
+        # Final fallback: fuzzy keyword matching with spell tolerance
+        if not matches and knowledge_rows:
+            logger.info("[retrieval] Using fuzzy keyword matching fallback")
            cand = []
+
            for kr in knowledge_rows:
                txt = (kr["reply"] or kr["text"]) or ""
-
-
+                txt_lower = txt.lower()
+                msg_lower = en_msg.lower()
+
+                # Exact substring match
+                if msg_lower in txt_lower:
+                    if not is_boilerplate_candidate(txt):
+                        cand.append({"text": txt, "lang": kr["lang"], "score": 0.8})
                    continue
-
-
-
+
+                # Fuzzy matching for spell tolerance
+                if FUZZY_AVAILABLE and len(en_msg) > 3:
+                    # Check fuzzy match against text
+                    fuzzy_score = fuzzy_match_score(en_msg, txt)
+                    if fuzzy_score > 0.6:
+                        if not is_boilerplate_candidate(txt):
+                            cand.append({"text": txt, "lang": kr["lang"], "score": fuzzy_score * 0.7})
+                        continue
+
+                    # Check fuzzy match against individual words
+                    msg_words = [w for w in msg_lower.split() if len(w) > 3]
+                    txt_words = [w for w in txt_lower.split() if len(w) > 3]
+
+                    for msg_word in msg_words:
+                        for txt_word in txt_words:
+                            word_score = fuzzy_match_score(msg_word, txt_word)
+                            if word_score > 0.75:
+                                if not is_boilerplate_candidate(txt):
+                                    cand.append({"text": txt, "lang": kr["lang"], "score": word_score * 0.5})
+                                break
+
+            # Remove duplicates and sort
+            seen = set()
+            unique_cand = []
+            for c in cand:
+                if c["text"] not in seen:
+                    seen.add(c["text"])
+                    unique_cand.append(c)
+
+            cand = sorted(unique_cand, key=lambda x: -x["score"])
+            matches = [c["text"] for c in cand[:5]]
+            confidence = float(cand[0]["score"]) if cand else 0.0
+            match_lang = cand[0]["lang"] if cand else "en"
+
+            logger.info(f"[retrieval] Found {len(matches)} matches via fuzzy matching, best score: {confidence:.2f}")
+
     except Exception as e:
         logger.warning(f"[retrieval] error: {e}")
         matches = []
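bytes_to_tensor and run_blocking_with_timeout are helpers defined elsewhere in app.py, outside this diff. For orientation only, a minimal sketch of what a decoder like bytes_to_tensor typically looks like, assuming embeddings were persisted as raw float32 buffers (an assumption about the storage format, not the actual implementation):

    import numpy as np
    import torch

    def bytes_to_tensor(blob):
        # Hypothetical decoder: raw float32 buffer -> 1-D torch tensor
        try:
            arr = np.frombuffer(blob, dtype=np.float32)
            return torch.from_numpy(arr.copy())  # copy because frombuffer yields a read-only view
        except Exception:
            return None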
@@ -1612,17 +1819,32 @@ async def chat(request: Request, data: dict = Body(...)):
         record_request(time.time() - t0)
         return {"reply": reply_final, "topic": topic, "language": reply_lang, "emoji": "", "confidence": round(confidence,2), "flags": flags}

-    # Post-process and translate
+    # Post-process and translate back to user's language
     reply_en = dedupe_sentences(reply_en)
     reply_final = reply_en
-
-
+
+    # Determine target language for translation
+    target_lang = reply_lang if reply_lang and reply_lang not in ("en", "eng", "und", "") else None
+
+    # If match was in a different language, try to use that
+    if match_lang and match_lang not in ("en", "eng", "und", ""):
+        # If user's language matches the match language, use it
+        if target_lang and target_lang.split("-")[0].lower() == match_lang.split("-")[0].lower():
+            target_lang = match_lang
+
+    # Translate to user's language
+    if target_lang:
+        lang_code = target_lang.split("-")[0].lower()
         try:
+            logger.info(f"[translation] Translating reply from en to {lang_code}")
             reply_final = translate_from_english(reply_en, lang_code)
             reply_final = dedupe_sentences(reply_final)
+            logger.info(f"[translation] Translation successful")
         except Exception as exc:
             logger.warning(f"[translation] failed to translate reply_en -> {lang_code}: {exc}")
             reply_final = reply_en
+    else:
+        logger.info("[translation] No translation needed, using English")

     # Mood & emoji append
     emoji = ""
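A hypothetical trace of the new language-selection logic (invented codes, assuming the hunk above is applied):

    reply_lang = "pt-BR"  # user's detected language
    match_lang = "pt"     # language of the best knowledge match
    # target_lang starts as "pt-BR" (not in the English/unknown set);
    # base codes agree ("pt" == "pt"), so target_lang is narrowed to "pt",
    # lang_code becomes "pt", and translate_from_english(reply_en, "pt")
    # runs inside the try block.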