Eyob-Sol committed (verified)
Commit ac1f51b · 1 Parent(s): 5645c33

Upload 41 files

.gitattributes CHANGED
@@ -37,3 +37,4 @@ runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav filter=lfs diff=lfs merge=lfs -text
 runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav filter=lfs diff=lfs merge=lfs -text
 runtime/audio/tts_4056705ada224a0092325b697c975501.wav filter=lfs diff=lfs merge=lfs -text
 models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+runtime/audio/tts_8eda72f9b61c4b13a04c70a4b1f1a997.wav filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -4,6 +4,7 @@ emoji: ☎️
 colorFrom: indigo
 colorTo: blue
 sdk: gradio
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: mit
app.py CHANGED
@@ -1,18 +1,14 @@
+# app.py
 from app.gradio_app import build_demo
-# in app.py or when building the demo
 from models.tts_router import cleanup_old_audio
-cleanup_old_audio(keep_latest=None)  # removes all existing tts_*.wav on boot
-from utils.startup_models import ensure_model
-
-# - LLM (GGUF): Qwen/Qwen2.5-1.5B-Instruct-GGUF → q4_k_m (~0.7 GB)
-llm_dir = ensure_model("Qwen/Qwen2.5-1.5B-Instruct-GGUF", "*.gguf")
-
-# - Piper voice (~100–200 MB depending on voice)
-piper_dir = ensure_model("rhasspy/piper-voices", "en/en_US/en_US-amy-medium.onnx")
 
 def main():
+    # Clean up old TTS files on boot
+    cleanup_old_audio(keep_latest=None)
+
     demo = build_demo()
-    demo.launch(share=True, server_port=7860, inbrowser=False)
+    # Don’t set server_name/server_port; HF will handle it.
+    demo.launch(share=True)
 
 if __name__ == "__main__":
     main()
app/__pycache__/gradio_app.cpython-312.pyc CHANGED
Binary files a/app/__pycache__/gradio_app.cpython-312.pyc and b/app/__pycache__/gradio_app.cpython-312.pyc differ
 
app/catalog.py CHANGED
@@ -1,85 +1,137 @@
 # app/catalog.py
 from __future__ import annotations
-import json, os
-from typing import Dict, Any, List, Optional
+import json
+import os
+import re
+from typing import Dict, Any, List, Optional, Tuple
 
-_CATALOG: Dict[str, Any] | None = None
+# In-memory singleton
+_CATALOG: Optional[Dict[str, Any]] = None
+
+def _norm(s: str) -> str:
+    """lightweight normalization for fuzzy-ish equality"""
+    return re.sub(r"\s+", " ", (s or "").strip().lower())
 
 def get_catalog_path() -> str:
+    """
+    Resolve catalog path in this order:
+      1) ENV CAFE_CATALOG_PATH
+      2) repo-relative data/menu_catalog.json (current default)
+    """
+    env_path = os.getenv("CAFE_CATALOG_PATH")
+    if env_path:
+        return env_path
     here = os.path.dirname(os.path.abspath(__file__))
     root = os.path.dirname(here)
    return os.path.join(root, "data", "menu_catalog.json")
 
-def load_catalog() -> Dict[str, Any]:
+def _load_from_disk(path: str) -> Dict[str, Any]:
+    if not os.path.exists(path):
+        raise FileNotFoundError(
+            f"Catalog not found at {path}. "
+            "Set CAFE_CATALOG_PATH in your .env or place data/menu_catalog.json."
+        )
+    with open(path, "r", encoding="utf-8") as f:
+        try:
+            data = json.load(f)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Catalog JSON invalid at {path}: {e}") from e
+    # quick shape checks (non-fatal, but helpful)
+    if not isinstance(data, dict) or "items" not in data or "schema" not in data:
+        raise ValueError("Catalog must contain top-level keys: 'items' and 'schema'.")
+    if not isinstance(data["items"], list):
+        raise ValueError("'items' must be a list.")
+    if not isinstance(data["schema"], dict):
+        raise ValueError("'schema' must be a dict.")
+    return data
+
+def load_catalog(force_reload: bool = False) -> Dict[str, Any]:
     global _CATALOG
-    if _CATALOG is not None:
+    if _CATALOG is not None and not force_reload:
         return _CATALOG
     path = get_catalog_path()
-    with open(path, "r", encoding="utf-8") as f:
-        _CATALOG = json.load(f)
+    _CATALOG = _load_from_disk(path)
     return _CATALOG
 
 def find_item_by_name(name: str) -> Optional[Dict[str, Any]]:
     c = load_catalog()
-    name_l = (name or "").strip().lower()
+    q = _norm(name)
+    if not q:
+        return None
     for it in c["items"]:
-        if it["name"].lower() == name_l:
-            return it
-        # lightweight alias match
-        if name_l in it["name"].lower():
+        nm = _norm(it.get("name", ""))
+        if q == nm or q in nm:
             return it
+        # optional alias list support: ["alias1", "alias2"]
+        for alias in it.get("aliases", []) or []:
+            if q == _norm(alias) or q in _norm(alias):
+                return it
     return None
 
 def find_item_by_sku(sku: str) -> Optional[Dict[str, Any]]:
     c = load_catalog()
+    target = (sku or "").strip()
+    if not target:
+        return None
     for it in c["items"]:
-        if it["sku"] == sku:
+        if str(it.get("sku", "")).strip() == target:
             return it
     return None
 
 def required_fields_for_category(category: str) -> List[str]:
     c = load_catalog()
     schema = c["schema"].get(category) or {}
-    return list(schema.get("required_fields") or [])
+    rf = schema.get("required_fields") or []
+    return list(rf)
 
 def optional_fields_for_category(category: str) -> List[str]:
     c = load_catalog()
     schema = c["schema"].get(category) or {}
-    return list(schema.get("optional_fields") or [])
+    of = schema.get("optional_fields") or []
+    return list(of)
+
+def _resolve_item(order_item: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    it: Optional[Dict[str, Any]] = None
+    if order_item.get("sku"):
+        it = find_item_by_sku(str(order_item["sku"]))
+    if not it and order_item.get("name"):
+        it = find_item_by_name(str(order_item["name"]))
+    return it
 
 def compute_missing_fields(order_item: Dict[str, Any]) -> List[str]:
     """
-    order_item: {"name": "...", "sku": optional, "qty": int, "<opts>": ...}
+    order_item example:
+      {"name": "Margherita Pizza", "qty": 2, "size": "large", ...}
     Uses catalog schema to see which fields are missing.
     """
-    it = None
-    if "sku" in order_item:
-        it = find_item_by_sku(order_item["sku"])
-    if not it and "name" in order_item:
-        it = find_item_by_name(order_item["name"])
+    it = _resolve_item(order_item)
     if not it:
         return ["name"]  # we don’t even know the item yet
 
     category = it["category"]
     req = set(required_fields_for_category(category))
-    present = set([k for k in order_item.keys() if k in req or k == "qty" or k == "name" or k == "sku"])
 
-    # qty normalization: consider qty present if >=1
-    if "qty" in req and (order_item.get("qty") is None or int(order_item.get("qty", 0)) < 1):
-        # keep qty “missing”
-        pass
+    present = set(k for k in order_item.keys() if k in req or k in {"qty", "name", "sku"})
+
+    # qty normalization: consider qty present if >= 1
+    if "qty" in req:
+        try:
+            q = int(order_item.get("qty", 0))
+        except Exception:
+            q = 0
+        if q >= 1:
+            present.add("qty")
+        # else leave "qty" missing
     else:
-        present.add("qty")
+        # even if qty isn't required, count it as present if provided
+        if order_item.get("qty") is not None:
+            present.add("qty")
 
     missing = [f for f in req if f not in present]
     return missing
 
 def friendly_requirements_prompt(order_item: Dict[str, Any]) -> str:
-    it = None
-    if "sku" in order_item:
-        it = find_item_by_sku(order_item["sku"])
-    if not it and "name" in order_item:
-        it = find_item_by_name(order_item["name"])
+    it = _resolve_item(order_item)
     if not it:
         return "Which item would you like to order?"
 
@@ -87,19 +139,16 @@ def friendly_requirements_prompt(order_item: Dict[str, Any]) -> str:
     req = required_fields_for_category(category)
     opt = optional_fields_for_category(category)
 
-    parts = []
-    opt_txt = ""
-    if opt:
-        opt_txt = f" Optional: {', '.join(opt)}."
+    parts: List[str] = []
+    opt_txt = f" Optional: {', '.join(opt)}." if opt else ""
     if req:
         parts.append(f"I need {', '.join(req)} for {it['name']}.{opt_txt}")
     else:
         parts.append(f"Please specify quantity for {it['name']}.{opt_txt}")
 
-    # Also list choices for required options
-    # e.g., size choices
+    # Also list choices for required options (e.g., size choices)
     opts = it.get("options") or {}
-    choice_bits = []
+    choice_bits: List[str] = []
     for k, spec in opts.items():
         if spec.get("required"):
             choices = spec.get("choices") or []
@@ -107,4 +156,5 @@ def friendly_requirements_prompt(order_item: Dict[str, Any]) -> str:
             choice_bits.append(f"{k}: {', '.join(choices)}")
     if choice_bits:
         parts.append("Choices → " + " | ".join(choice_bits))
+
     return " ".join(parts)
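A quick usage sketch of the new helpers (not part of the commit; the item name and required fields are assumptions about `data/menu_catalog.json`):

```python
# Hypothetical example: assumes the catalog has a pizza item whose category
# schema requires ["qty", "size"].
from app.catalog import compute_missing_fields, friendly_requirements_prompt

order_item = {"name": "Margherita Pizza", "qty": 2}   # no size yet
missing = compute_missing_fields(order_item)           # e.g., ["size"]
if missing:
    print(friendly_requirements_prompt(order_item))    # asks for size, lists choices
```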
app/gradio_app.py CHANGED
@@ -1,18 +1,10 @@
 # app/gradio_app.py
 from __future__ import annotations
-
-import os
-import time
-import shutil
-import uuid
 from typing import List, Dict, Any, Tuple
 
+import os, time, shutil, uuid
 import gradio as gr
 
-# ---- External modules we rely on (light, stable) ----
-# - ASR: faster-whisper wrapper you already have
-# - TTS: local piper/ say via models/tts_router.py
-# - LLM: optional local model; if missing, we fallback to a safe canned reply
 try:
     from models.asr_whisper import get_asr
 except Exception:
@@ -23,9 +15,12 @@ try:
 except Exception:
     llm_respond_chat = None
 
-from models.tts_router import tts_synthesize, ensure_runtime_audio_dir
-from models.tts_router import tts_synthesize, cleanup_old_audio, AUDIO_DIR
-import shutil, uuid, os
+from models.tts_router import (
+    tts_synthesize,
+    ensure_runtime_audio_dir,
+    cleanup_old_audio,
+    AUDIO_DIR,
+)
 
 # =============================================================================
 # Helpers (pure, modular)
@@ -33,17 +28,24 @@ import shutil, uuid, os
 
 def _safe_llm_reply(history: List[Dict[str, str]], user_text: str) -> str:
     """
-    Ask the chat LLM for a response. If it's not available, use a reasonable fallback.
+    Try local LLM. If it's missing or errors, log loudly and return a safe fallback.
     """
-    if llm_respond_chat is not None:
-        try:
-            # policy guard is optional; pass an empty dict
-            bot_text, _guard, _diag = llm_respond_chat(history or [], user_text, {})
-            if isinstance(bot_text, str) and bot_text.strip():
-                return bot_text.strip()
-        except Exception as e:
-            print("[LLM] fallback due to error:", e)
-    # Fallback (LLM unavailable or failed)
+    if llm_respond_chat is None:
+        print("[LLM] respond_chat not imported; using fallback.")
+        return "Hello! How can I assist you today? Would you like to place an order or inquire about the menu?"
+
+    try:
+        bot_text, _guard, _diag = llm_respond_chat(history or [], user_text, {})
+        if isinstance(bot_text, str) and bot_text.strip():
+            print("[LLM] returned:", bot_text[:120].replace("\n", " "))
+            return bot_text.strip()
+        else:
+            print("[LLM] empty/invalid response; using fallback.")
+    except Exception as e:
+        import traceback
+        print("[LLM] error -> fallback:", repr(e))
+        traceback.print_exc()
+
     return "Hello! How can I assist you today? Would you like to place an order or inquire about the menu?"
 
@@ -267,8 +269,8 @@ def build_demo():
         * delete the user clip immediately after ASR
         * delete all older TTS, keep only latest one
         - Append transcript pairs to voice chat state
+        Returns exactly: (voice_chat_messages, assistant_audio_path, diag_json)
         """
-        import time
         empty_diag = {
             "intent": None,
             "slots": {},
@@ -277,23 +279,24 @@
             "latency_ms": 0,
         }
         if not aud_path:
-            return (
-                voice_hist or [],
-                None,        # assistant_audio
-                empty_diag,
-                None,        # clear recorder (handled elsewhere if you chain a clear)
-                voice_hist or []
-            )
+            return (voice_hist or []), None, empty_diag
 
         t0 = time.time()
-        # 1) Stabilize mic path into runtime/audio
+
+        # 1) Stabilize mic path into runtime/audio (ENV-driven via AUDIO_DIR)
         stable_user = _persist_copy(aud_path)
 
         # 2) Transcribe
+        transcript = "(transcription failed)"
         try:
-            asr = get_asr()
-            asr_out = asr.transcribe(stable_user)
-            transcript = (asr_out.get("text") or "").strip() or "(no speech detected)"
+            if get_asr is None:
+                transcript = "(ASR unavailable)"
+            else:
+                asr = get_asr()
+                asr_out = asr.transcribe(stable_user)
+                transcript = (asr_out.get("text") or "").strip() or "(no speech detected)"
+        except Exception as e:
+            print("[ASR] error:", e)
         finally:
             # Remove the user clip ASAP to keep the folder small
             if stable_user and os.path.exists(stable_user):
@@ -302,18 +305,24 @@
             except Exception as e:
                 print("[CLEANUP] Could not delete user clip:", e)
 
-        # 3) Get bot reply (LLM response)
+        # 3) Get bot reply (LLM response) – env/model-path handled inside models/llm_chat
         try:
             from models.llm_chat import respond_chat_voice
         except Exception:
-            # Fallback: reuse text chat function if you don’t have a voice-specific one
             from models.llm_chat import respond_chat as respond_chat_voice
 
-        bot_text, new_policy, policy_diag = respond_chat_voice(voice_hist or [], transcript, {})
+        try:
+            bot_text, new_policy, policy_diag = respond_chat_voice(voice_hist or [], transcript, {})
+        except Exception as e:
+            print("[LLM] voice fallback due to error:", e)
+            bot_text, new_policy, policy_diag = (
+                "Hello! How can I help with FutureCafe today?",
+                {},
+                {},
+            )
 
-        # 4) TTS the bot reply into runtime/audio
-        new_tts = tts_synthesize(bot_text)  # this writes into runtime/audio
-        # Keep only the latest TTS (delete older tts_*.wav)
+        # 4) TTS the bot reply into runtime/audio (ENV-driven path via tts_router)
+        new_tts = tts_synthesize(bot_text)  # path in VOICE_AUDIO_DIR
         cleanup_old_audio(keep_latest=new_tts)
 
         # 5) Append to voice chat state (text transcripts)
@@ -334,8 +343,8 @@
             "latency_ms": int((time.time() - t0) * 1000),
         }
 
-        # Return: (voice_chat, assistant_audio_path, diag, recorder_clear, voice_state)
-        return new_hist, new_tts, diag, gr.update(value=None), new_hist
+        # Return EXACTLY the 3 outputs you wired: (voice_chat, assistant_audio, call_diag)
+        return new_hist, new_tts, diag
 
     def on_text_send(txt: str, hist: List[Dict[str, str]]):
         new_hist, diag, clear_text = handle_text_turn(txt, hist or [])
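Since the voice handler now returns exactly three values, the event wiring must list exactly three outputs. A sketch of what that looks like (the component names `rec`, `voice_state`, `voice_chat`, `assistant_audio`, and `call_diag` are hypothetical, not from this commit):

```python
# Hypothetical wiring inside build_demo(); only the 3-output contract is from the diff.
rec.stop_recording(
    fn=on_voice_turn,
    inputs=[rec, voice_state],
    outputs=[voice_chat, assistant_audio, call_diag],
)
```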
app/intent_schema.py CHANGED
@@ -1,10 +1,19 @@
 # app/intent_schema.py
 from __future__ import annotations
-from typing import List, Optional, Literal
-from pydantic import BaseModel, Field
+from typing import List, Optional, Literal, Union, Tuple
+from pydantic import BaseModel, Field, ValidationError
 
-IntentName = Literal["reservation.create", "order.create", "hours.get", "menu.search", "smalltalk", "other"]
+# ---- Canonical intent names ----
+IntentName = Literal[
+    "reservation.create",
+    "order.create",
+    "hours.get",
+    "menu.search",
+    "smalltalk",
+    "other",
+]
 
+# ---- Slot models ----
 class ReservationSlots(BaseModel):
     name: Optional[str] = None
     party_size: Optional[int] = Field(default=None, ge=1, le=20)
@@ -12,20 +21,55 @@ class ReservationSlots(BaseModel):
     time: Optional[str] = None  # “19:00” or “7 pm”
     phone: Optional[str] = None
 
+    model_config = {"extra": "ignore"}  # tolerate extra keys from LLM
+
 class OrderItem(BaseModel):
     name: str
     qty: int = Field(default=1, ge=1)
 
+    model_config = {"extra": "ignore"}
+
 class OrderSlots(BaseModel):
     items: List[OrderItem] = Field(default_factory=list)
     notes: Optional[str] = None
 
+    model_config = {"extra": "ignore"}
+
 class MenuSlots(BaseModel):
     query: Optional[str] = None
     dietary: List[str] = Field(default_factory=list)  # e.g., ["vegan","gluten-free"]
 
+    model_config = {"extra": "ignore"}
+
+# ---- Envelope returned by the router/LLM ----
 class IntentEnvelope(BaseModel):
     intent: IntentName
     need_more_info: bool = False
     ask_user: Optional[str] = None  # a single, polite follow-up question if info missing
-    slots: dict = Field(default_factory=dict)  # raw dict; we’ll validate by intent
+
+    # Keep it loose at the API boundary; we’ll coerce it with the helper below.
+    slots: dict = Field(default_factory=dict)
+
+    model_config = {"extra": "ignore"}
+
+# ---- Helpers to validate slots into the right model ----
+SlotsUnion = Union[ReservationSlots, OrderSlots, MenuSlots, dict]
+
+def coerce_slots(intent: IntentName, slots: dict | None) -> Tuple[SlotsUnion, Optional[str]]:
+    """
+    Try to convert a loose slots dict into the correct typed model based on 'intent'.
+    Returns (slots_obj, error_message). If it cannot validate, returns (original_dict, message).
+    This keeps the pipeline resilient while giving you typed access when possible.
+    """
+    raw = slots or {}
+    try:
+        if intent == "reservation.create":
+            return ReservationSlots(**raw), None
+        if intent == "order.create":
+            return OrderSlots(**raw), None
+        if intent == "menu.search":
+            return MenuSlots(**raw), None
+        # 'hours.get', 'smalltalk', 'other' often don’t need slots
        return raw, None
    except ValidationError as ve:
        return raw, f"slot_validation_failed: {ve.errors()}"
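A minimal sketch of `coerce_slots` in use (values are illustrative):

```python
from app.intent_schema import IntentEnvelope, OrderSlots, coerce_slots

env = IntentEnvelope(
    intent="order.create",
    slots={"items": [{"name": "Margherita", "qty": 2, "surprise": "extra key"}]},
)
slots_obj, err = coerce_slots(env.intent, env.slots)
if err is None and isinstance(slots_obj, OrderSlots):
    print(slots_obj.items[0].qty)  # 2 — the unknown "surprise" key is ignored
```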
app/orchestrator.py CHANGED
@@ -1,32 +1,134 @@
-from typing import Dict, Any
-from models.llm_router import respond as route_fn, nlg
-from app.tools import dispatch_tool
+# app/orchestrator.py
+from __future__ import annotations
+import re
+from typing import Dict, Any, Callable, Optional
+
+from utils.config import get_settings
 from utils.phone import extract_phone, looks_valid
+from app.tools import dispatch_tool
+
+# -----------------------------
+# Resolve router/NLG per BACKEND
+# -----------------------------
+_s = get_settings()
+
+_route_fn: Optional[Callable[[str], Dict[str, Any]]] = None
+_nlg_fn: Optional[Callable[[str, Dict[str, Any], str], str]] = None
+
+def _load_router():
+    global _route_fn, _nlg_fn
+    backend = (_s.BACKEND_LLM or "").lower()
 
+    try:
+        if backend == "llamacpp":
+            from models.llm_router import respond as route, nlg as nlg_impl
+            _route_fn, _nlg_fn = route, nlg_impl
+        elif backend == "openai":
+            from models.openai_router import respond as route, nlg as nlg_impl
+            _route_fn, _nlg_fn = route, nlg_impl
+        elif backend == "groq":
+            from models.groq_router import respond as route, nlg as nlg_impl
+            _route_fn, _nlg_fn = route, nlg_impl
+        else:
+            # Unknown backend → safe fallbacks
+            _route_fn = lambda _: {"tool": None, "args": {}}
+            _nlg_fn = _fallback_nlg
+    except Exception:
+        # If import fails, still keep app running with safe fallbacks
+        _route_fn = lambda _: {"tool": None, "args": {}}
+        _nlg_fn = _fallback_nlg
+
+def _fallback_nlg(tool: str, tool_result: Dict[str, Any] | None, user_text: str) -> str:
+    """Minimal reply if no NLG provided by the chosen backend."""
+    tr = tool_result or {}
+    if tool in (None, "", "smalltalk"):
+        return "Hello! How can I help with FutureCafe—menu, hours, reservations, or orders?"
+    if tool == "get_hours":
+        hours = tr.get("hours") or "11:00–22:00 daily"
+        address = tr.get("address") or "123 Main St"
+        return f"We’re open {hours} at {address}. What else can I do for you?"
+    if tool == "menu_lookup":
+        items = tr.get("items") or []
+        if items:
+            names = ", ".join(i.get("name", "item") for i in items[:6])
+            return f"Here are some popular items: {names}. Would you like to order any of these?"
+        return "I can look up menu items—any dietary needs or a specific dish?"
+    if tool == "create_reservation":
+        when = tr.get("when") or tr.get("datetime") or "your requested time"
+        code = tr.get("reservation_id") or tr.get("code") or "a confirmation code"
+        return f"Reservation confirmed for {when}. Code {code}. Anything else I can help with?"
+    if tool == "create_order":
+        items = tr.get("items") or []
+        if items:
+            summary = ", ".join(f"{i.get('qty', '1')}× {i.get('name', 'item')}" for i in items)
+            total = tr.get("total")
+            return f"Order placed: {summary}" + (f". Total ${total:.2f}" if isinstance(total, (int, float)) else "") + "."
+        return "Your order is noted. Anything to add?"
+    # Generic fallback
+    return "Done. Anything else I can help you with?"
+
+# Load router once at import
+_load_router()
+
+# -----------------------------
+# Public API
+# -----------------------------
 def llm_route_and_execute(user_text: str) -> Dict[str, Any]:
-    route = route_fn(user_text)  # {"tool": "get_hours"|..., "args": {...}}
+    """
+    1) Route the user_text to a tool (model-dependent)
+    2) Enrich args (e.g., reservation phone/name)
+    3) Execute tool (dispatch_tool)
+    4) Generate reply (NLG if available, else fallback)
+    Returns a single dict suitable for the UI diagnostics panel.
+    """
+    text = (user_text or "").strip()
+    if not text:
+        return {
+            "intent": "smalltalk",
+            "slots": {},
+            "tool_selected": None,
+            "tool_result": None,
+            "response": "Hello! How can I help with FutureCafe today?",
+        }
+
+    # --- 1) Route ---
+    try:
+        route = _route_fn(text) if _route_fn else {"tool": None, "args": {}}
+        if not isinstance(route, dict):
+            route = {"tool": None, "args": {}}
+    except Exception:
+        route = {"tool": None, "args": {}}
+
     tool = route.get("tool")
-    args = route.get("args") or {}
+    args = dict(route.get("args") or {})
 
-    # enrich reservation with phone if present in the text
+    # --- 2) Enrich args for reservation ---
     if tool == "create_reservation":
-        phone = extract_phone(user_text)
-        if looks_valid(phone):
+        phone = extract_phone(text)
+        if looks_valid(phone) and not args.get("phone"):
             args["phone"] = phone
+        # lightweight name inference: “my name is X”, “I am X”, “I’m X”
         if not args.get("name"):
-            # naive default name if user included "my name is ..."
-            import re
-            m = re.search(r"(?:my name is|i am|i'm)\s+([A-Z][a-z]+)", user_text, re.I)
-            if m: args["name"] = m.group(1)
+            m = re.search(r"(?:my name is|i am|i'm)\s+([A-Z][a-z]+)", text, re.I)
+            if m:
+                args["name"] = m.group(1)
 
+    # --- 3) Execute tool (optional) ---
     tool_result = None
     if tool:
-        tool_result = dispatch_tool(tool, args)
+        try:
+            tool_result = dispatch_tool(tool, args)
+        except Exception as e:
+            tool_result = {"ok": False, "error": f"tool_error: {e!s}"}
 
-    reply = nlg(tool or "", tool_result or {}, user_text)
+    # --- 4) NLG (or fallback) ---
+    try:
+        reply = _nlg_fn(tool or "", tool_result or {}, text) if _nlg_fn else _fallback_nlg(tool or "", tool_result or {}, text)
+    except Exception:
+        reply = _fallback_nlg(tool or "", tool_result or {}, text)
 
     return {
-        "intent": tool or ("smalltalk" if not tool else tool),
+        "intent": tool or "smalltalk",
         "slots": args,
         "tool_selected": tool,
         "tool_result": tool_result,
app/policy.py CHANGED
@@ -1,36 +1,65 @@
 # app/policy.py
 from __future__ import annotations
-import os, re
+import json, os, re
+from typing import Dict, Any, List, Tuple
 
-# --- Topic detection (very lightweight, fast) ---
-CAFE_KEYWORDS = [
+# ---------- Loading & configuration ----------
+
+def _root_dir() -> str:
+    here = os.path.dirname(os.path.abspath(__file__))
+    return os.path.dirname(here)
+
+def _load_keywords_from_file() -> Dict[str, Any]:
+    """Optional: data/policy_keywords.json -> {"cafe_keywords":[...], "smalltalk_regex": "..."}"""
+    path = os.path.join(_root_dir(), "data", "policy_keywords.json")
+    if not os.path.exists(path):
+        return {}
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            return json.load(f) or {}
+    except Exception:
+        return {}
+
+def _env_list(var: str) -> List[str]:
+    raw = (os.getenv(var) or "").strip()
+    if not raw:
+        return []
+    return [x.strip() for x in raw.split(",") if x.strip()]
+
+def _compile_regex(patterns: List[str]) -> re.Pattern:
+    if not patterns:
+        return re.compile(r"$^")  # match nothing
+    return re.compile(r"|".join([re.escape(k) for k in patterns]), re.I)
+
+# Defaults (used only if not overridden by env/file)
+_DEFAULT_CAFE_KEYWORDS = [
     "menu","order","item","dish","pizza","burger","salad","pasta","vegan","gluten",
     "price","special","deal","offer","hours","open","close","time","location","address",
     "book","reserve","reservation","table","party","pickup","delivery","takeout","payment",
     "futurecafe","future cafe","future-cafe","café","coffee","drinks","beverage","side"
 ]
-_kw_re = re.compile(r"|".join([re.escape(k) for k in CAFE_KEYWORDS]), re.I)
+_DEFAULT_SMALLTALK_RE = r"\b(hi|hello|hey|good\s+(morning|afternoon|evening)|thanks|thank you|bye|goodbye)\b"
 
-SMALLTALK = r"\b(hi|hello|hey|good\s+(morning|afternoon|evening)|thanks|thank you|bye|goodbye)\b"
-_smalltalk_re = re.compile(SMALLTALK, re.I)
+# Merge precedence: ENV > file > defaults
+_file_conf = _load_keywords_from_file()
+_CAFE_KEYWORDS = _env_list("CAFE_KEYWORDS") or _file_conf.get("cafe_keywords") or _DEFAULT_CAFE_KEYWORDS
+_SMALLTALK_RE_STR = os.getenv("SMALLTALK_REGEX") or _file_conf.get("smalltalk_regex") or _DEFAULT_SMALLTALK_RE
 
-def is_cafe_topic(text: str) -> bool:
-    return bool(text and _kw_re.search(text))
+_kw_re = _compile_regex(_CAFE_KEYWORDS)
+_smalltalk_re = re.compile(_SMALLTALK_RE_STR, re.I)
 
-def is_smalltalk(text: str) -> bool:
-    return bool(text and _smalltalk_re.search(text))
+# ---------- Limits & messages ----------
 
 def unrelated_limit() -> int:
-    """How many off-topic turns allowed before ending."""
+    """How many off-topic turns allowed before ending (clamped 1..5)."""
     try:
         n = int(os.getenv("CAFE_UNRELATED_LIMIT", "3"))
        return max(1, min(5, n))
    except Exception:
        return 3
 
-# --- Messages ---
-POLITE_REFUSAL = (
-    "I’m here to help with FutureCafe—menu, hours, reservations, and orders. "
+POLITE_REFUSAL_1 = (
+    "I'm here to help with FutureCafe—menu, hours, reservations, and orders. "
     "Could you ask something about the restaurant?"
 )
 
@@ -39,7 +68,65 @@ POLITE_REFUSAL_2 = (
     "Ask me about our menu, hours, or booking a table."
 )
 
-def end_message() -> str:
-    return ("Im only able to help with FutureCafe topics. "
-            "Let’s end this chat for now. If you need menu, hours, or reservations, "
-            "message me again anytime.")
+END_MESSAGE = (
+    "I'm only able to help with FutureCafe topics. Let's end this chat for now. "
+    "If you need menu, hours, or reservations, message me again anytime."
+)
+
+def refusal_message(count: int) -> str:
+    return POLITE_REFUSAL_1 if count <= 1 else POLITE_REFUSAL_2
+
+# ---------- Public utilities ----------
+
+def is_cafe_topic(text: str) -> bool:
+    return bool(text and _kw_re.search(text))
+
+def is_smalltalk(text: str) -> bool:
+    return bool(text and _smalltalk_re.search(text))
+
+def enforce_policy(
+    user_text: str,
+    guard_state: Dict[str, Any] | None
+) -> Tuple[bool, str | None, Dict[str, Any], Dict[str, Any]]:
+    """
+    Lightweight topic gate. Returns:
+      allowed: bool           -> if True, send to LLM; if False, use reply_if_block
+      reply_if_block: Optional[str]
+      new_guard: dict         -> persist across turns (store in State)
+      diag: dict              -> tiny diagnostics blob for the UI
+    guard_state schema: {"unrelated": int, "ended": bool}
+    """
+    text = (user_text or "").strip()
+    guard = dict(guard_state or {"unrelated": 0, "ended": False})
+    diag: Dict[str, Any] = {"limit": unrelated_limit()}
+
+    if guard.get("ended"):
+        diag["policy"] = "ended"
+        return False, END_MESSAGE, guard, diag
+
+    # Allow smalltalk to go through (LLM can handle niceties)
+    if is_smalltalk(text) or is_cafe_topic(text):
+        diag["policy"] = "ok"
+        return True, None, guard, diag
+
+    # Off-topic
+    guard["unrelated"] = int(guard.get("unrelated", 0)) + 1
+    diag["unrelated"] = guard["unrelated"]
+
+    if guard["unrelated"] >= unrelated_limit():
+        guard["ended"] = True
+        diag["policy"] = "ended"
+        return False, END_MESSAGE, guard, diag
+
+    diag["policy"] = "nudge"
+    return False, refusal_message(guard["unrelated"]), guard, diag
+
+# ---------- Introspection helpers (nice for your Insights pane) ----------
+
+def policy_snapshot() -> Dict[str, Any]:
+    """Expose the active config so you can show it in the Insights JSON."""
+    return {
+        "cafe_keywords": _CAFE_KEYWORDS,
+        "smalltalk_regex": _SMALLTALK_RE_STR,
+        "unrelated_limit": unrelated_limit(),
+    }
app/sim_api.py CHANGED
@@ -1,81 +1,120 @@
 # app/sim_api.py
 from __future__ import annotations
-from typing import Dict, Any, List, Tuple
+from typing import Dict, Any, List, Tuple, Optional
 from app.catalog import load_catalog, find_item_by_name, find_item_by_sku
 
-def _pick_item(order_it: Dict[str, Any]) -> Dict[str, Any] | None:
+def _catalog() -> Dict[str, Any]:
+    # Local indirection so we can override in tests if needed
+    return load_catalog()
+
+def _pick_item(order_it: Dict[str, Any]) -> Optional[Dict[str, Any]]:
     it = None
-    if "sku" in order_it:
-        it = find_item_by_sku(order_it["sku"])
-    if not it and "name" in order_it:
-        it = find_item_by_name(order_it["name"])
+    if order_it.get("sku"):
+        it = find_item_by_sku(str(order_it["sku"]))
+    if not it and order_it.get("name"):
+        it = find_item_by_name(str(order_it["name"]))
     return it
 
-def check_item_availability(order_it: Dict[str, Any]) -> Tuple[bool, Dict[str, Any]]:
+def _norm_qty(q: Any) -> Optional[int]:
+    try:
+        qi = int(q)
+        return qi if qi >= 1 else None
+    except Exception:
+        return None
+
+def check_item_availability(order_it: Dict[str, Any], catalog: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
     """
     Returns (is_available, info)
-    info contains { "reason": "...", "alternatives": [...] } when not available
-    For size-based items, verify stock for requested size.
+    - info on success: {"price_each": float}
+    - info on failure: {"reason": str, "item": dict, **context}
     """
     it = _pick_item(order_it)
     if not it:
-        return False, {"reason": "unknown_item", "alternatives": []}
+        return False, {"reason": "unknown_item", "item": order_it}
 
-    qty = int(order_it.get("qty", 0) or 0)
-    if qty < 1:
-        return False, {"reason": "qty_missing", "alternatives": []}
+    qty = _norm_qty(order_it.get("qty"))
+    if qty is None:
+        return False, {"reason": "qty_missing_or_invalid", "item": order_it}
 
-    # size key heuristics
+    # Normalize size if provided
     size = order_it.get("size")
-    stock_map = it.get("stock") or {}
+    size_norm = str(size).lower() if isinstance(size, str) else None
+
+    price_map = (it.get("price") or {})
+    stock_map = (it.get("stock") or {})
 
+    # One-size items
     if "one_size" in stock_map:
-        avail = stock_map["one_size"]
-        if avail >= qty:
-            return True, {"price_each": (it.get("price") or {}).get("one_size", 0.0)}
-        else:
-            return False, {"reason": "insufficient_stock", "have": avail, "alternatives": []}
-
-    if size:
-        have = int(stock_map.get(size, 0))
+        have = int(stock_map.get("one_size", 0))
         if have >= qty:
-            return True, {"price_each": (it.get("price") or {}).get(size, 0.0)}
-        else:
-            # propose other sizes with stock
-            alts = []
-            for s, have_s in stock_map.items():
-                if have_s >= qty:
-                    alts.append({"size": s, "have": have_s, "price_each": (it.get("price") or {}).get(s, 0.0)})
-            return False, {"reason": "size_out_of_stock", "have": have, "alternatives": alts}
-    else:
-        # missing required option — let schema enforcement ask; but if user skipped, treat as not available
-        return False, {"reason": "size_missing", "alternatives": [{"hint": "provide size"}]}
-
-def place_order(order_items: List[Dict[str, Any]]) -> Dict[str, Any]:
+            unit = float(price_map.get("one_size", 0.0))
+            return True, {"price_each": unit}
+        return False, {"reason": "insufficient_stock", "have": have, "item": order_it}
+
+    # Size-required items
+    if not size_norm:
+        # schema enforcement will normally ask for size; we surface a nudge + available choices
+        choices = [k for k in stock_map.keys()]
+        return False, {"reason": "size_missing", "choices": choices, "item": order_it}
+
+    have = int(stock_map.get(size_norm, 0))
+    if have >= qty:
+        unit = float(price_map.get(size_norm, 0.0))
+        return True, {"price_each": unit}
+
+    # Try alternatives that can satisfy qty
+    alts = []
+    for s, have_s in stock_map.items():
+        try:
+            hs = int(have_s)
+        except Exception:
+            continue
+        if hs >= qty:
+            alts.append({
+                "size": s,
+                "have": hs,
+                "price_each": float(price_map.get(s, 0.0))
+            })
+    return False, {"reason": "size_out_of_stock", "requested_size": size_norm, "alternatives": alts, "item": order_it}
+
+def place_order(order_items: List[Dict[str, Any]], catalog: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
     """
-    Verifies each item and (if all available) returns summary.
-    We do not mutate stock here (sim).
+    Validates all items via check_item_availability.
+    Returns:
+      - {"ok": True, "total": float, "lines": [ ... ]}
+      - {"ok": False, "reason": str, "item": dict, "alternatives": [...]?}
     """
-    ok = True
-    lines = []
     total = 0.0
-    for it in order_items:
-        item_def = _pick_item(it)
-        if not item_def:
-            return {"ok": False, "reason": "unknown_item", "item": it}
-        avail, info = check_item_availability(it)
-        if not avail:
-            return {"ok": False, "reason": info.get("reason"), "item": it, "alternatives": info.get("alternatives", [])}
-        qty = int(it["qty"])
-        unit = info.get("price_each", 0.0)
+    lines: List[Dict[str, Any]] = []
+    for raw in order_items:
+        it = _pick_item(raw)
+        if not it:
+            return {"ok": False, "reason": "unknown_item", "item": raw}
+
+        ok, info = check_item_availability(raw, catalog=catalog)
+        if not ok:
+            # Bubble up first blocking failure
+            fail = {"ok": False, "reason": info.get("reason", "unavailable"), "item": info.get("item", raw)}
+            if "alternatives" in info:
+                fail["alternatives"] = info["alternatives"]
+            if "choices" in info:
+                fail["choices"] = info["choices"]
+            return fail
+
+        qty = _norm_qty(raw.get("qty")) or 0
+        unit = float(info.get("price_each", 0.0))
         line_total = unit * qty
         total += line_total
+
+        # Echo back normalized line
+        opts = {k: v for k, v in raw.items() if k not in ("name", "sku", "qty")}
         lines.append({
-            "sku": item_def["sku"],
-            "name": item_def["name"],
+            "sku": it["sku"],
+            "name": it["name"],
             "qty": qty,
-            "options": {k: v for k, v in it.items() if k not in ("name","sku","qty")},
+            "options": opts,
             "unit": unit,
-            "line_total": line_total
+            "line_total": round(line_total, 2),
         })
+
     return {"ok": True, "total": round(total, 2), "lines": lines}
app/sim_api_bridge.py ADDED
@@ -0,0 +1,28 @@
+# app/sim_api_bridge.py
+from __future__ import annotations
+from typing import List, Dict, Any
+from app.sim_api import place_order
+from app.catalog import load_catalog
+
+def get_hours() -> Dict[str, Any]:
+    c = load_catalog()
+    return {"hours": c.get("hours"), "address": c.get("address"), "phone": c.get("phone")}
+
+def menu_lookup(filters: List[str]) -> List[Dict[str, Any]]:
+    # naive filter: return all items and let the LLM filter in text for now
+    c = load_catalog()
+    return c.get("items", [])
+
+def create_reservation(name: str, phone: str | None, party_size: int, datetime_str: str) -> Dict[str, Any]:
+    # Simulate success
+    return {
+        "ok": True,
+        "reservation_id": "sim-" + str(abs(hash((name, phone, party_size, datetime_str))))[:8],
+        "name": name,
+        "party_size": party_size,
+        "when": datetime_str,
+        "phone": phone,
+    }
+
+def create_order(items: List[Dict[str, Any]]) -> Dict[str, Any]:
+    return place_order(items)
app/tools.py CHANGED
@@ -1,18 +1,131 @@
-from typing import Any, Dict
-from mock_api import service as svc
+# app/tools.py
+from __future__ import annotations
+from typing import Any, Dict, List, Optional
+import importlib
+
+from utils.config import get_settings
+
+# ------------------------------
+# Backend service loader
+# ------------------------------
+
+def _load_service():
+    """
+    Load a service module providing the following callables:
+      - get_hours() -> dict
+      - menu_lookup(filters: List[str]) -> List[dict]
+      - create_reservation(name: str, phone: Optional[str], party_size: int, datetime_str: str) -> dict
+      - create_order(items: List[dict]) -> dict
+
+    Selection is controlled by `API_BACKEND` in .env:
+      - "sim"  -> use built-in simulated API (app.sim_api_bridge)
+      - "mock" -> import app.mock_api.service or mock_api.service
+      - "http" -> import app.http_api.service (you can implement later)
+    """
+    s = get_settings()
+    backend = (getattr(s, "API_BACKEND", None) or "sim").lower()
+
+    module_candidates: List[str] = []
+    if backend == "sim":
+        module_candidates = ["app.sim_api_bridge"]
+    elif backend == "mock":
+        module_candidates = ["app.mock_api.service", "mock_api.service"]
+    elif backend == "http":
+        module_candidates = ["app.http_api.service"]
+    else:
+        # unknown -> fall back to sim
+        module_candidates = ["app.sim_api_bridge"]
+
+    last_err = None
+    for modname in module_candidates:
+        try:
+            return importlib.import_module(modname)
+        except Exception as e:
+            last_err = e
+    # Final fallback to sim bridge even if env asked otherwise
+    try:
+        return importlib.import_module("app.sim_api_bridge")
+    except Exception as e:
+        raise RuntimeError(f"Could not load any service module ({module_candidates}): {last_err or e}")
+
+_service = None
+
+def _service_module():
+    global _service
+    if _service is None:
+        _service = _load_service()
+    return _service
+
+# ------------------------------
+# Input helpers
+# ------------------------------
+
+def _as_int(x: Any, default: int) -> int:
+    try:
+        return int(x)
+    except Exception:
+        return default
+
+def _as_list(x: Any) -> List[Any]:
+    if x is None:
+        return []
+    if isinstance(x, list):
+        return x
+    return [x]
+
+def _ensure_items(items: Any) -> List[dict]:
+    if items is None:
+        return []
+    if isinstance(items, list):
+        # keep only dict-like lines
+        return [it for it in items if isinstance(it, dict)]
+    return []
+
+# ------------------------------
+# Public dispatch
+# ------------------------------
 
 def dispatch_tool(tool: str, args: Dict[str, Any]) -> Dict[str, Any]:
-    if tool == "get_hours":
-        return svc.get_hours()
-    if tool == "menu_lookup":
-        return {"items": svc.menu_lookup(args.get("filters") or [])}
-    if tool == "create_reservation":
-        return svc.create_reservation(
-            name=args.get("name") or "Guest",
-            phone=args.get("phone"),
-            party_size=int(args.get("party_size") or 2),
-            datetime_str=args.get("datetime_str") or "",
-        )
-    if tool == "create_order":
-        return svc.create_order(args.get("items") or [])
-    raise ValueError(f"unknown tool: {tool}")
+    svc = _service_module()
+
+    try:
+        if tool == "get_hours":
+            return svc.get_hours()
+
+        if tool == "menu_lookup":
+            filters = _as_list(args.get("filters"))
+            return {"items": svc.menu_lookup(filters)}
+
+        if tool == "create_reservation":
+            name = args.get("name") or "Guest"
+            phone = args.get("phone")
+            # accept either "party_size" or "partySize"
+            party_size = _as_int(args.get("party_size") or args.get("partySize"), 2)
+            # accept "datetime_str" or split date/time if your UI produces them separately
+            datetime_str = args.get("datetime_str") or args.get("datetime") or ""
+            if not datetime_str:
+                # optional convenience: build from date + time if present
+                date = (args.get("date") or "").strip()
+                time_val = (args.get("time") or "").strip()
+                if date or time_val:
+                    datetime_str = f"{date} {time_val}".strip()
+
+            return svc.create_reservation(
+                name=name,
+                phone=phone,
+                party_size=party_size,
+                datetime_str=datetime_str,
+            )
+
+        if tool == "create_order":
+            items = _ensure_items(args.get("items"))
+            if not items:
+                return {"ok": False, "reason": "no_items", "message": "No order items were provided."}
+            return svc.create_order(items)
+
+        # Unknown tool
+        return {"ok": False, "reason": "unknown_tool", "tool": tool}
+
+    except Exception as e:
+        # Never raise to the UI; always return a structured error
+        return {"ok": False, "reason": "exception", "error": str(e), "tool": tool}
models/.DS_Store ADDED
Binary file (6.15 kB)
 
models/__pycache__/asr_whisper.cpython-312.pyc CHANGED
Binary files a/models/__pycache__/asr_whisper.cpython-312.pyc and b/models/__pycache__/asr_whisper.cpython-312.pyc differ
 
models/__pycache__/llm_chat.cpython-312.pyc CHANGED
Binary files a/models/__pycache__/llm_chat.cpython-312.pyc and b/models/__pycache__/llm_chat.cpython-312.pyc differ
 
models/__pycache__/tts_router.cpython-312.pyc CHANGED
Binary files a/models/__pycache__/tts_router.cpython-312.pyc and b/models/__pycache__/tts_router.cpython-312.pyc differ
 
models/asr_whisper.py CHANGED
@@ -1,26 +1,61 @@
 # models/asr_whisper.py
+from __future__ import annotations
+import os
+from typing import Optional, Dict, Any
 from faster_whisper import WhisperModel
 from utils.config import get_settings
 
-_asr_singleton = None
+_asr_singleton: Optional["WhisperASR"] = None
+
+
+def _norm_device(req: str) -> str:
+    """faster-whisper supports only 'cpu' or 'cuda'."""
+    r = (req or "cpu").strip().lower()
+    if r == "mps":
+        print("[ASR] 'mps' is not supported by faster-whisper; falling back to CPU.")
+        return "cpu"
+    return r if r in ("cpu", "cuda") else "cpu"
+
+
+def _compute_type_for(device: str) -> str:
+    # Keep it simple and stable for HF/macOS:
+    #   - CPU: int8 (fast, small)
+    #   - CUDA: float16 (good default on GPUs)
+    return "float16" if device == "cuda" else "int8"
+
 
 class WhisperASR:
     def __init__(self):
         s = get_settings()
-        # faster-whisper supports: 'cpu' or 'cuda' (no 'mps')
-        requested = (s.ASR_DEVICE or "cpu").lower()
-        device = "cpu" if requested not in ("cpu", "cuda") else requested
-        if requested == "mps":
-            print("[ASR] 'mps' not supported by faster-whisper; falling back to CPU.")
-        compute_type = "int8" if device == "cpu" else "float16"
-        self.model = WhisperModel("tiny", device=device, compute_type=compute_type)
-
-    def transcribe(self, path: str) -> dict:
-        segments, info = self.model.transcribe(path, beam_size=1, language="en")
-        text = " ".join(seg.text.strip() for seg in segments)
-        return {"text": text, "language": info.language, "segments": []}
-
-def get_asr():
+        self.model_size = os.getenv("WHISPER_SIZE", "tiny").strip()    # tiny|base|small|medium|large-v3 ...
+        self.language = os.getenv("WHISPER_LANG", "").strip() or None  # e.g., "en"; None = auto
+
+        self.device = _norm_device(getattr(s, "ASR_DEVICE", "cpu"))
+        self.compute_type = _compute_type_for(self.device)
+
+        print(f"[ASR] Loading faster-whisper: size={self.model_size} device={self.device} compute_type={self.compute_type}")
+        self.model = WhisperModel(self.model_size, device=self.device, compute_type=self.compute_type)
+
+    def transcribe(self, path: str) -> Dict[str, Any]:
+        """
+        Returns: {"text": str, "language": str|None, "segments": [...]}
+        """
+        # language=None lets faster-whisper auto-detect. You can force via WHISPER_LANG.
+        segments, info = self.model.transcribe(
+            path,
+            beam_size=1,
+            language=self.language or None,
+        )
+        text = " ".join((seg.text or "").strip() for seg in segments).strip()
+        return {
+            "text": text or "",
+            "language": getattr(info, "language", None),
+            # You can expose timings later if you want:
+            "segments": []  # keep lightweight for UI
+        }
+
+
+def get_asr() -> WhisperASR:
     global _asr_singleton
     if _asr_singleton is None:
         _asr_singleton = WhisperASR()
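A sketch of the env-driven knobs (`WHISPER_SIZE`, `WHISPER_LANG`) introduced above; the audio path is hypothetical:

```python
import os
os.environ["WHISPER_SIZE"] = "base"  # default is "tiny"
os.environ["WHISPER_LANG"] = "en"    # unset/empty = auto-detect

from models.asr_whisper import get_asr

out = get_asr().transcribe("runtime/audio/sample.wav")
print(out["text"], out["language"])
```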
models/llm_chat.py CHANGED
@@ -2,10 +2,10 @@
2
  from __future__ import annotations
3
  from typing import List, Dict, Any, Tuple
4
  import os
5
-
6
  from utils.config import get_settings
7
 
8
- # --- Small, readable menu JSON kept in the system prompt for now ---
9
  MENU_JSON = """
10
  {
11
  "pizzas": [
@@ -25,18 +25,52 @@ MENU_JSON = """
25
  """
26
 
27
  SYSTEM_PROMPT = f"""You are Marta, the AI call/SMS assistant for FutureCafe.
28
- You talk naturally and help with:
29
- - Menu questions, placing orders, hours/location, and reservations (lightweight).
30
- - If the user asks for pizza/order: list choices from the MENU and ask for missing details (size, quantity, etc.).
31
- - If user provides all details, confirm the order in words (no need to return JSON), include a brief total using MENU prices.
32
- - For hours/location, reply from MENU.
33
- - For unrelated topics, gently steer back to FutureCafe; if the user remains off-topic for 3 turns total, politely end.
34
- - Keep replies concise and friendly. No long explanations.
35
-
36
- MENU (JSON you can read from for options & prices):
 
 
 
 
 
 
 
 
 
 
 
 
37
  {MENU_JSON}
38
  """
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # ---------------- llama.cpp singleton ----------------
41
  _llm = None
42
 
@@ -52,30 +86,44 @@ def _get_local_llm():
52
  raise RuntimeError(f"LLAMACPP_MODEL_PATH not found: {model_path}")
53
  _llm = Llama(
54
  model_path=model_path,
55
- n_ctx=2048,
56
- n_threads=os.cpu_count() or 4,
57
- n_gpu_layers=0, # CPU by default
58
  verbose=False,
59
  )
60
  return _llm
61
 
62
- def _apply_chat_template(messages: List[Dict[str, str]]) -> str:
63
- parts = []
 
64
  for m in messages:
65
  role = m.get("role", "user")
66
- content = m.get("content", "")
67
  if role == "system":
68
- parts.append(f"<|system|>\n{content}\n")
69
- elif role == "user":
70
- parts.append(f"<|user|>\n{content}\n")
71
  else:
72
- parts.append(f"<|assistant|>\n{content}\n")
73
- parts.append("<|assistant|>\n")
74
- return "\n".join(parts)
 
 
 
75
 
76
- def _generate(messages: List[Dict[str, str]], temperature=0.3, max_tokens=320) -> str:
 
 
 
 
 
 
 
 
 
 
77
  llm = _get_local_llm()
78
- prompt = _apply_chat_template(messages)
79
  out = llm(
80
  prompt,
81
  max_tokens=max_tokens,
@@ -84,33 +132,31 @@ def _generate(messages: List[Dict[str, str]], temperature=0.3, max_tokens=320) -
84
  repeat_penalty=1.1,
85
  stop=["<|user|>", "<|system|>", "<|assistant|>"],
86
  )
87
- return (out["choices"][0]["text"] or "").strip()
 
88
 
 
89
  def respond_chat(
90
  history: List[Dict[str, str]],
91
  user_text: str,
92
  guard_state: Dict[str, Any] | None,
93
  ) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
94
- """
95
- LLM-only conversational brain.
96
- Returns: (assistant_text, new_guard_state, diag)
97
- guard_state: {"unrelated": int, "ended": int, "limit": int}
98
- """
99
  guard = dict(guard_state or {"unrelated": 0, "ended": 0, "limit": 3})
100
  if guard.get("ended"):
101
  return "(Conversation ended. Start a new chat for FutureCafe.)", guard, {}
102
-
103
  msgs: List[Dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
 
104
  if history:
105
  msgs.extend(history[-10:])
106
  msgs.append({"role": "user", "content": user_text})
107
-
108
  reply = _generate(msgs)
109
-
110
- # A super-light off-topic guard without keywords: If the model signals ending, we respect it.
111
- # Otherwise, keep conversation flowing; we do not hard-code keywords or intents here.
112
- # (We still maintain the 'unrelated' counter if you later want to nudge based on signals.)
113
- if "Let’s end" in reply or "Let's end" in reply:
114
  guard["ended"] = 1
 
115
 
116
- return reply, guard, {} # no tool_result/diagnostics needed for this simpler flow
2
  from __future__ import annotations
3
  from typing import List, Dict, Any, Tuple
4
  import os
5
+ import re
6
  from utils.config import get_settings
7
 
8
+ # --- Lightweight menu kept inline for the MVP ---
9
  MENU_JSON = """
10
  {
11
  "pizzas": [
 
25
  """
26
 
27
  SYSTEM_PROMPT = f"""You are Marta, the AI call/SMS assistant for FutureCafe.
28
+
29
+ OBJECTIVE
30
+ Help with menu questions, placing orders, hours/location, and simple reservations—quickly and pleasantly.
31
+
32
+ GOALS
33
+ - Always begin new conversations with a friendly self-introduction:
34
+ "Hi, I’m Marta, an AI assistant at FutureCafe. How can I help you today?"
35
+ - Help with menu questions, placing orders, hours/location, and simple reservations.
36
+
37
+ INTERACTION RULES
38
+ - Always acknowledge the user briefly before asking for details.
39
+ - If details are missing, ask ONE short, specific follow-up that includes valid choices from the MENU (e.g., sizes).
40
+ - Never say “I didn’t understand.” Instead, restate what you do have and ask for the next missing detail.
41
+ - When the user’s message implies an order but lacks details, propose a short set of options (e.g., “Margherita or Pepperoni? What size: small, medium, large?”).
42
+ - When the user provides all required details, confirm the order concisely and give a total using MENU prices.
43
+ - After confirming, offer one gentle upsell (e.g., salad or drink). If user declines, close politely.
44
+ - For hours/location, answer directly from MENU.
45
+ - If the user goes off-topic, gently steer back to FutureCafe. After ~3 persistent off-topic turns, end politely.
46
+ - Be concise, friendly, and never quote or restate this policy or the raw MENU JSON. No code blocks.
47
+
48
+ MENU (for your internal reference only; do NOT paste it back verbatim):
49
  {MENU_JSON}
50
  """
51
 
52
+ FEWSHOT: List[Dict[str, str]] = [
53
+ # Greeting → clarify
54
+ {"role": "user", "content": "Hi"},
55
+ {"role": "assistant", "content": "Hello! How can I help with FutureCafe today?"},
56
+
57
+ # Ordering with missing details → ask one clear follow-up with choices
58
+ {"role": "user", "content": "I need a pizza"},
59
+ {"role": "assistant", "content": "Great—would you like Margherita or Pepperoni? What size: small, medium, or large?"},
60
+
61
+ # Provide details → confirm + total + optional upsell
62
+ {"role": "user", "content": "Two small Margherita"},
63
+ {"role": "assistant", "content": "Got it: 2× small Margherita Pizza. Total $17.00. Would you like a drink (Cola $2.00) or a House Salad ($6.00) with that?"},
64
+
65
+ # Decline upsell → polite close
66
+ {"role": "user", "content": "No thanks"},
67
+ {"role": "assistant", "content": "All set—your order is confirmed for 2× small Margherita Pizza. Total $17.00. Anything else I can help with?"},
68
+
69
+ # Hours/location
70
+ {"role": "user", "content": "What time are you open and where are you?"},
71
+ {"role": "assistant", "content": "We’re open 11:00–22:00 daily at 123 Main St. How can I help with your order today?"},
72
+ ]
73
+
74
  # ---------------- llama.cpp singleton ----------------
75
  _llm = None
76
 
 
86
  raise RuntimeError(f"LLAMACPP_MODEL_PATH not found: {model_path}")
87
  _llm = Llama(
88
  model_path=model_path,
89
+ n_ctx=s.N_CTX,
90
+ n_threads=s.N_THREADS,
91
+ n_gpu_layers=s.N_GPU_LAYERS,
92
  verbose=False,
93
  )
94
  return _llm
95
 
96
+ # ---------------- Prompt building ----------------
97
+ def _apply_chatml(messages: List[Dict[str, str]]) -> str:
98
+ out = []
99
  for m in messages:
100
  role = m.get("role", "user")
101
+ content = m.get("content", "").strip()
102
  if role == "system":
103
+ out.append("<|system|>\n" + content + "\n")
104
+ elif role == "assistant":
105
+ out.append("<|assistant|>\n" + content + "\n")
106
  else:
107
+ out.append("<|user|>\n" + content + "\n")
108
+ out.append("<|assistant|>\n")
109
+ return "\n".join(out)
110
+
111
+ _CODE_FENCE_RE = re.compile(r"```.*?```", flags=re.DOTALL)
112
+ _TAG_RE = re.compile(r"<\|.*?\|>")
113
 
114
+ def _sanitize(text: str) -> str:
115
+ if not text:
116
+ return ""
117
+ text = _CODE_FENCE_RE.sub("", text)
118
+ text = _TAG_RE.sub("", text)
119
+ lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
120
+ if lines and any(k in lines[0].lower() for k in ["you are marta", "policy", "menu", "assistant", "as an ai"]):
121
+ lines = lines[1:]
122
+ return " ".join(lines).strip()
123
+
124
+ def _generate(messages: List[Dict[str, str]], temperature=0.15, max_tokens=256) -> str:
125
  llm = _get_local_llm()
126
+ prompt = _apply_chatml(messages)
127
  out = llm(
128
  prompt,
129
  max_tokens=max_tokens,
 
132
  repeat_penalty=1.1,
133
  stop=["<|user|>", "<|system|>", "<|assistant|>"],
134
  )
135
+ raw = (out["choices"][0]["text"] or "").strip()
136
+ return _sanitize(raw)
137
 
138
+ # ---------------- Public APIs ----------------
139
  def respond_chat(
140
  history: List[Dict[str, str]],
141
  user_text: str,
142
  guard_state: Dict[str, Any] | None,
143
  ) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
 
  guard = dict(guard_state or {"unrelated": 0, "ended": 0, "limit": 3})
145
  if guard.get("ended"):
146
  return "(Conversation ended. Start a new chat for FutureCafe.)", guard, {}
 
147
  msgs: List[Dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
148
+ msgs.extend(FEWSHOT)
149
  if history:
150
  msgs.extend(history[-10:])
151
  msgs.append({"role": "user", "content": user_text})
 
152
  reply = _generate(msgs)
153
+ if "let’s end" in reply.lower() or "let's end" in reply.lower():
154
  guard["ended"] = 1
155
+ return reply, guard, {}
156
 
157
+ def respond_chat_voice(
158
+ voice_history: List[Dict[str, str]],
159
+ transcript: str,
160
+ guard_state: Dict[str, Any] | None,
161
+ ) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
162
+ return respond_chat(voice_history, transcript, guard_state)
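To sanity-check the new few-shot chat flow (a minimal sketch; assumes the GGUF model configured via Settings is present on disk):

    from models.llm_chat import respond_chat

    history, guard = [], None
    reply, guard, _ = respond_chat(history, "I need a pizza", guard)
    print(reply)  # should ask for pizza type and size, per the FEWSHOT examples

    history += [{"role": "user", "content": "I need a pizza"},
                {"role": "assistant", "content": reply}]
    reply, guard, _ = respond_chat(history, "Two small Margherita", guard)
    print(reply, guard)  # confirmation + total; guard tracks the off-topic/ended state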
models/llm_router.py CHANGED
@@ -1,20 +1,26 @@
1
  from utils.config import get_settings
2
 
 
3
  def small_router(text: str) -> dict:
4
  t = (text or "").lower()
5
  if any(k in t for k in ["hour", "open", "close", "address", "location"]):
6
  return {"tool": "get_hours", "args": {}}
7
  if any(k in t for k in ["menu", "vegan", "gluten", "pizza", "salad", "special"]):
8
  flt = []
9
- for k in ["vegan","gluten-free","pizza","salad"]:
10
- if k in t: flt.append(k)
 
11
  return {"tool": "menu_lookup", "args": {"filters": flt}}
12
- if any(k in t for k in ["reserve","reservation","book","table"]):
13
- # naive hints
14
  party = 2 if ("2" in t or "two" in t) else None
15
  time = "19:00" if "7" in t else None
16
  return {"tool": "create_reservation", "args": {"party_size": party, "datetime_str": time}}
17
- if any(k in t for k in ["order","buy"]):
18
  return {"tool": "create_order", "args": {"items": []}}
19
  return {"tool": None, "args": {}}
20
 
@@ -37,10 +43,21 @@ def nlg(intent: str, tool_result: dict, user_text: str) -> str:
37
  items = ", ".join(f"{it['qty']}× {it['name']}" for it in tool_result.get("items", []))
38
  return f"Got it: {items}. Total ${tool_result.get('total', 0)}."
39
  return "I couldn't place that order—want me to try again?"
40
- # small talk
41
  return "Hello, this is Marta, an AI agent for FutureCafe. How can I help you today?"
42
 
43
- def respond(user_text: str) -> dict:
44
- # MVP: use rule-based router; later swap to real LLM function-calling
45
- route = small_router(user_text)
46
- return route
1
+ # models/llm_router.py
2
+ from __future__ import annotations
3
+ import os
4
+ from typing import Any, Dict
5
+
6
  from utils.config import get_settings
7
 
8
+ # --- Existing rule-based pieces kept as a fallback ---
9
  def small_router(text: str) -> dict:
10
  t = (text or "").lower()
11
  if any(k in t for k in ["hour", "open", "close", "address", "location"]):
12
  return {"tool": "get_hours", "args": {}}
13
  if any(k in t for k in ["menu", "vegan", "gluten", "pizza", "salad", "special"]):
14
  flt = []
15
+ for k in ["vegan", "gluten-free", "pizza", "salad"]:
16
+ if k in t:
17
+ flt.append(k)
18
  return {"tool": "menu_lookup", "args": {"filters": flt}}
19
+ if any(k in t for k in ["reserve", "reservation", "book", "table"]):
 
20
  party = 2 if ("2" in t or "two" in t) else None
21
  time = "19:00" if "7" in t else None
22
  return {"tool": "create_reservation", "args": {"party_size": party, "datetime_str": time}}
23
+ if any(k in t for k in ["order", "buy"]):
24
  return {"tool": "create_order", "args": {"items": []}}
25
  return {"tool": None, "args": {}}
26
 
 
43
  items = ", ".join(f"{it['qty']}× {it['name']}" for it in tool_result.get("items", []))
44
  return f"Got it: {items}. Total ${tool_result.get('total', 0)}."
45
  return "I couldn't place that order—want me to try again?"
 
46
  return "Hello, this is Marta, an AI agent for FutureCafe. How can I help you today?"
47
 
48
+ # --- Router mode switch (env-controlled) ---
49
+ # ROUTER_MODE = "rules" | "llm"
50
+ # - rules: use small_router (current behavior)
51
+ # - llm: return no tool; the chat LLM handles everything in text/voice flows
52
+ def _router_mode() -> str:
53
+ s = get_settings()
54
+ # allow either .env or process env to override
55
+ return os.getenv("ROUTER_MODE", getattr(s, "ROUTER_MODE", "rules")).strip().lower()
56
+
57
+ def respond(user_text: str) -> Dict[str, Any]:
58
+ mode = _router_mode()
59
+ if mode == "llm":
60
+ # Pure LLM flow: don’t pre-select tools; downstream chat model decides.
61
+ return {"tool": None, "args": {}}
62
+ # Default / fallback: rule-based
63
+ return small_router(user_text)
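The mode switch can be verified without touching code (a sketch; _router_mode() reads the env var on each respond() call, so it can be set at any point before that call):

    import os
    os.environ["ROUTER_MODE"] = "rules"  # default; "llm" defers to the chat model

    from models.llm_router import respond
    print(respond("do you have vegan pizza?"))
    # rules -> {"tool": "menu_lookup", "args": {"filters": ["vegan", "pizza"]}}
    # llm   -> {"tool": None, "args": {}}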
models/tts_router.py CHANGED
@@ -5,107 +5,105 @@ import os
5
  import re
6
  import uuid
7
  import wave
8
- import shutil
9
  import subprocess
10
  from shutil import which
11
  from typing import Optional
12
 
13
  RUNTIME_AUDIO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "runtime", "audio"))
14
 
15
-
16
- import os, glob
17
- from typing import Optional
18
-
19
  AUDIO_DIR = os.path.join(os.path.dirname(__file__), "..", "runtime", "audio")
20
  os.makedirs(AUDIO_DIR, exist_ok=True)
21
 
22
  def cleanup_old_audio(keep_latest: Optional[str] = None):
23
  """Delete all audio files in runtime/audio except the one to keep."""
24
- for f in glob.glob(os.path.join(AUDIO_DIR, "*.wav")):
 
25
  if keep_latest and os.path.abspath(f) == os.path.abspath(keep_latest):
26
  continue
27
- try:
28
- os.remove(f)
29
- except Exception as e:
30
- print(f"[CLEANUP] Could not delete {f}: {e}")
 
31
 
32
  def ensure_runtime_audio_dir() -> str:
33
  os.makedirs(RUNTIME_AUDIO_DIR, exist_ok=True)
34
  return RUNTIME_AUDIO_DIR
35
 
36
-
37
  def _have(cmd: str) -> bool:
38
  return which(cmd) is not None
39
 
40
-
41
- def _is_valid_wav(path: str) -> bool:
42
  try:
43
  with wave.open(path, "rb") as w:
44
  frames = w.getnframes()
45
  rate = w.getframerate()
46
- if frames <= 0 or rate <= 0:
 
47
  return False
48
  except Exception:
49
  return False
50
  return True
51
 
52
-
53
  def _tts_with_piper(text: str) -> Optional[str]:
54
  """
55
  Use local Piper if available.
56
- Requires:
57
- - env PIPER_MODEL to point to models/piper/<voice>.onnx
58
- - `piper` binary in PATH (brew install piper or from releases)
59
  """
60
  model = os.getenv("PIPER_MODEL")
61
  if not model or not os.path.exists(model):
62
  return None
63
- if not _have("piper"):
 
 
64
  return None
65
 
66
  out_dir = ensure_runtime_audio_dir()
67
  out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
68
 
69
- # Avoid stray control chars that can confuse some engines
70
  safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
71
  try:
72
- # Simple one-shot pipe
73
  p = subprocess.Popen(
74
- ["piper", "--model", model, "--output_file", out_path],
75
  stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
76
  )
77
- p.communicate(input=safe_text.encode("utf-8"), timeout=30)
78
  if p.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
79
- return out_path
80
  except Exception as e:
81
  print("[TTS] Piper error:", e)
82
  return None
83
 
84
-
85
  def _tts_with_say(text: str) -> Optional[str]:
86
  """
87
  macOS `say` fallback. Produces WAV via afconvert or ffmpeg if present;
88
- else writes AIFF and returns it if WAV conversion fails.
 
 
89
  """
90
- if os.name != "posix":
91
- return None
92
- if not _have("say"):
93
  return None
94
 
95
  out_dir = ensure_runtime_audio_dir()
96
  aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
97
  wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
98
 
 
99
  safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
100
  try:
101
- # Basic AIFF
102
- subprocess.run(["say", "-o", aiff, safe_text], check=True)
103
  except Exception as e:
104
  print("[TTS] say failed:", e)
105
  return None
106
 
107
  converted = False
108
- # Prefer afconvert
109
  if which("afconvert"):
110
  try:
111
  subprocess.run(
@@ -115,7 +113,6 @@ def _tts_with_say(text: str) -> Optional[str]:
115
  converted = True
116
  except Exception:
117
  converted = False
118
- # Else try ffmpeg
119
  if not converted and which("ffmpeg"):
120
  try:
121
  subprocess.run(
@@ -126,45 +123,40 @@ def _tts_with_say(text: str) -> Optional[str]:
126
  except Exception:
127
  converted = False
128
 
129
- # Cleanup/return best
130
  if converted and os.path.exists(wav) and _is_valid_wav(wav):
131
  try:
132
  os.remove(aiff)
133
  except Exception:
134
  pass
135
- return wav
136
 
137
- # Fallback: return AIFF if WAV conversion failed but aiff exists
138
  if os.path.exists(aiff):
139
- return aiff
 
140
 
141
  return None
142
 
143
-
144
  def tts_synthesize(text: str) -> Optional[str]:
145
  """
146
  High-level TTS router:
147
  1) Piper (if configured)
148
  2) macOS 'say'
149
  3) None
150
- Always writes to runtime/audio.
151
  """
152
  if not (text and text.strip()):
153
  return None
154
 
155
  ensure_runtime_audio_dir()
156
 
157
- # 1) Piper
158
  out = _tts_with_piper(text)
159
  if out:
160
  cleanup_old_audio(keep_latest=out)
161
  return out
162
 
163
- # 2) macOS say
164
  out = _tts_with_say(text)
165
  if out:
166
  cleanup_old_audio(keep_latest=out)
167
  return out
168
 
169
- # 3) None
170
  return None
 
5
  import re
6
  import uuid
7
  import wave
8
+ import glob
9
  import subprocess
10
  from shutil import which
11
  from typing import Optional
12
 
13
  RUNTIME_AUDIO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "runtime", "audio"))
14
 
15
  AUDIO_DIR = os.path.join(os.path.dirname(__file__), "..", "runtime", "audio")
16
  os.makedirs(AUDIO_DIR, exist_ok=True)
17
 
18
  def cleanup_old_audio(keep_latest: Optional[str] = None):
19
  """Delete all audio files in runtime/audio except the one to keep."""
20
+ for f in glob.glob(os.path.join(AUDIO_DIR, "*")):
21
+ # prune both .wav and .aiff, in case an engine produced AIFF
22
  if keep_latest and os.path.abspath(f) == os.path.abspath(keep_latest):
23
  continue
24
+ if f.endswith((".wav", ".aiff")):
25
+ try:
26
+ os.remove(f)
27
+ except Exception as e:
28
+ print(f"[CLEANUP] Could not delete {f}: {e}")
29
 
30
  def ensure_runtime_audio_dir() -> str:
31
  os.makedirs(RUNTIME_AUDIO_DIR, exist_ok=True)
32
  return RUNTIME_AUDIO_DIR
33
 
 
34
  def _have(cmd: str) -> bool:
35
  return which(cmd) is not None
36
 
37
+ def _is_valid_wav(path: str, min_duration_s: float = 0.25) -> bool:
 
38
  try:
39
  with wave.open(path, "rb") as w:
40
  frames = w.getnframes()
41
  rate = w.getframerate()
42
+ dur = (frames / float(rate)) if rate else 0.0
43
+ if frames <= 0 or rate <= 0 or dur < min_duration_s:
44
  return False
45
  except Exception:
46
  return False
47
  return True
48
 
 
49
  def _tts_with_piper(text: str) -> Optional[str]:
50
  """
51
  Use local Piper if available.
52
+ Env:
53
+ - PIPER_MODEL: path to models/piper/<voice>.onnx
54
+ - PIPER_BIN (optional): override binary name/path (default 'piper')
55
  """
56
  model = os.getenv("PIPER_MODEL")
57
  if not model or not os.path.exists(model):
58
  return None
59
+ piper_bin = os.getenv("PIPER_BIN", "piper")
60
+ # An absolute PIPER_BIN path is tried even when it isn't on PATH
61
+ if not _have(piper_bin) and not os.path.isabs(piper_bin):
62
  return None
63
 
64
  out_dir = ensure_runtime_audio_dir()
65
  out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
66
 
 
67
  safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
68
  try:
 
69
  p = subprocess.Popen(
70
+ [piper_bin, "--model", model, "--output_file", out_path],
71
  stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
72
  )
73
+ p.communicate(input=safe_text.encode("utf-8"), timeout=45)
74
  if p.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
75
+ return os.path.abspath(out_path)
76
  except Exception as e:
77
  print("[TTS] Piper error:", e)
78
  return None
79
 
 
80
  def _tts_with_say(text: str) -> Optional[str]:
81
  """
82
  macOS `say` fallback. Produces WAV via afconvert or ffmpeg if present;
83
+ else returns AIFF path.
84
+ Env:
85
+ - SAY_VOICE (optional): e.g., "Samantha" / "Alex"
86
  """
87
+ if os.name != "posix" or not _have("say"):
 
 
88
  return None
89
 
90
  out_dir = ensure_runtime_audio_dir()
91
  aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
92
  wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
93
 
94
+ voice = os.getenv("SAY_VOICE")
95
  safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
96
  try:
97
+ cmd = ["say", "-o", aiff]
98
+ if voice:
99
+ cmd.extend(["-v", voice])
100
+ cmd.append(safe_text)
101
+ subprocess.run(cmd, check=True)
102
  except Exception as e:
103
  print("[TTS] say failed:", e)
104
  return None
105
 
106
  converted = False
 
107
  if which("afconvert"):
108
  try:
109
  subprocess.run(
 
113
  converted = True
114
  except Exception:
115
  converted = False
 
116
  if not converted and which("ffmpeg"):
117
  try:
118
  subprocess.run(
 
123
  except Exception:
124
  converted = False
125
 
 
126
  if converted and os.path.exists(wav) and _is_valid_wav(wav):
127
  try:
128
  os.remove(aiff)
129
  except Exception:
130
  pass
131
+ return os.path.abspath(wav)
132
 
 
133
  if os.path.exists(aiff):
134
+ # AIFF is fine as a fallback (Gradio can usually play it)
135
+ return os.path.abspath(aiff)
136
 
137
  return None
138
 
 
139
  def tts_synthesize(text: str) -> Optional[str]:
140
  """
141
  High-level TTS router:
142
  1) Piper (if configured)
143
  2) macOS 'say'
144
  3) None
145
+ Always writes to runtime/audio and prunes older files.
146
  """
147
  if not (text and text.strip()):
148
  return None
149
 
150
  ensure_runtime_audio_dir()
151
 
 
152
  out = _tts_with_piper(text)
153
  if out:
154
  cleanup_old_audio(keep_latest=out)
155
  return out
156
 
 
157
  out = _tts_with_say(text)
158
  if out:
159
  cleanup_old_audio(keep_latest=out)
160
  return out
161
 
 
162
  return None
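Typical use of the TTS router (a sketch; the PIPER_MODEL/PIPER_BIN values are illustrative, and on hosts without Piper or macOS `say` this simply returns None):

    import os
    os.environ.setdefault("PIPER_MODEL", "models/piper/en_US-amy-medium.onnx")  # assumed path
    os.environ.setdefault("PIPER_BIN", "piper")

    from models.tts_router import tts_synthesize
    path = tts_synthesize("Your order is confirmed.")
    print(path)  # runtime/audio/tts_<hex>.wav, an .aiff fallback, or None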
requirements.txt CHANGED
@@ -1,11 +1,10 @@
1
- gradio
2
  pydantic>=2.8
3
  pydantic-settings>=2.5
4
  numpy>=1.26
5
  soundfile>=0.12
6
- webrtcvad
7
- faster-whisper
8
- llama-cpp-python==0.2.90
9
- pyttsx3
10
- openai
11
- huggingface_hub>=0.23
 
1
+ gradio>=5.0
2
  pydantic>=2.8
3
  pydantic-settings>=2.5
4
  numpy>=1.26
5
  soundfile>=0.12
6
+ webrtcvad>=2.0.10
7
+ faster-whisper>=1.0.0
8
+ llama-cpp-python>=0.2.90
9
+ pyttsx3>=2.90
10
+ openai>=1.44.0
 
runtime/audio/tts_8eda72f9b61c4b13a04c70a4b1f1a997.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:359a1a8892aac21d21dae0840b05c6d72bdab4bd8a91ec41850de9deb5096bae
3
+ size 246834
utils/__pycache__/config.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/config.cpython-312.pyc and b/utils/__pycache__/config.cpython-312.pyc differ
 
utils/config.py CHANGED
@@ -1,38 +1,55 @@
 
1
  from __future__ import annotations
2
  import os
3
- from pydantic_settings import BaseSettings
 
4
  from pydantic import Field
5
 
6
  class Settings(BaseSettings):
 
7
  BACKEND_LLM: str = Field(default="llamacpp") # 'llamacpp' | 'openai' | 'groq'
8
- LLAMACPP_MODEL_PATH: str = Field(default="models/qwen2.5-1.5b-instruct-q4_k_m.gguf")
9
 
 
10
  N_CTX: int = 4096
11
  N_THREADS: int = 4
12
  N_GPU_LAYERS: int = 0
13
 
14
- ASR_DEVICE: str = "mps" # 'mps' or 'cpu'
15
- TTS_ENGINE: str = "pyttsx3" # 'pyttsx3' | 'say' | 'piper' (later)
 
16
 
17
- OPENAI_API_KEY: str | None = None
18
- GROQ_API_KEY: str | None = None
20
  IS_HF_SPACE: bool = False
21
  DEBUG: bool = True
 
22
 
23
- class Config:
24
- env_file = ".env"
25
- extra = "ignore"
26
 
27
  def pretty(self) -> dict:
28
  d = self.model_dump()
29
- if d.get("OPENAI_API_KEY"):
30
- d["OPENAI_API_KEY"] = True
31
- if d.get("GROQ_API_KEY"):
32
- d["GROQ_API_KEY"] = True
33
  return d
34
 
35
- _settings: Settings | None = None
 
36
 
37
  def get_settings() -> Settings:
38
  global _settings
 
1
+ # utils/config.py
2
  from __future__ import annotations
3
  import os
4
+ from typing import Optional
5
+ from pydantic_settings import BaseSettings, SettingsConfigDict
6
  from pydantic import Field
7
 
8
  class Settings(BaseSettings):
9
+ # --- Core LLM backend ---
10
  BACKEND_LLM: str = Field(default="llamacpp") # 'llamacpp' | 'openai' | 'groq'
11
+ LLAMACPP_MODEL_PATH: Optional[str] = Field(default=None)
12
 
13
+ # llama.cpp runtime knobs
14
  N_CTX: int = 4096
15
  N_THREADS: int = 4
16
  N_GPU_LAYERS: int = 0
17
 
18
+ # ASR / TTS
19
+ ASR_DEVICE: str = "cpu" # 'mps' | 'cpu'
20
+ TTS_ENGINE: str = "pyttsx3" # 'pyttsx3' | 'say' | 'piper'
21
 
22
+ # Piper specifics (optional, only used if TTS_ENGINE='piper')
23
+ PIPER_MODEL: Optional[str] = None # e.g. "models/piper/en_US-amy-medium.onnx"
24
+ PIPER_BIN: str = "piper" # executable name or absolute path
25
 
26
+ # Where we persist session audio (created elsewhere if missing)
27
+ VOICE_AUDIO_DIR: str = "runtime/audio"
28
+
29
+ # Cloud keys (optional)
30
+ OPENAI_API_KEY: Optional[str] = None
31
+ GROQ_API_KEY: Optional[str] = None
32
+
33
+ # App flags
34
  IS_HF_SPACE: bool = False
35
  DEBUG: bool = True
36
+ CAFE_UNRELATED_LIMIT: int = 3
37
 
38
+ model_config = SettingsConfigDict(env_file=".env", extra="ignore")
 
 
39
 
40
  def pretty(self) -> dict:
41
  d = self.model_dump()
42
+ # Mask secrets
43
+ for k in ("OPENAI_API_KEY", "GROQ_API_KEY"):
44
+ if d.get(k):
45
+ d[k] = True
46
+ # Expand absolute path preview for convenience (doesn't change real value)
47
+ if d.get("VOICE_AUDIO_DIR"):
48
+ d["VOICE_AUDIO_DIR"] = os.path.abspath(d["VOICE_AUDIO_DIR"])
49
  return d
50
 
51
+
52
+ _settings: Optional[Settings] = None
53
 
54
  def get_settings() -> Settings:
55
  global _settings
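A minimal usage sketch for the settings singleton (assumes a .env next to the app; get_settings() caches the instance, following the singleton pattern above):

    from utils.config import get_settings

    s = get_settings()      # reads .env once, then returns the cached Settings
    print(s.BACKEND_LLM, s.ASR_DEVICE, s.TTS_ENGINE)
    print(s.pretty())       # API keys masked as True; VOICE_AUDIO_DIR shown as an absolute path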