Eyob-Sol committed
Commit 74bb5fe · verified · 1 Parent(s): af7a51d

Upload 38 files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav filter=lfs diff=lfs merge=lfs -text
+ runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,29 @@
  ---
- title: Futurecafe Voice Core
- emoji: 💻
- colorFrom: yellow
- colorTo: yellow
+ title: FutureCafe Voice Core (Private)
+ emoji: ☎️
+ colorFrom: indigo
+ colorTo: blue
  sdk: gradio
- sdk_version: 5.46.0
+ sdk_version: 4.44.0
  app_file: app.py
  pinned: false
+ license: mit
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # FutureCafe Voice Core (Private)
+
+ This Space runs the **full** Gradio app (voice + SMS). It’s **private** and will be called by a public wrapper Space via `gradio_client`.
+
+ ## Run
+
+ - Uses the **Piper** TTS model at `models/piper/en_US-amy-medium.onnx`
+ - Uses **faster-whisper** (tiny) for ASR
+
+ ### Environment variables (set in Space → Settings → Secrets)
+ - `BACKEND_LLM=openai` (or `groq`)
+ - If `openai`: `OPENAI_API_KEY=<your-key>`
+ - If `groq`: `GROQ_API_KEY=<your-key>`
+ - `TTS_ENGINE=piper`
+ - `PIPER_MODEL=models/piper/en_US-amy-medium.onnx`
+
+ This project writes generated audio files into `runtime/audio`.
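
For context, a public wrapper Space would reach this private Space through `gradio_client`, roughly as sketched below. The Space id, token, and `api_name` are illustrative placeholders, not values confirmed by this commit; the real endpoint names depend on how `app.py` registers its events.

```python
# Hypothetical wrapper-side call into this private Space via gradio_client.
# The Space id, hf_token value, and api_name are placeholders.
from gradio_client import Client

client = Client("your-org/futurecafe-voice-core", hf_token="hf_xxx")  # a private Space requires a token
reply = client.predict("Any vegan pizzas?", api_name="/chat")         # endpoint name is an assumption
print(reply)
```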
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (183 Bytes)
app/__pycache__/catalog.cpython-312.pyc ADDED
Binary file (5.51 kB)
app/__pycache__/gradio_app.cpython-312.pyc ADDED
Binary file (10.7 kB)
app/__pycache__/intent_schema.cpython-312.pyc ADDED
Binary file (2.18 kB)
app/__pycache__/orchestrator.cpython-312.pyc ADDED
Binary file (1.51 kB)
app/__pycache__/policy.cpython-312.pyc ADDED
Binary file (2.53 kB)
app/__pycache__/sim_api.cpython-312.pyc ADDED
Binary file (3.78 kB)
app/__pycache__/tools.cpython-312.pyc ADDED
Binary file (1.4 kB)
app/catalog.py ADDED
@@ -0,0 +1,110 @@
+ # app/catalog.py
+ from __future__ import annotations
+ import json, os
+ from typing import Dict, Any, List, Optional
+
+ _CATALOG: Dict[str, Any] | None = None
+
+ def get_catalog_path() -> str:
+     here = os.path.dirname(os.path.abspath(__file__))
+     root = os.path.dirname(here)
+     return os.path.join(root, "data", "menu_catalog.json")
+
+ def load_catalog() -> Dict[str, Any]:
+     global _CATALOG
+     if _CATALOG is not None:
+         return _CATALOG
+     path = get_catalog_path()
+     with open(path, "r", encoding="utf-8") as f:
+         _CATALOG = json.load(f)
+     return _CATALOG
+
+ def find_item_by_name(name: str) -> Optional[Dict[str, Any]]:
+     c = load_catalog()
+     name_l = (name or "").strip().lower()
+     for it in c["items"]:
+         if it["name"].lower() == name_l:
+             return it
+         # lightweight alias match
+         if name_l in it["name"].lower():
+             return it
+     return None
+
+ def find_item_by_sku(sku: str) -> Optional[Dict[str, Any]]:
+     c = load_catalog()
+     for it in c["items"]:
+         if it["sku"] == sku:
+             return it
+     return None
+
+ def required_fields_for_category(category: str) -> List[str]:
+     c = load_catalog()
+     schema = c["schema"].get(category) or {}
+     return list(schema.get("required_fields") or [])
+
+ def optional_fields_for_category(category: str) -> List[str]:
+     c = load_catalog()
+     schema = c["schema"].get(category) or {}
+     return list(schema.get("optional_fields") or [])
+
+ def compute_missing_fields(order_item: Dict[str, Any]) -> List[str]:
+     """
+     order_item: {"name": "...", "sku": optional, "qty": int, "<opts>": ...}
+     Uses catalog schema to see which fields are missing.
+     """
+     it = None
+     if "sku" in order_item:
+         it = find_item_by_sku(order_item["sku"])
+     if not it and "name" in order_item:
+         it = find_item_by_name(order_item["name"])
+     if not it:
+         return ["name"]  # we don’t even know the item yet
+
+     category = it["category"]
+     req = set(required_fields_for_category(category))
+     present = {k for k in order_item if k in req or k in ("qty", "name", "sku")}
+
+     # qty normalization: consider qty present if >= 1
+     if "qty" in req and (order_item.get("qty") is None or int(order_item.get("qty", 0)) < 1):
+         # keep qty “missing”
+         pass
+     else:
+         present.add("qty")
+
+     missing = [f for f in req if f not in present]
+     return missing
+
+ def friendly_requirements_prompt(order_item: Dict[str, Any]) -> str:
+     it = None
+     if "sku" in order_item:
+         it = find_item_by_sku(order_item["sku"])
+     if not it and "name" in order_item:
+         it = find_item_by_name(order_item["name"])
+     if not it:
+         return "Which item would you like to order?"
+
+     category = it["category"]
+     req = required_fields_for_category(category)
+     opt = optional_fields_for_category(category)
+
+     parts = []
+     opt_txt = ""
+     if opt:
+         opt_txt = f" Optional: {', '.join(opt)}."
+     if req:
+         parts.append(f"I need {', '.join(req)} for {it['name']}.{opt_txt}")
+     else:
+         parts.append(f"Please specify quantity for {it['name']}.{opt_txt}")
+
+     # Also list choices for required options
+     # e.g., size choices
+     opts = it.get("options") or {}
+     choice_bits = []
+     for k, spec in opts.items():
+         if spec.get("required"):
+             choices = spec.get("choices") or []
+             if choices:
+                 choice_bits.append(f"{k}: {', '.join(choices)}")
+     if choice_bits:
+         parts.append("Choices → " + " | ".join(choice_bits))
+     return " ".join(parts)
app/gradio_app.py ADDED
@@ -0,0 +1,299 @@
+ # app/gradio_app.py
+ from __future__ import annotations
+
+ import os
+ import time
+ import shutil
+ import uuid
+ from typing import List, Dict, Any, Tuple
+
+ import gradio as gr
+
+ # ---- External modules we rely on (light, stable) ----
+ # - ASR: faster-whisper wrapper you already have
+ # - TTS: local Piper / macOS `say` via models/tts_router.py
+ # - LLM: optional local model; if missing, we fall back to a safe canned reply
+ try:
+     from models.asr_whisper import get_asr
+ except Exception:
+     get_asr = None
+
+ try:
+     from models.llm_chat import respond_chat as llm_respond_chat
+ except Exception:
+     llm_respond_chat = None
+
+ from models.tts_router import tts_synthesize, ensure_runtime_audio_dir
+
+
+ # =============================================================================
+ # Helpers (pure, modular)
+ # =============================================================================
+
+ def _safe_llm_reply(history: List[Dict[str, str]], user_text: str) -> str:
+     """
+     Ask the chat LLM for a response. If it's not available, use a reasonable fallback.
+     """
+     if llm_respond_chat is not None:
+         try:
+             # policy guard is optional; pass an empty dict
+             bot_text, _guard, _diag = llm_respond_chat(history or [], user_text, {})
+             if isinstance(bot_text, str) and bot_text.strip():
+                 return bot_text.strip()
+         except Exception as e:
+             print("[LLM] fallback due to error:", e)
+     # Fallback (LLM unavailable or failed)
+     return "Hello! How can I assist you today? Would you like to place an order or inquire about the menu?"
+
+
+ def _asr_transcribe(aud_path: str) -> str:
+     """
+     Transcribe audio to text. If ASR is unavailable, return a safe message.
+     """
+     if not aud_path:
+         return "(no audio)"
+     if get_asr is None:
+         return "(ASR unavailable)"
+     try:
+         asr = get_asr()
+         out = asr.transcribe(aud_path)
+         return (out.get("text") or "").strip() or "(no speech detected)"
+     except Exception as e:
+         print("[ASR] error:", e)
+         return "(transcription failed)"
+
+
+ def _tts_from_text(text: str) -> str | None:
+     """
+     Synthesize assistant text to a WAV in runtime/audio.
+     Returns a file path or None.
+     """
+     if not (text and text.strip()):
+         return None
+     path = tts_synthesize(text.strip())
+     if path and os.path.exists(path):
+         return path
+     # always attempt one more minimal fallback to avoid an empty path
+     return tts_synthesize("How can I help with FutureCafe?")
+
+
+ def _append_chat(history: List[Dict[str, str]] | None,
+                  role: str, content: str) -> List[Dict[str, str]]:
+     hist = list(history or [])
+     hist.append({"role": role, "content": content})
+     return hist
+
+
+ def _startup_clean_runtime_audio():
+     """
+     On app start, clean previous session audio artifacts.
+     """
+     audio_dir = ensure_runtime_audio_dir()
+     try:
+         for name in os.listdir(audio_dir):
+             p = os.path.join(audio_dir, name)
+             if os.path.isfile(p):
+                 os.remove(p)
+     except Exception as e:
+         print("[RUNTIME] Cannot clean runtime/audio:", e)
+
+
+ # =============================================================================
+ # Voice handlers (modular)
+ # =============================================================================
+
+ def handle_voice_turn(
+     user_audio_path: str,
+     voice_history: List[Dict[str, str]] | None
+ ) -> Tuple[List[Dict[str, str]], str | None, Dict[str, Any]]:
+     """
+     Single voice turn:
+       1) Transcribe user audio
+       2) Ask LLM for a reply (text)
+       3) TTS the reply to a WAV
+       4) Append both transcript and assistant text to the voice chat history
+
+     Returns: (new_voice_history, assistant_audio_path, diag_json)
+     """
+     t0 = time.time()
+
+     transcript = _asr_transcribe(user_audio_path)
+     hist1 = _append_chat(voice_history, "user", transcript)
+
+     bot_text = _safe_llm_reply(hist1, transcript)
+     hist2 = _append_chat(hist1, "assistant", bot_text)
+
+     tts_path = _tts_from_text(bot_text)
+
+     diag = {
+         "intent": None,
+         "slots": {},
+         "tool_selected": None,
+         "tool_result": {
+             "transcript": transcript,
+             "llm_response": bot_text
+         },
+         "latency_ms": int((time.time() - t0) * 1000),
+     }
+
+     return hist2, tts_path, diag
+
+
+ # =============================================================================
+ # Text handlers (modular)
+ # =============================================================================
+
+ def handle_text_turn(
+     user_text: str,
+     chat_history: List[Dict[str, str]] | None
+ ) -> Tuple[List[Dict[str, str]], Dict[str, Any], str]:
+     """
+     Single text turn:
+       1) Append user text
+       2) Ask LLM for a reply
+       3) Append assistant text
+       4) Prepare diagnostics
+     Returns: (new_chat_history, diag_json, clear_text_value)
+     """
+     t0 = time.time()
+     user_text = (user_text or "").strip()
+     if not user_text:
+         return (chat_history or []), {"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0}, ""
+
+     hist1 = _append_chat(chat_history, "user", user_text)
+     bot_text = _safe_llm_reply(hist1, user_text)
+     hist2 = _append_chat(hist1, "assistant", bot_text)
+
+     diag = {
+         "intent": None,
+         "slots": {},
+         "tool_selected": None,
+         "tool_result": {"user": user_text, "llm_response": bot_text},
+         "latency_ms": int((time.time() - t0) * 1000),
+     }
+
+     return hist2, diag, ""
+
+
+ # =============================================================================
+ # Fixed UI (as requested) + wiring
+ # =============================================================================
+
+ def build_demo():
+     """
+     Fixed UI layout:
+       LEFT (Voice Call):
+         - voice_in (mic recorder)
+         - assistant_audio (autoplay)
+         - voice_chat (transcript chat)
+         - call_diag (JSON)
+       RIGHT (SMS/Chat):
+         - chat_box
+         - text_in (enter to send)
+         - chat_diag (JSON)
+     """
+     _startup_clean_runtime_audio()
+
+     with gr.Blocks(title="FutureCafe Call/SMS Agent (MVP)") as demo:
+         gr.Markdown("### ☎️ FutureCafe AI Agent (MVP)\n**Call (voice)** on the left · **SMS/Chat** on the right")
+
+         # States
+         voice_state = gr.State([])  # list of {"role","content"} for voice transcript chat
+         chat_state = gr.State([])   # list of {"role","content"} for SMS chat
+
+         with gr.Row():
+             # ---------------- LEFT: VOICE ----------------
+             with gr.Column(scale=1, min_width=430):
+                 gr.Markdown("#### 📞 Voice Call")
+                 voice_in = gr.Audio(
+                     label="Press Record → Speak → Stop (auto-sends)",
+                     sources=["microphone"],
+                     type="filepath",
+                     format="wav",
+                     interactive=True,
+                     editable=False,
+                     waveform_options={"show_recording_waveform": True},
+                 )
+
+                 assistant_audio = gr.Audio(
+                     label="Assistant Response (auto-play)",
+                     autoplay=True,
+                     type="filepath",
+                     interactive=False
+                 )
+
+                 voice_chat = gr.Chatbot(value=[], type="messages", height=220, label="Voice Chat (transcripts)")
+
+                 call_diag = gr.JSON(
+                     value={"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0},
+                     label="Voice Diagnostics"
+                 )
+
+             # ---------------- RIGHT: SMS / CHAT ----------------
+             with gr.Column(scale=1, min_width=430):
+                 gr.Markdown("#### 💬 SMS / Chat")
+                 chat_box = gr.Chatbot(value=[], type="messages", height=360, label=None)
+                 text_in = gr.Textbox(
+                     placeholder="Type here… e.g., “Any vegan pizzas?”, “Book a table for 2 at 7.” (Enter to send)",
+                     label=None, lines=1
+                 )
+                 chat_diag = gr.JSON(
+                     value={"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0},
+                     label="Chat Diagnostics"
+                 )
+
+         # ---------- Handlers (thin wrappers that call modular functions) ----------
+         def _clear_recorder():
+             # Only clears the recorder input; leaves assistant audio + transcripts intact
+             return gr.update(value=None, interactive=True)
+
+         def on_voice_change(aud_path: str | None, vhist: List[Dict[str, str]]):
+             if not aud_path:
+                 # no audio; keep everything as-is
+                 return vhist or [], None, {"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0}
+
+             new_vhist, tts_path, diag = handle_voice_turn(aud_path, vhist or [])
+             return new_vhist, tts_path, diag
+
+         def on_text_send(txt: str, hist: List[Dict[str, str]]):
+             new_hist, diag, clear_text = handle_text_turn(txt, hist or [])
+             return new_hist, diag, clear_text
+
+         # ---------- Wiring ----------
+         # Voice lane: update (voice_chat, assistant_audio, call_diag); clear the recorder only AFTER outputs are set.
+         # Try to fire on explicit Stop; fall back to the generic change event if not supported.
+         rec_event = getattr(voice_in, "stop_recording", None)
+         if callable(rec_event):
+             rec_event(
+                 on_voice_change,
+                 inputs=[voice_in, voice_state],
+                 outputs=[voice_chat, assistant_audio, call_diag],
+             ).then(
+                 _clear_recorder,  # runs AFTER outputs are set → autoplay isn’t interrupted
+                 inputs=None,
+                 outputs=[voice_in],
+             )
+         else:
+             voice_in.change(
+                 on_voice_change,
+                 inputs=[voice_in, voice_state],
+                 outputs=[voice_chat, assistant_audio, call_diag],
+             ).then(
+                 _clear_recorder,
+                 inputs=None,
+                 outputs=[voice_in],
+             )
+
+         # Keep voice_state in sync with what's shown in voice_chat
+         voice_chat.change(lambda x: x, inputs=[voice_chat], outputs=[voice_state])
+
+         # Text lane: Enter to send
+         text_in.submit(
+             on_text_send,
+             inputs=[text_in, chat_state],
+             outputs=[chat_box, chat_diag, text_in],
+         )
+         # Keep chat_state in sync with what's shown in chat_box
+         chat_box.change(lambda x: x, inputs=[chat_box], outputs=[chat_state])
+
+     return demo
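
The Space's `app_file: app.py` entry point is not part of the visible diff; presumably it is little more than a launcher around `build_demo()`, something like this hypothetical sketch:

```python
# Hypothetical app.py (not shown in this commit): build and launch the Blocks demo.
from app.gradio_app import build_demo

if __name__ == "__main__":
    demo = build_demo()
    demo.launch()
```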
app/intent_schema.py ADDED
@@ -0,0 +1,31 @@
+ # app/intent_schema.py
+ from __future__ import annotations
+ from typing import List, Optional, Literal
+ from pydantic import BaseModel, Field
+
+ IntentName = Literal["reservation.create", "order.create", "hours.get", "menu.search", "smalltalk", "other"]
+
+ class ReservationSlots(BaseModel):
+     name: Optional[str] = None
+     party_size: Optional[int] = Field(default=None, ge=1, le=20)
+     date: Optional[str] = None   # ISO preferred (YYYY-MM-DD) or “today”
+     time: Optional[str] = None   # “19:00” or “7 pm”
+     phone: Optional[str] = None
+
+ class OrderItem(BaseModel):
+     name: str
+     qty: int = Field(default=1, ge=1)
+
+ class OrderSlots(BaseModel):
+     items: List[OrderItem] = Field(default_factory=list)
+     notes: Optional[str] = None
+
+ class MenuSlots(BaseModel):
+     query: Optional[str] = None
+     dietary: List[str] = Field(default_factory=list)  # e.g., ["vegan","gluten-free"]
+
+ class IntentEnvelope(BaseModel):
+     intent: IntentName
+     need_more_info: bool = False
+     ask_user: Optional[str] = None   # a single, polite follow-up question if info is missing
+     slots: dict = Field(default_factory=dict)  # raw dict; we validate per intent
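
For illustration, a routed payload would be validated in two steps: parse the envelope, then validate its raw `slots` dict with the model matching the intent (values below are made up):

```python
# Illustrative two-step validation using the models above.
from app.intent_schema import IntentEnvelope, ReservationSlots

env = IntentEnvelope(intent="reservation.create",
                     slots={"party_size": 2, "time": "19:00"})
slots = ReservationSlots(**env.slots)  # per-intent validation happens here
print(slots.party_size)                # -> 2
```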
app/orchestrator.py ADDED
@@ -0,0 +1,34 @@
+ import re
+ from typing import Dict, Any
+ from models.llm_router import respond as route_fn, nlg
+ from app.tools import dispatch_tool
+ from utils.phone import extract_phone, looks_valid
+
+ def llm_route_and_execute(user_text: str) -> Dict[str, Any]:
+     route = route_fn(user_text)  # {"tool": "get_hours"|..., "args": {...}}
+     tool = route.get("tool")
+     args = route.get("args") or {}
+
+     # enrich reservation with a phone number if present in the text
+     if tool == "create_reservation":
+         phone = extract_phone(user_text)
+         if looks_valid(phone):
+             args["phone"] = phone
+         if not args.get("name"):
+             # naive default name if the user included "my name is ..."
+             m = re.search(r"(?:my name is|i am|i'm)\s+([A-Z][a-z]+)", user_text, re.I)
+             if m: args["name"] = m.group(1)
+
+     tool_result = None
+     if tool:
+         tool_result = dispatch_tool(tool, args)
+
+     reply = nlg(tool or "", tool_result or {}, user_text)
+
+     return {
+         "intent": tool or "smalltalk",
+         "slots": args,
+         "tool_selected": tool,
+         "tool_result": tool_result,
+         "response": reply,
+     }
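
End to end, one routed turn looks like the sketch below. Note that `dispatch_tool` imports `mock_api.service`, which is not part of the visible diff, so the `tool_result` values here are only indicative:

```python
# Illustrative turn; requires the mock_api package that app.tools imports.
from app.orchestrator import llm_route_and_execute

turn = llm_route_and_execute("Book a table for 2 at 7, my name is Ada, 555-010-0000")
print(turn["tool_selected"])  # -> "create_reservation"
print(turn["slots"])          # party_size/datetime_str from the router, plus phone and name
```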
app/policy.py ADDED
@@ -0,0 +1,45 @@
+ # app/policy.py
+ from __future__ import annotations
+ import os, re
+
+ # --- Topic detection (very lightweight, fast) ---
+ CAFE_KEYWORDS = [
+     "menu","order","item","dish","pizza","burger","salad","pasta","vegan","gluten",
+     "price","special","deal","offer","hours","open","close","time","location","address",
+     "book","reserve","reservation","table","party","pickup","delivery","takeout","payment",
+     "futurecafe","future cafe","future-cafe","café","coffee","drinks","beverage","side"
+ ]
+ _kw_re = re.compile(r"|".join([re.escape(k) for k in CAFE_KEYWORDS]), re.I)
+
+ SMALLTALK = r"\b(hi|hello|hey|good\s+(morning|afternoon|evening)|thanks|thank you|bye|goodbye)\b"
+ _smalltalk_re = re.compile(SMALLTALK, re.I)
+
+ def is_cafe_topic(text: str) -> bool:
+     return bool(text and _kw_re.search(text))
+
+ def is_smalltalk(text: str) -> bool:
+     return bool(text and _smalltalk_re.search(text))
+
+ def unrelated_limit() -> int:
+     """How many off-topic turns are allowed before ending."""
+     try:
+         n = int(os.getenv("CAFE_UNRELATED_LIMIT", "3"))
+         return max(1, min(5, n))
+     except Exception:
+         return 3
+
+ # --- Messages ---
+ POLITE_REFUSAL = (
+     "I’m here to help with FutureCafe—menu, hours, reservations, and orders. "
+     "Could you ask something about the restaurant?"
+ )
+
+ POLITE_REFUSAL_2 = (
+     "To keep things focused, I can only help with FutureCafe. "
+     "Ask me about our menu, hours, or booking a table."
+ )
+
+ def end_message() -> str:
+     return ("I’m only able to help with FutureCafe topics. "
+             "Let’s end this chat for now. If you need menu, hours, or reservations, "
+             "message me again anytime.")
app/sim_api.py ADDED
@@ -0,0 +1,80 @@
+ # app/sim_api.py
+ from __future__ import annotations
+ from typing import Dict, Any, List, Tuple
+ from app.catalog import find_item_by_name, find_item_by_sku
+
+ def _pick_item(order_it: Dict[str, Any]) -> Dict[str, Any] | None:
+     it = None
+     if "sku" in order_it:
+         it = find_item_by_sku(order_it["sku"])
+     if not it and "name" in order_it:
+         it = find_item_by_name(order_it["name"])
+     return it
+
+ def check_item_availability(order_it: Dict[str, Any]) -> Tuple[bool, Dict[str, Any]]:
+     """
+     Returns (is_available, info).
+     info contains {"reason": "...", "alternatives": [...]} when not available.
+     For size-based items, verify stock for the requested size.
+     """
+     it = _pick_item(order_it)
+     if not it:
+         return False, {"reason": "unknown_item", "alternatives": []}
+
+     qty = int(order_it.get("qty", 0) or 0)
+     if qty < 1:
+         return False, {"reason": "qty_missing", "alternatives": []}
+
+     # size key heuristics
+     size = order_it.get("size")
+     stock_map = it.get("stock") or {}
+
+     if "one_size" in stock_map:
+         avail = stock_map["one_size"]
+         if avail >= qty:
+             return True, {"price_each": (it.get("price") or {}).get("one_size", 0.0)}
+         else:
+             return False, {"reason": "insufficient_stock", "have": avail, "alternatives": []}
+
+     if size:
+         have = int(stock_map.get(size, 0))
+         if have >= qty:
+             return True, {"price_each": (it.get("price") or {}).get(size, 0.0)}
+         else:
+             # propose other sizes with stock
+             alts = []
+             for s, have_s in stock_map.items():
+                 if have_s >= qty:
+                     alts.append({"size": s, "have": have_s, "price_each": (it.get("price") or {}).get(s, 0.0)})
+             return False, {"reason": "size_out_of_stock", "have": have, "alternatives": alts}
+     else:
+         # missing required option — let schema enforcement ask; if the user skipped it, treat as not available
+         return False, {"reason": "size_missing", "alternatives": [{"hint": "provide size"}]}
+
+ def place_order(order_items: List[Dict[str, Any]]) -> Dict[str, Any]:
+     """
+     Verifies each item and (if all are available) returns a summary.
+     We do not mutate stock here (sim).
+     """
+     lines = []
+     total = 0.0
+     for it in order_items:
+         item_def = _pick_item(it)
+         if not item_def:
+             return {"ok": False, "reason": "unknown_item", "item": it}
+         avail, info = check_item_availability(it)
+         if not avail:
+             return {"ok": False, "reason": info.get("reason"), "item": it, "alternatives": info.get("alternatives", [])}
+         qty = int(it["qty"])
+         unit = info.get("price_each", 0.0)
+         line_total = unit * qty
+         total += line_total
+         lines.append({
+             "sku": item_def["sku"],
+             "name": item_def["name"],
+             "qty": qty,
+             "options": {k: v for k, v in it.items() if k not in ("name", "sku", "qty")},
+             "unit": unit,
+             "line_total": line_total
+         })
+     return {"ok": True, "total": round(total, 2), "lines": lines}
app/tools.py ADDED
@@ -0,0 +1,18 @@
+ from typing import Any, Dict
+ from mock_api import service as svc
+
+ def dispatch_tool(tool: str, args: Dict[str, Any]) -> Dict[str, Any]:
+     if tool == "get_hours":
+         return svc.get_hours()
+     if tool == "menu_lookup":
+         return {"items": svc.menu_lookup(args.get("filters") or [])}
+     if tool == "create_reservation":
+         return svc.create_reservation(
+             name=args.get("name") or "Guest",
+             phone=args.get("phone"),
+             party_size=int(args.get("party_size") or 2),
+             datetime_str=args.get("datetime_str") or "",
+         )
+     if tool == "create_order":
+         return svc.create_order(args.get("items") or [])
+     raise ValueError(f"unknown tool: {tool}")
data/menu_catalog.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "items": [
+     {
+       "sku": "pizza.margherita",
+       "name": "Margherita Pizza",
+       "category": "pizza",
+       "options": {
+         "size": { "required": true, "choices": ["small", "medium", "large"] },
+         "crust": { "required": false, "choices": ["thin", "regular"] },
+         "toppings": { "required": false, "choices": ["extra cheese", "basil", "olives"], "multi": true }
+       },
+       "price": { "small": 9.0, "medium": 12.0, "large": 14.0 },
+       "stock": { "small": 10, "medium": 6, "large": 0 },
+       "tags": ["vegetarian"]
+     },
+     {
+       "sku": "pizza.pepperoni",
+       "name": "Pepperoni Pizza",
+       "category": "pizza",
+       "options": {
+         "size": { "required": true, "choices": ["small", "medium", "large"] },
+         "crust": { "required": false, "choices": ["thin", "regular"] },
+         "toppings": { "required": false, "choices": ["extra cheese", "jalapeno"], "multi": true }
+       },
+       "price": { "small": 10.0, "medium": 13.5, "large": 15.5 },
+       "stock": { "small": 3, "medium": 0, "large": 2 },
+       "tags": []
+     },
+     {
+       "sku": "salad.house",
+       "name": "House Salad",
+       "category": "salad",
+       "options": {
+         "dressing": { "required": false, "choices": ["vinaigrette", "ranch", "no dressing"] }
+       },
+       "price": { "one_size": 7.5 },
+       "stock": { "one_size": 15 },
+       "tags": ["vegetarian", "vegan"]
+     },
+     {
+       "sku": "drink.cola",
+       "name": "Cola",
+       "category": "drink",
+       "options": {
+         "size": { "required": true, "choices": ["can", "bottle"] }
+       },
+       "price": { "can": 2.0, "bottle": 3.5 },
+       "stock": { "can": 20, "bottle": 4 },
+       "tags": []
+     }
+   ],
+   "schema": {
+     "pizza": { "required_fields": ["size", "qty"], "optional_fields": ["crust", "toppings"] },
+     "salad": { "required_fields": ["qty"], "optional_fields": ["dressing"] },
+     "drink": { "required_fields": ["size", "qty"], "optional_fields": [] }
+   }
+ }
models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (186 Bytes)
models/__pycache__/asr_whisper.cpython-312.pyc ADDED
Binary file (1.99 kB)
models/__pycache__/llm_chat.cpython-312.pyc ADDED
Binary file (4.85 kB)
models/__pycache__/llm_router.cpython-312.pyc ADDED
Binary file (4.08 kB)
models/__pycache__/tts_router.cpython-312.pyc ADDED
Binary file (6.28 kB)
models/asr_whisper.py ADDED
@@ -0,0 +1,27 @@
+ # models/asr_whisper.py
+ from faster_whisper import WhisperModel
+ from utils.config import get_settings
+
+ _asr_singleton = None
+
+ class WhisperASR:
+     def __init__(self):
+         s = get_settings()
+         # faster-whisper supports: 'cpu' or 'cuda' (no 'mps')
+         requested = (s.ASR_DEVICE or "cpu").lower()
+         device = "cpu" if requested not in ("cpu", "cuda") else requested
+         if requested == "mps":
+             print("[ASR] 'mps' not supported by faster-whisper; falling back to CPU.")
+         compute_type = "int8" if device == "cpu" else "float16"
+         self.model = WhisperModel("tiny", device=device, compute_type=compute_type)
+
+     def transcribe(self, path: str) -> dict:
+         segments, info = self.model.transcribe(path, beam_size=1, language="en")
+         text = " ".join(seg.text.strip() for seg in segments)
+         return {"text": text, "language": info.language, "segments": []}
+
+ def get_asr():
+     global _asr_singleton
+     if _asr_singleton is None:
+         _asr_singleton = WhisperASR()
+     return _asr_singleton
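
Typical use mirrors what the Gradio app does: grab the singleton once, then transcribe file paths (the WAV path below is hypothetical):

```python
# Example: one-off transcription through the singleton wrapper above.
from models.asr_whisper import get_asr

asr = get_asr()                                   # loads the 'tiny' model on first call
out = asr.transcribe("runtime/audio/sample.wav")  # hypothetical input path
print(out["text"], out["language"])
```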
models/llm_chat.py ADDED
@@ -0,0 +1,116 @@
+ # models/llm_chat.py
+ from __future__ import annotations
+ from typing import List, Dict, Any, Tuple
+ import os
+
+ from utils.config import get_settings
+
+ # --- Small, readable menu JSON kept in the system prompt for now ---
+ MENU_JSON = """
+ {
+   "pizzas": [
+     {"name": "Margherita Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 8.5, "medium": 11.0, "large": 13.5}},
+     {"name": "Pepperoni Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 9.5, "medium": 12.0, "large": 14.5}}
+   ],
+   "salads": [
+     {"name": "House Salad", "sizes": ["regular"], "price": {"regular": 6.0}}
+   ],
+   "drinks": [
+     {"name": "Cola", "sizes": ["can"], "price": {"can": 2.0}}
+   ],
+   "hours": "11:00–22:00 daily",
+   "address": "123 Main St",
+   "phone": "+1 (555) 010-0000"
+ }
+ """
+
+ SYSTEM_PROMPT = f"""You are Marta, the AI call/SMS assistant for FutureCafe.
+ You talk naturally and help with:
+ - Menu questions, placing orders, hours/location, and reservations (lightweight).
+ - If the user asks for pizza/order: list choices from the MENU and ask for missing details (size, quantity, etc.).
+ - If the user provides all details, confirm the order in words (no need to return JSON) and include a brief total using MENU prices.
+ - For hours/location, reply from the MENU.
+ - For unrelated topics, gently steer back to FutureCafe; if the user remains off-topic for 3 turns total, politely end.
+ - Keep replies concise and friendly. No long explanations.
+
+ MENU (JSON you can read from for options & prices):
+ {MENU_JSON}
+ """
+
+ # ---------------- llama.cpp singleton ----------------
+ _llm = None
+
+ def _get_local_llm():
+     """Singleton llama.cpp model loader (GGUF)."""
+     global _llm
+     if _llm is not None:
+         return _llm
+     from llama_cpp import Llama
+     s = get_settings()
+     model_path = os.getenv("LLAMACPP_MODEL_PATH", getattr(s, "LLAMACPP_MODEL_PATH", None))
+     if not model_path or not os.path.exists(model_path):
+         raise RuntimeError(f"LLAMACPP_MODEL_PATH not found: {model_path}")
+     _llm = Llama(
+         model_path=model_path,
+         n_ctx=2048,
+         n_threads=os.cpu_count() or 4,
+         n_gpu_layers=0,  # CPU by default
+         verbose=False,
+     )
+     return _llm
+
+ def _apply_chat_template(messages: List[Dict[str, str]]) -> str:
+     parts = []
+     for m in messages:
+         role = m.get("role", "user")
+         content = m.get("content", "")
+         if role == "system":
+             parts.append(f"<|system|>\n{content}\n")
+         elif role == "user":
+             parts.append(f"<|user|>\n{content}\n")
+         else:
+             parts.append(f"<|assistant|>\n{content}\n")
+     parts.append("<|assistant|>\n")
+     return "\n".join(parts)
+
+ def _generate(messages: List[Dict[str, str]], temperature=0.3, max_tokens=320) -> str:
+     llm = _get_local_llm()
+     prompt = _apply_chat_template(messages)
+     out = llm(
+         prompt,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         top_p=0.9,
+         repeat_penalty=1.1,
+         stop=["<|user|>", "<|system|>", "<|assistant|>"],
+     )
+     return (out["choices"][0]["text"] or "").strip()
+
+ def respond_chat(
+     history: List[Dict[str, str]],
+     user_text: str,
+     guard_state: Dict[str, Any] | None,
+ ) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
+     """
+     LLM-only conversational brain.
+     Returns: (assistant_text, new_guard_state, diag)
+     guard_state: {"unrelated": int, "ended": int, "limit": int}
+     """
+     guard = dict(guard_state or {"unrelated": 0, "ended": 0, "limit": 3})
+     if guard.get("ended"):
+         return "(Conversation ended. Start a new chat for FutureCafe.)", guard, {}
+
+     msgs: List[Dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
+     if history:
+         msgs.extend(history[-10:])
+     msgs.append({"role": "user", "content": user_text})
+
+     reply = _generate(msgs)
+
+     # A super-light off-topic guard without keywords: if the model signals ending, we respect it.
+     # Otherwise, keep the conversation flowing; we do not hard-code keywords or intents here.
+     # (We still maintain the 'unrelated' counter in case we later want to nudge based on signals.)
+     if "Let’s end" in reply or "Let's end" in reply:
+         guard["ended"] = 1
+
+     return reply, guard, {}  # no tool_result/diagnostics needed for this simpler flow
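
A single text turn through this module would look as follows; it only runs if `LLAMACPP_MODEL_PATH` points at a local GGUF file, otherwise `_get_local_llm` raises:

```python
# Illustrative turn; requires a GGUF model at LLAMACPP_MODEL_PATH.
from models.llm_chat import respond_chat

reply, guard, _ = respond_chat([], "What are your hours?", None)
print(reply)  # model-dependent, e.g. something like "We're open 11:00-22:00 daily."
print(guard)  # -> {'unrelated': 0, 'ended': 0, 'limit': 3}
```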
models/llm_router.py ADDED
@@ -0,0 +1,46 @@
+ from utils.config import get_settings
+
+ def small_router(text: str) -> dict:
+     t = (text or "").lower()
+     if any(k in t for k in ["hour", "open", "close", "address", "location"]):
+         return {"tool": "get_hours", "args": {}}
+     if any(k in t for k in ["menu", "vegan", "gluten", "pizza", "salad", "special"]):
+         flt = []
+         for k in ["vegan", "gluten-free", "pizza", "salad"]:
+             if k in t: flt.append(k)
+         return {"tool": "menu_lookup", "args": {"filters": flt}}
+     if any(k in t for k in ["reserve", "reservation", "book", "table"]):
+         # naive hints
+         party = 2 if ("2" in t or "two" in t) else None
+         time = "19:00" if "7" in t else None
+         return {"tool": "create_reservation", "args": {"party_size": party, "datetime_str": time}}
+     if any(k in t for k in ["order", "buy"]):
+         return {"tool": "create_order", "args": {"items": []}}
+     return {"tool": None, "args": {}}
+
+ def nlg(intent: str, tool_result: dict, user_text: str) -> str:
+     if intent == "get_hours":
+         h = tool_result
+         return f"We’re open {h['open']}–{h['close']} daily at {h['address']}."
+     if intent == "menu_lookup":
+         items = (tool_result or {}).get("items") or []
+         if not items:
+             return "We have a variety of options—anything specific you’d like?"
+         tops = ", ".join(f"{it['name']} (${it['price']})" for it in items[:3])
+         return f"Popular picks: {tops}."
+     if intent == "create_reservation":
+         if tool_result.get("ok"):
+             return f"Reservation confirmed for {tool_result['party_size']} at {tool_result['when']}. Code {tool_result['reservation_id']}."
+         return "I couldn't confirm that reservation—want me to try again?"
+     if intent == "create_order":
+         if tool_result.get("ok"):
+             items = ", ".join(f"{it['qty']}× {it['name']}" for it in tool_result.get("items", []))
+             return f"Got it: {items}. Total ${tool_result.get('total', 0)}."
+         return "I couldn't place that order—want me to try again?"
+     # small talk
+     return "Hello, this is Marta, an AI agent for FutureCafe. How can I help you today?"
+
+ def respond(user_text: str) -> dict:
+     # MVP: use the rule-based router; later swap in real LLM function-calling
+     route = small_router(user_text)
+     return route
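
The router's output shape, for reference (both results follow directly from the keyword lists above):

```python
# Quick check of the keyword router.
from models.llm_router import small_router

print(small_router("What are your hours?"))
# -> {'tool': 'get_hours', 'args': {}}
print(small_router("Any vegan pizza?"))
# -> {'tool': 'menu_lookup', 'args': {'filters': ['vegan', 'pizza']}}
```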
models/tts_router.py ADDED
@@ -0,0 +1,151 @@
+ # models/tts_router.py
+ from __future__ import annotations
+
+ import os
+ import re
+ import uuid
+ import wave
+ import shutil
+ import subprocess
+ from shutil import which
+ from typing import Optional
+
+ RUNTIME_AUDIO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "runtime", "audio"))
+
+
+ def ensure_runtime_audio_dir() -> str:
+     os.makedirs(RUNTIME_AUDIO_DIR, exist_ok=True)
+     return RUNTIME_AUDIO_DIR
+
+
+ def _have(cmd: str) -> bool:
+     return which(cmd) is not None
+
+
+ def _is_valid_wav(path: str) -> bool:
+     try:
+         with wave.open(path, "rb") as w:
+             frames = w.getnframes()
+             rate = w.getframerate()
+             if frames <= 0 or rate <= 0:
+                 return False
+     except Exception:
+         return False
+     return True
+
+
+ def _tts_with_piper(text: str) -> Optional[str]:
+     """
+     Use local Piper if available.
+     Requires:
+       - env PIPER_MODEL to point to models/piper/<voice>.onnx
+       - the `piper` binary in PATH (brew install piper, or from releases)
+     """
+     model = os.getenv("PIPER_MODEL")
+     if not model or not os.path.exists(model):
+         return None
+     if not _have("piper"):
+         return None
+
+     out_dir = ensure_runtime_audio_dir()
+     out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
+
+     # Avoid stray control chars that can confuse some engines
+     safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
+     try:
+         # Simple one-shot pipe
+         p = subprocess.Popen(
+             ["piper", "--model", model, "--output_file", out_path],
+             stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+         )
+         p.communicate(input=safe_text.encode("utf-8"), timeout=30)
+         if p.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
+             return out_path
+     except Exception as e:
+         print("[TTS] Piper error:", e)
+     return None
+
+
+ def _tts_with_say(text: str) -> Optional[str]:
+     """
+     macOS `say` fallback. Produces WAV via afconvert or ffmpeg if present;
+     else writes AIFF and returns it if WAV conversion fails.
+     """
+     if os.name != "posix":
+         return None
+     if not _have("say"):
+         return None
+
+     out_dir = ensure_runtime_audio_dir()
+     aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
+     wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
+
+     safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
+     try:
+         # Basic AIFF
+         subprocess.run(["say", "-o", aiff, safe_text], check=True)
+     except Exception as e:
+         print("[TTS] say failed:", e)
+         return None
+
+     converted = False
+     # Prefer afconvert
+     if which("afconvert"):
+         try:
+             subprocess.run(
+                 ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav],
+                 check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+             )
+             converted = True
+         except Exception:
+             converted = False
+     # Else try ffmpeg
+     if not converted and which("ffmpeg"):
+         try:
+             subprocess.run(
+                 ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav],
+                 check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+             )
+             converted = True
+         except Exception:
+             converted = False
+
+     # Cleanup/return best
+     if converted and os.path.exists(wav) and _is_valid_wav(wav):
+         try:
+             os.remove(aiff)
+         except Exception:
+             pass
+         return wav
+
+     # Fallback: return AIFF if WAV conversion failed but the AIFF exists
+     if os.path.exists(aiff):
+         return aiff
+
+     return None
+
+
+ def tts_synthesize(text: str) -> Optional[str]:
+     """
+     High-level TTS router:
+       1) Piper (if configured)
+       2) macOS 'say'
+       3) None
+     Always writes to runtime/audio.
+     """
+     if not (text and text.strip()):
+         return None
+
+     ensure_runtime_audio_dir()
+
+     # 1) Piper
+     out = _tts_with_piper(text)
+     if out:
+         return out
+
+     # 2) macOS say
+     out = _tts_with_say(text)
+     if out:
+         return out
+
+     return None
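
In practice callers only touch the top-level router; which engine produced the file is invisible to them:

```python
# Example: synthesize a reply and hand the resulting path to a Gradio Audio output.
from models.tts_router import tts_synthesize

path = tts_synthesize("Your order is confirmed: one medium Margherita.")
print(path)  # e.g. runtime/audio/tts_<uuid>.wav, or None if no engine is available
```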
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ gradio>5.0
+ pydantic>=2.8
+ pydantic-settings>=2.5
+ numpy>=1.26
+ soundfile>=0.12
+ webrtcvad>=2.0.10
+ faster-whisper>=1.0.0
+ llama-cpp-python>=0.2.90
+ pyttsx3>=2.90
+ openai>=1.44.0
runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e96d0bc6697344d111427e3900cb71d28c54c5ff4fcc52b45819fa49da0b2f6c
+ size 370708
runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e3f8ea05d78887dd73e67e846efa7ff7f3afb9ba15a9c61dac69ed62f075025
+ size 216064
utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (185 Bytes)
utils/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.97 kB)
utils/__pycache__/phone.cpython-312.pyc ADDED
Binary file (1.12 kB)
utils/audio.py ADDED
@@ -0,0 +1,17 @@
+ import soundfile as sf
+ import numpy as np
+ import math
+
+ def load_audio_mono_16k(path: str):
+     wav, sr = sf.read(path, dtype="float32", always_2d=False)
+     if wav.ndim == 2:
+         wav = wav.mean(axis=1)
+     if sr != 16000:
+         # lightweight linear-interpolation resample to 16 kHz
+         ratio = 16000 / sr
+         n = int(math.floor(len(wav) * ratio))
+         x_old = np.linspace(0, 1, len(wav), endpoint=False)
+         x_new = np.linspace(0, 1, n, endpoint=False)
+         wav = np.interp(x_new, x_old, wav).astype("float32")
+         sr = 16000
+     return wav, sr
utils/config.py ADDED
@@ -0,0 +1,41 @@
+ from __future__ import annotations
+ import os
+ from pydantic_settings import BaseSettings
+ from pydantic import Field
+
+ class Settings(BaseSettings):
+     BACKEND_LLM: str = Field(default="llamacpp")  # 'llamacpp' | 'openai' | 'groq'
+     LLAMACPP_MODEL_PATH: str = Field(default="models/qwen2.5-1.5b-instruct-q4_k_m.gguf")
+
+     N_CTX: int = 4096
+     N_THREADS: int = 4
+     N_GPU_LAYERS: int = 0
+
+     ASR_DEVICE: str = "mps"       # 'mps' or 'cpu'
+     TTS_ENGINE: str = "pyttsx3"   # 'pyttsx3' | 'say' | 'piper' (later)
+
+     OPENAI_API_KEY: str | None = None
+     GROQ_API_KEY: str | None = None
+
+     IS_HF_SPACE: bool = False
+     DEBUG: bool = True
+
+     class Config:
+         env_file = ".env"
+         extra = "ignore"
+
+     def pretty(self) -> dict:
+         d = self.model_dump()
+         if d.get("OPENAI_API_KEY"):
+             d["OPENAI_API_KEY"] = True
+         if d.get("GROQ_API_KEY"):
+             d["GROQ_API_KEY"] = True
+         return d
+
+ _settings: Settings | None = None
+
+ def get_settings() -> Settings:
+     global _settings
+     if _settings is None:
+         _settings = Settings()
+     return _settings
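
Settings resolve from the environment (or `.env`) at first use, so per-Space overrides are just environment variables; note the singleton means overrides must be in place before the first `get_settings()` call:

```python
# Example: environment variables override the defaults above.
import os
os.environ["BACKEND_LLM"] = "openai"  # must be set before the first get_settings() call

from utils.config import get_settings

s = get_settings()
print(s.BACKEND_LLM)  # -> "openai"
print(s.pretty())     # API keys are masked to booleans
```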
utils/phone.py ADDED
@@ -0,0 +1,13 @@
+ import re
+
+ def extract_phone(text: str) -> str | None:
+     if not text:
+         return None
+     m = re.search(r"(\+?\d[\d\-\s]{8,}\d)", text)
+     return m.group(1).replace(" ", "") if m else None
+
+ def looks_valid(phone: str | None) -> bool:
+     if not phone:
+         return False
+     digits = "".join(ch for ch in phone if ch.isdigit())
+     return len(digits) >= 10