Upload 38 files

- .gitattributes +2 -0
- README.md +23 -6
- app/__init__.py +0 -0
- app/__pycache__/__init__.cpython-312.pyc +0 -0
- app/__pycache__/catalog.cpython-312.pyc +0 -0
- app/__pycache__/gradio_app.cpython-312.pyc +0 -0
- app/__pycache__/intent_schema.cpython-312.pyc +0 -0
- app/__pycache__/orchestrator.cpython-312.pyc +0 -0
- app/__pycache__/policy.cpython-312.pyc +0 -0
- app/__pycache__/sim_api.cpython-312.pyc +0 -0
- app/__pycache__/tools.cpython-312.pyc +0 -0
- app/catalog.py +110 -0
- app/gradio_app.py +299 -0
- app/intent_schema.py +31 -0
- app/orchestrator.py +34 -0
- app/policy.py +45 -0
- app/sim_api.py +81 -0
- app/tools.py +18 -0
- data/menu_catalog.json +57 -0
- models/__init__.py +0 -0
- models/__pycache__/__init__.cpython-312.pyc +0 -0
- models/__pycache__/asr_whisper.cpython-312.pyc +0 -0
- models/__pycache__/llm_chat.cpython-312.pyc +0 -0
- models/__pycache__/llm_router.cpython-312.pyc +0 -0
- models/__pycache__/tts_router.cpython-312.pyc +0 -0
- models/asr_whisper.py +27 -0
- models/llm_chat.py +116 -0
- models/llm_router.py +46 -0
- models/tts_router.py +151 -0
- requirements.txt +10 -0
- runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav +3 -0
- runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav +3 -0
- utils/__init__.py +0 -0
- utils/__pycache__/__init__.cpython-312.pyc +0 -0
- utils/__pycache__/config.cpython-312.pyc +0 -0
- utils/__pycache__/phone.cpython-312.pyc +0 -0
- utils/audio.py +18 -0
- utils/config.py +41 -0
- utils/phone.py +12 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav filter=lfs diff=lfs merge=lfs -text
+runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED
@@ -1,12 +1,29 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: FutureCafe Voice Core (Private)
+emoji: ☎️
+colorFrom: indigo
+colorTo: blue
 sdk: gradio
-sdk_version:
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
+license: mit
 ---
 
-
+# FutureCafe Voice Core (Private)
+
+This Space runs the **full** Gradio app (voice + SMS). It’s **private** and will be called by a public wrapper Space via `gradio_client`.
+
+## Run
+
+- Uses **Piper** TTS model at `models/piper/en_US-amy-medium.onnx`
+- Uses **faster-whisper** (tiny) for ASR
+
+### Environment variables (set in Space → Settings → Secrets)
+- `BACKEND_LLM=openai` (or `groq`)
+- If `openai`: `OPENAI_API_KEY=<your-key>`
+- If `groq`: `GROQ_API_KEY=<your-key>`
+- `TTS_ENGINE=piper`
+- `PIPER_MODEL=models/piper/en_US-amy-medium.onnx`
+
+This project writes generated audio files into `runtime/audio`.
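The README notes that a public wrapper Space will call this private Space via `gradio_client`. A minimal sketch of that wrapper-side call, assuming placeholder names throughout: the Space id "your-org/futurecafe-voice-core" and the endpoint name "/chat" are hypothetical and depend on how the app exposes its events.

# Wrapper-side sketch; Space id and api_name are placeholders, not from this commit.
from gradio_client import Client

client = Client("your-org/futurecafe-voice-core", hf_token="hf_...")  # token required for a private Space
reply = client.predict(
    "Any vegan pizzas?",  # user text
    api_name="/chat",     # hypothetical endpoint name
)
print(reply)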
app/__init__.py
ADDED
File without changes

app/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (183 Bytes)

app/__pycache__/catalog.cpython-312.pyc
ADDED
Binary file (5.51 kB)

app/__pycache__/gradio_app.cpython-312.pyc
ADDED
Binary file (10.7 kB)

app/__pycache__/intent_schema.cpython-312.pyc
ADDED
Binary file (2.18 kB)

app/__pycache__/orchestrator.cpython-312.pyc
ADDED
Binary file (1.51 kB)

app/__pycache__/policy.cpython-312.pyc
ADDED
Binary file (2.53 kB)

app/__pycache__/sim_api.cpython-312.pyc
ADDED
Binary file (3.78 kB)

app/__pycache__/tools.cpython-312.pyc
ADDED
Binary file (1.4 kB)
app/catalog.py
ADDED

# app/catalog.py
from __future__ import annotations
import json, os
from typing import Dict, Any, List, Optional

_CATALOG: Dict[str, Any] | None = None

def get_catalog_path() -> str:
    here = os.path.dirname(os.path.abspath(__file__))
    root = os.path.dirname(here)
    return os.path.join(root, "data", "menu_catalog.json")

def load_catalog() -> Dict[str, Any]:
    global _CATALOG
    if _CATALOG is not None:
        return _CATALOG
    path = get_catalog_path()
    with open(path, "r", encoding="utf-8") as f:
        _CATALOG = json.load(f)
    return _CATALOG

def find_item_by_name(name: str) -> Optional[Dict[str, Any]]:
    c = load_catalog()
    name_l = (name or "").strip().lower()
    for it in c["items"]:
        if it["name"].lower() == name_l:
            return it
        # lightweight alias match
        if name_l in it["name"].lower():
            return it
    return None

def find_item_by_sku(sku: str) -> Optional[Dict[str, Any]]:
    c = load_catalog()
    for it in c["items"]:
        if it["sku"] == sku:
            return it
    return None

def required_fields_for_category(category: str) -> List[str]:
    c = load_catalog()
    schema = c["schema"].get(category) or {}
    return list(schema.get("required_fields") or [])

def optional_fields_for_category(category: str) -> List[str]:
    c = load_catalog()
    schema = c["schema"].get(category) or {}
    return list(schema.get("optional_fields") or [])

def compute_missing_fields(order_item: Dict[str, Any]) -> List[str]:
    """
    order_item: {"name": "...", "sku": optional, "qty": int, "<opts>": ...}
    Uses the catalog schema to see which fields are missing.
    """
    it = None
    if "sku" in order_item:
        it = find_item_by_sku(order_item["sku"])
    if not it and "name" in order_item:
        it = find_item_by_name(order_item["name"])
    if not it:
        return ["name"]  # we don’t even know the item yet

    category = it["category"]
    req = set(required_fields_for_category(category))
    present = set([k for k in order_item.keys() if k in req or k == "qty" or k == "name" or k == "sku"])

    # qty normalization: consider qty present if >= 1
    if "qty" in req and (order_item.get("qty") is None or int(order_item.get("qty", 0)) < 1):
        # keep qty "missing"
        pass
    else:
        present.add("qty")

    missing = [f for f in req if f not in present]
    return missing

def friendly_requirements_prompt(order_item: Dict[str, Any]) -> str:
    it = None
    if "sku" in order_item:
        it = find_item_by_sku(order_item["sku"])
    if not it and "name" in order_item:
        it = find_item_by_name(order_item["name"])
    if not it:
        return "Which item would you like to order?"

    category = it["category"]
    req = required_fields_for_category(category)
    opt = optional_fields_for_category(category)

    parts = []
    opt_txt = ""
    if opt:
        opt_txt = f" Optional: {', '.join(opt)}."
    if req:
        parts.append(f"I need {', '.join(req)} for {it['name']}.{opt_txt}")
    else:
        parts.append(f"Please specify quantity for {it['name']}.{opt_txt}")

    # Also list choices for required options, e.g. size choices
    opts = it.get("options") or {}
    choice_bits = []
    for k, spec in opts.items():
        if spec.get("required"):
            choices = spec.get("choices") or []
            if choices:
                choice_bits.append(f"{k}: {', '.join(choices)}")
    if choice_bits:
        parts.append("Choices → " + " | ".join(choice_bits))
    return " ".join(parts)
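A minimal usage sketch showing the schema-driven gap detection against the shipped data/menu_catalog.json (assumes the repo root as working directory):

from app.catalog import compute_missing_fields, friendly_requirements_prompt

item = {"name": "Margherita Pizza", "qty": 2}   # "size" is required for the pizza category
print(compute_missing_fields(item))             # ['size']
print(friendly_requirements_prompt(item))       # asks for size; lists small, medium, large

item["size"] = "medium"
print(compute_missing_fields(item))             # []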
app/gradio_app.py
ADDED

# app/gradio_app.py
from __future__ import annotations

import os
import time
import shutil
import uuid
from typing import List, Dict, Any, Tuple

import gradio as gr

# ---- External modules we rely on (light, stable) ----
# - ASR: faster-whisper wrapper you already have
# - TTS: local piper / say via models/tts_router.py
# - LLM: optional local model; if missing, we fall back to a safe canned reply
try:
    from models.asr_whisper import get_asr
except Exception:
    get_asr = None

try:
    from models.llm_chat import respond_chat as llm_respond_chat
except Exception:
    llm_respond_chat = None

from models.tts_router import tts_synthesize, ensure_runtime_audio_dir


# =============================================================================
# Helpers (pure, modular)
# =============================================================================

def _safe_llm_reply(history: List[Dict[str, str]], user_text: str) -> str:
    """
    Ask the chat LLM for a response. If it's not available, use a reasonable fallback.
    """
    if llm_respond_chat is not None:
        try:
            # policy guard is optional; pass an empty dict
            bot_text, _guard, _diag = llm_respond_chat(history or [], user_text, {})
            if isinstance(bot_text, str) and bot_text.strip():
                return bot_text.strip()
        except Exception as e:
            print("[LLM] fallback due to error:", e)
    # Fallback (LLM unavailable or failed)
    return "Hello! How can I assist you today? Would you like to place an order or inquire about the menu?"


def _asr_transcribe(aud_path: str) -> str:
    """
    Transcribe audio to text. If ASR is unavailable, return a safe message.
    """
    if not aud_path:
        return "(no audio)"
    if get_asr is None:
        return "(ASR unavailable)"
    try:
        asr = get_asr()
        out = asr.transcribe(aud_path)
        return (out.get("text") or "").strip() or "(no speech detected)"
    except Exception as e:
        print("[ASR] error:", e)
        return "(transcription failed)"


def _tts_from_text(text: str) -> str | None:
    """
    Synthesize assistant text to a WAV in runtime/audio.
    Returns a file path or None.
    """
    if not (text and text.strip()):
        return None
    path = tts_synthesize(text.strip())
    if path and os.path.exists(path):
        return path
    # always attempt one more minimal fallback to avoid an empty path
    return tts_synthesize("How can I help with FutureCafe?")


def _append_chat(history: List[Dict[str, str]] | None,
                 role: str, content: str) -> List[Dict[str, str]]:
    hist = list(history or [])
    hist.append({"role": role, "content": content})
    return hist


def _startup_clean_runtime_audio():
    """
    On app start, clean previous session audio artifacts.
    """
    audio_dir = ensure_runtime_audio_dir()
    try:
        for name in os.listdir(audio_dir):
            p = os.path.join(audio_dir, name)
            if os.path.isfile(p):
                os.remove(p)
    except Exception as e:
        print("[RUNTIME] Cannot clean runtime/audio:", e)


# =============================================================================
# Voice handlers (modular)
# =============================================================================

def handle_voice_turn(
    user_audio_path: str,
    voice_history: List[Dict[str, str]] | None
) -> Tuple[List[Dict[str, str]], str | None, Dict[str, Any]]:
    """
    Single voice turn:
      1) Transcribe user audio
      2) Ask LLM for a reply (text)
      3) TTS the reply to a WAV
      4) Append both transcript and assistant text to the voice chat history

    Returns: (new_voice_history, assistant_audio_path, diag_json)
    """
    t0 = time.time()

    transcript = _asr_transcribe(user_audio_path)
    hist1 = _append_chat(voice_history, "user", transcript)

    bot_text = _safe_llm_reply(hist1, transcript)
    hist2 = _append_chat(hist1, "assistant", bot_text)

    tts_path = _tts_from_text(bot_text)

    diag = {
        "intent": None,
        "slots": {},
        "tool_selected": None,
        "tool_result": {
            "transcript": transcript,
            "llm_response": bot_text
        },
        "latency_ms": int((time.time() - t0) * 1000),
    }

    return hist2, tts_path, diag


# =============================================================================
# Text handlers (modular)
# =============================================================================

def handle_text_turn(
    user_text: str,
    chat_history: List[Dict[str, str]] | None
) -> Tuple[List[Dict[str, str]], Dict[str, Any], str]:
    """
    Single text turn:
      1) Append user text
      2) Ask LLM for a reply
      3) Append assistant text
      4) Prepare diagnostics
    Returns: (new_chat_history, diag_json, clear_text_value)
    """
    t0 = time.time()
    user_text = (user_text or "").strip()
    if not user_text:
        return (chat_history or []), {"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0}, ""

    hist1 = _append_chat(chat_history, "user", user_text)
    bot_text = _safe_llm_reply(hist1, user_text)
    hist2 = _append_chat(hist1, "assistant", bot_text)

    diag = {
        "intent": None,
        "slots": {},
        "tool_selected": None,
        "tool_result": {"user": user_text, "llm_response": bot_text},
        "latency_ms": int((time.time() - t0) * 1000),
    }

    return hist2, diag, ""


# =============================================================================
# Fixed UI (as requested) + wiring
# =============================================================================

def build_demo():
    """
    Fixed UI layout:
      LEFT (Voice Call):
        - voice_in (mic recorder)
        - assistant_audio (autoplay)
        - voice_chat (transcript chat)
        - call_diag (JSON)
      RIGHT (SMS/Chat):
        - chat_box
        - text_in (enter to send)
        - chat_diag (JSON)
    """
    _startup_clean_runtime_audio()

    with gr.Blocks(title="FutureCafe Call/SMS Agent (MVP)") as demo:
        gr.Markdown("### ☎️ FutureCafe AI Agent (MVP)\n**Call (voice)** on the left · **SMS/Chat** on the right")

        # States
        voice_state = gr.State([])  # list of {"role","content"} for the voice transcript chat
        chat_state = gr.State([])   # list of {"role","content"} for the SMS chat

        with gr.Row():
            # ---------------- LEFT: VOICE ----------------
            with gr.Column(scale=1, min_width=430):
                gr.Markdown("#### 📞 Voice Call")
                voice_in = gr.Audio(
                    label="Press Record → Speak → Stop (auto-sends)",
                    sources=["microphone"],
                    type="filepath",
                    format="wav",
                    interactive=True,
                    editable=False,
                    waveform_options={"show_recording_waveform": True},
                )

                assistant_audio = gr.Audio(
                    label="Assistant Response (auto-play)",
                    autoplay=True,
                    type="filepath",
                    interactive=False
                )

                voice_chat = gr.Chatbot(value=[], type="messages", height=220, label="Voice Chat (transcripts)")

                call_diag = gr.JSON(
                    value={"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0},
                    label="Voice Diagnostics"
                )

            # ---------------- RIGHT: SMS / CHAT ----------------
            with gr.Column(scale=1, min_width=430):
                gr.Markdown("#### 💬 SMS / Chat")
                chat_box = gr.Chatbot(value=[], type="messages", height=360, label=None)
                text_in = gr.Textbox(
                    placeholder="Type here… e.g., “Any vegan pizzas?”, “Book a table for 2 at 7.” (Enter to send)",
                    label=None, lines=1
                )
                chat_diag = gr.JSON(
                    value={"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0},
                    label="Chat Diagnostics"
                )

        # ---------- Handlers (thin wrappers that call the modular functions) ----------
        def _clear_recorder():
            # Only clears the recorder input; leaves assistant audio + transcripts intact
            return gr.update(value=None, interactive=True)

        def on_voice_change(aud_path: str | None, vhist: List[Dict[str, str]]):
            if not aud_path:
                # no audio; keep everything as-is
                return vhist or [], None, {"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0}

            new_vhist, tts_path, diag = handle_voice_turn(aud_path, vhist or [])
            return new_vhist, tts_path, diag

        def on_text_send(txt: str, hist: List[Dict[str, str]]):
            new_hist, diag, clear_text = handle_text_turn(txt, hist or [])
            return new_hist, diag, clear_text

        # ---------- Wiring ----------
        # Voice lane: update (voice_chat, assistant_audio, call_diag); do NOT clear the recorder here, to keep it stable for now.
        # Try to fire on explicit Stop; fall back to a generic change event if not supported.
        rec_event = getattr(voice_in, "stop_recording", None)
        if callable(rec_event):
            rec_event(
                on_voice_change,
                inputs=[voice_in, voice_state],
                outputs=[voice_chat, assistant_audio, call_diag],
            ).then(
                _clear_recorder,  # runs AFTER outputs are set → autoplay isn’t interrupted
                inputs=None,
                outputs=[voice_in],
            )
        else:
            voice_in.change(
                on_voice_change,
                inputs=[voice_in, voice_state],
                outputs=[voice_chat, assistant_audio, call_diag],
            ).then(
                _clear_recorder,
                inputs=None,
                outputs=[voice_in],
            )

        # Keep voice_state in sync with what's shown in voice_chat
        voice_chat.change(lambda x: x, inputs=[voice_chat], outputs=[voice_state])

        # Text lane: Enter to send
        text_in.submit(
            on_text_send,
            inputs=[text_in, chat_state],
            outputs=[chat_box, chat_diag, text_in],
        )
        # Keep chat_state in sync with what's shown in chat_box
        chat_box.change(lambda x: x, inputs=[chat_box], outputs=[chat_state])

    return demo
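The README declares `app_file: app.py`, but app.py itself is not among the 38 uploaded files. A plausible minimal entry point, assuming it only builds and launches this Blocks app:

# Hypothetical app.py (not part of this upload; wiring is an assumption)
from app.gradio_app import build_demo

demo = build_demo()

if __name__ == "__main__":
    demo.launch()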
app/intent_schema.py
ADDED

# app/intent_schema.py
from __future__ import annotations
from typing import List, Optional, Literal
from pydantic import BaseModel, Field

IntentName = Literal["reservation.create", "order.create", "hours.get", "menu.search", "smalltalk", "other"]

class ReservationSlots(BaseModel):
    name: Optional[str] = None
    party_size: Optional[int] = Field(default=None, ge=1, le=20)
    date: Optional[str] = None   # ISO preferred (YYYY-MM-DD) or “today”
    time: Optional[str] = None   # “19:00” or “7 pm”
    phone: Optional[str] = None

class OrderItem(BaseModel):
    name: str
    qty: int = Field(default=1, ge=1)

class OrderSlots(BaseModel):
    items: List[OrderItem] = Field(default_factory=list)
    notes: Optional[str] = None

class MenuSlots(BaseModel):
    query: Optional[str] = None
    dietary: List[str] = Field(default_factory=list)  # e.g., ["vegan","gluten-free"]

class IntentEnvelope(BaseModel):
    intent: IntentName
    need_more_info: bool = False
    ask_user: Optional[str] = None  # a single, polite follow-up question if info is missing
    slots: dict = Field(default_factory=dict)  # raw dict; we’ll validate by intent
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Any
|
| 2 |
+
from models.llm_router import respond as route_fn, nlg
|
| 3 |
+
from app.tools import dispatch_tool
|
| 4 |
+
from utils.phone import extract_phone, looks_valid
|
| 5 |
+
|
| 6 |
+
def llm_route_and_execute(user_text: str) -> Dict[str, Any]:
|
| 7 |
+
route = route_fn(user_text) # {"tool": "get_hours"|..., "args": {...}}
|
| 8 |
+
tool = route.get("tool")
|
| 9 |
+
args = route.get("args") or {}
|
| 10 |
+
|
| 11 |
+
# enrich reservation with phone if present in the text
|
| 12 |
+
if tool == "create_reservation":
|
| 13 |
+
phone = extract_phone(user_text)
|
| 14 |
+
if looks_valid(phone):
|
| 15 |
+
args["phone"] = phone
|
| 16 |
+
if not args.get("name"):
|
| 17 |
+
# naive default name if user included "my name is ..."
|
| 18 |
+
import re
|
| 19 |
+
m = re.search(r"(?:my name is|i am|i'm)\s+([A-Z][a-z]+)", user_text, re.I)
|
| 20 |
+
if m: args["name"] = m.group(1)
|
| 21 |
+
|
| 22 |
+
tool_result = None
|
| 23 |
+
if tool:
|
| 24 |
+
tool_result = dispatch_tool(tool, args)
|
| 25 |
+
|
| 26 |
+
reply = nlg(tool or "", tool_result or {}, user_text)
|
| 27 |
+
|
| 28 |
+
return {
|
| 29 |
+
"intent": tool or ("smalltalk" if not tool else tool),
|
| 30 |
+
"slots": args,
|
| 31 |
+
"tool_selected": tool,
|
| 32 |
+
"tool_result": tool_result,
|
| 33 |
+
"response": reply,
|
| 34 |
+
}
|
app/policy.py
ADDED

# app/policy.py
from __future__ import annotations
import os, re

# --- Topic detection (very lightweight, fast) ---
CAFE_KEYWORDS = [
    "menu","order","item","dish","pizza","burger","salad","pasta","vegan","gluten",
    "price","special","deal","offer","hours","open","close","time","location","address",
    "book","reserve","reservation","table","party","pickup","delivery","takeout","payment",
    "futurecafe","future cafe","future-cafe","café","coffee","drinks","beverage","side"
]
_kw_re = re.compile(r"|".join([re.escape(k) for k in CAFE_KEYWORDS]), re.I)

SMALLTALK = r"\b(hi|hello|hey|good\s+(morning|afternoon|evening)|thanks|thank you|bye|goodbye)\b"
_smalltalk_re = re.compile(SMALLTALK, re.I)

def is_cafe_topic(text: str) -> bool:
    return bool(text and _kw_re.search(text))

def is_smalltalk(text: str) -> bool:
    return bool(text and _smalltalk_re.search(text))

def unrelated_limit() -> int:
    """How many off-topic turns are allowed before ending."""
    try:
        n = int(os.getenv("CAFE_UNRELATED_LIMIT", "3"))
        return max(1, min(5, n))
    except Exception:
        return 3

# --- Messages ---
POLITE_REFUSAL = (
    "I’m here to help with FutureCafe—menu, hours, reservations, and orders. "
    "Could you ask something about the restaurant?"
)

POLITE_REFUSAL_2 = (
    "To keep things focused, I can only help with FutureCafe. "
    "Ask me about our menu, hours, or booking a table."
)

def end_message() -> str:
    return ("I’m only able to help with FutureCafe topics. "
            "Let’s end this chat for now. If you need menu, hours, or reservations, "
            "message me again anytime.")
app/sim_api.py
ADDED

# app/sim_api.py
from __future__ import annotations
from typing import Dict, Any, List, Tuple
from app.catalog import load_catalog, find_item_by_name, find_item_by_sku

def _pick_item(order_it: Dict[str, Any]) -> Dict[str, Any] | None:
    it = None
    if "sku" in order_it:
        it = find_item_by_sku(order_it["sku"])
    if not it and "name" in order_it:
        it = find_item_by_name(order_it["name"])
    return it

def check_item_availability(order_it: Dict[str, Any]) -> Tuple[bool, Dict[str, Any]]:
    """
    Returns (is_available, info).
    info contains {"reason": "...", "alternatives": [...]} when not available.
    For size-based items, verify stock for the requested size.
    """
    it = _pick_item(order_it)
    if not it:
        return False, {"reason": "unknown_item", "alternatives": []}

    qty = int(order_it.get("qty", 0) or 0)
    if qty < 1:
        return False, {"reason": "qty_missing", "alternatives": []}

    # size key heuristics
    size = order_it.get("size")
    stock_map = it.get("stock") or {}

    if "one_size" in stock_map:
        avail = stock_map["one_size"]
        if avail >= qty:
            return True, {"price_each": (it.get("price") or {}).get("one_size", 0.0)}
        else:
            return False, {"reason": "insufficient_stock", "have": avail, "alternatives": []}

    if size:
        have = int(stock_map.get(size, 0))
        if have >= qty:
            return True, {"price_each": (it.get("price") or {}).get(size, 0.0)}
        else:
            # propose other sizes with stock
            alts = []
            for s, have_s in stock_map.items():
                if have_s >= qty:
                    alts.append({"size": s, "have": have_s, "price_each": (it.get("price") or {}).get(s, 0.0)})
            return False, {"reason": "size_out_of_stock", "have": have, "alternatives": alts}
    else:
        # missing required option — let schema enforcement ask; if the user skipped it, treat as not available
        return False, {"reason": "size_missing", "alternatives": [{"hint": "provide size"}]}

def place_order(order_items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Verifies each item and (if all are available) returns a summary.
    We do not mutate stock here (sim).
    """
    lines = []
    total = 0.0
    for it in order_items:
        item_def = _pick_item(it)
        if not item_def:
            return {"ok": False, "reason": "unknown_item", "item": it}
        avail, info = check_item_availability(it)
        if not avail:
            return {"ok": False, "reason": info.get("reason"), "item": it, "alternatives": info.get("alternatives", [])}
        qty = int(it["qty"])
        unit = info.get("price_each", 0.0)
        line_total = unit * qty
        total += line_total
        lines.append({
            "sku": item_def["sku"],
            "name": item_def["name"],
            "qty": qty,
            "options": {k: v for k, v in it.items() if k not in ("name","sku","qty")},
            "unit": unit,
            "line_total": line_total
        })
    return {"ok": True, "total": round(total, 2), "lines": lines}
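A sketch run against the shipped data/menu_catalog.json (repo root as working directory). Large Margherita stock is 0 in the catalog, so the first check fails and proposes in-stock sizes:

from app.sim_api import check_item_availability, place_order

ok, info = check_item_availability({"name": "Margherita Pizza", "qty": 2, "size": "large"})
print(ok, info["reason"])   # False size_out_of_stock (alternatives: small, medium)

res = place_order([
    {"name": "Margherita Pizza", "qty": 1, "size": "medium"},  # 12.0
    {"name": "Cola", "qty": 2, "size": "can"},                 # 2 × 2.0
])
print(res["ok"], res["total"])   # True 16.0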
app/tools.py
ADDED

from typing import Any, Dict
from mock_api import service as svc

def dispatch_tool(tool: str, args: Dict[str, Any]) -> Dict[str, Any]:
    if tool == "get_hours":
        return svc.get_hours()
    if tool == "menu_lookup":
        return {"items": svc.menu_lookup(args.get("filters") or [])}
    if tool == "create_reservation":
        return svc.create_reservation(
            name=args.get("name") or "Guest",
            phone=args.get("phone"),
            party_size=int(args.get("party_size") or 2),
            datetime_str=args.get("datetime_str") or "",
        )
    if tool == "create_order":
        return svc.create_order(args.get("items") or [])
    raise ValueError(f"unknown tool: {tool}")
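Note that dispatch_tool imports `mock_api.service`, which is not among the 38 uploaded files, so this module will not import as committed. A stub with the surface the call sites above expect; the return shapes are inferred from how models/llm_router.nlg reads the results, so treat every field here as an assumption:

# Hypothetical mock_api/service.py stub (not in this upload; shapes inferred from nlg)
def get_hours() -> dict:
    return {"open": "11:00", "close": "22:00", "address": "123 Main St"}

def menu_lookup(filters: list) -> list:
    return []  # would return catalog items matching the filters

def create_reservation(name: str, phone: str | None, party_size: int, datetime_str: str) -> dict:
    return {"ok": True, "party_size": party_size, "when": datetime_str or "TBD", "reservation_id": "R-0001"}

def create_order(items: list) -> dict:
    return {"ok": True, "items": items, "total": 0.0}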
data/menu_catalog.json
ADDED

{
  "items": [
    {
      "sku": "pizza.margherita",
      "name": "Margherita Pizza",
      "category": "pizza",
      "options": {
        "size": { "required": true, "choices": ["small", "medium", "large"] },
        "crust": { "required": false, "choices": ["thin", "regular"] },
        "toppings": { "required": false, "choices": ["extra cheese", "basil", "olives"], "multi": true }
      },
      "price": { "small": 9.0, "medium": 12.0, "large": 14.0 },
      "stock": { "small": 10, "medium": 6, "large": 0 },
      "tags": ["vegetarian"]
    },
    {
      "sku": "pizza.pepperoni",
      "name": "Pepperoni Pizza",
      "category": "pizza",
      "options": {
        "size": { "required": true, "choices": ["small", "medium", "large"] },
        "crust": { "required": false, "choices": ["thin", "regular"] },
        "toppings": { "required": false, "choices": ["extra cheese", "jalapeno"], "multi": true }
      },
      "price": { "small": 10.0, "medium": 13.5, "large": 15.5 },
      "stock": { "small": 3, "medium": 0, "large": 2 },
      "tags": []
    },
    {
      "sku": "salad.house",
      "name": "House Salad",
      "category": "salad",
      "options": {
        "dressing": { "required": false, "choices": ["vinaigrette", "ranch", "no dressing"] }
      },
      "price": { "one_size": 7.5 },
      "stock": { "one_size": 15 },
      "tags": ["vegetarian", "vegan"]
    },
    {
      "sku": "drink.cola",
      "name": "Cola",
      "category": "drink",
      "options": {
        "size": { "required": true, "choices": ["can", "bottle"] }
      },
      "price": { "can": 2.0, "bottle": 3.5 },
      "stock": { "can": 20, "bottle": 4 },
      "tags": []
    }
  ],
  "schema": {
    "pizza": { "required_fields": ["size", "qty"], "optional_fields": ["crust", "toppings"] },
    "salad": { "required_fields": ["qty"], "optional_fields": ["dressing"] },
    "drink": { "required_fields": ["size", "qty"], "optional_fields": [] }
  }
}
models/__init__.py
ADDED
File without changes

models/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (186 Bytes)

models/__pycache__/asr_whisper.cpython-312.pyc
ADDED
Binary file (1.99 kB)

models/__pycache__/llm_chat.cpython-312.pyc
ADDED
Binary file (4.85 kB)

models/__pycache__/llm_router.cpython-312.pyc
ADDED
Binary file (4.08 kB)

models/__pycache__/tts_router.cpython-312.pyc
ADDED
Binary file (6.28 kB)
models/asr_whisper.py
ADDED

# models/asr_whisper.py
from faster_whisper import WhisperModel
from utils.config import get_settings

_asr_singleton = None

class WhisperASR:
    def __init__(self):
        s = get_settings()
        # faster-whisper supports 'cpu' or 'cuda' (no 'mps')
        requested = (s.ASR_DEVICE or "cpu").lower()
        device = "cpu" if requested not in ("cpu", "cuda") else requested
        if requested == "mps":
            print("[ASR] 'mps' not supported by faster-whisper; falling back to CPU.")
        compute_type = "int8" if device == "cpu" else "float16"
        self.model = WhisperModel("tiny", device=device, compute_type=compute_type)

    def transcribe(self, path: str) -> dict:
        segments, info = self.model.transcribe(path, beam_size=1, language="en")
        text = " ".join(seg.text.strip() for seg in segments)
        return {"text": text, "language": info.language, "segments": []}

def get_asr():
    global _asr_singleton
    if _asr_singleton is None:
        _asr_singleton = WhisperASR()
    return _asr_singleton
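A one-off transcription sketch; the input path is hypothetical, and the "tiny" weights download on first use:

from models.asr_whisper import get_asr

asr = get_asr()                                   # singleton; the model loads once
out = asr.transcribe("runtime/audio/sample.wav")  # hypothetical input file
print(out["text"], out["language"])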
models/llm_chat.py
ADDED

# models/llm_chat.py
from __future__ import annotations
from typing import List, Dict, Any, Tuple
import os

from utils.config import get_settings

# --- Small, readable menu JSON kept in the system prompt for now ---
MENU_JSON = """
{
  "pizzas": [
    {"name": "Margherita Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 8.5, "medium": 11.0, "large": 13.5}},
    {"name": "Pepperoni Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 9.5, "medium": 12.0, "large": 14.5}}
  ],
  "salads": [
    {"name": "House Salad", "sizes": ["regular"], "price": {"regular": 6.0}}
  ],
  "drinks": [
    {"name": "Cola", "sizes": ["can"], "price": {"can": 2.0}}
  ],
  "hours": "11:00–22:00 daily",
  "address": "123 Main St",
  "phone": "+1 (555) 010-0000"
}
"""

SYSTEM_PROMPT = f"""You are Marta, the AI call/SMS assistant for FutureCafe.
You talk naturally and help with:
- Menu questions, placing orders, hours/location, and reservations (lightweight).
- If the user asks for pizza/an order: list choices from the MENU and ask for missing details (size, quantity, etc.).
- If the user provides all details, confirm the order in words (no need to return JSON) and include a brief total using MENU prices.
- For hours/location, reply from MENU.
- For unrelated topics, gently steer back to FutureCafe; if the user remains off-topic for 3 turns total, politely end.
- Keep replies concise and friendly. No long explanations.

MENU (JSON you can read from for options & prices):
{MENU_JSON}
"""

# ---------------- llama.cpp singleton ----------------
_llm = None

def _get_local_llm():
    """Singleton llama.cpp model loader (GGUF)."""
    global _llm
    if _llm is not None:
        return _llm
    from llama_cpp import Llama
    s = get_settings()
    model_path = os.getenv("LLAMACPP_MODEL_PATH", getattr(s, "LLAMACPP_MODEL_PATH", None))
    if not model_path or not os.path.exists(model_path):
        raise RuntimeError(f"LLAMACPP_MODEL_PATH not found: {model_path}")
    _llm = Llama(
        model_path=model_path,
        n_ctx=2048,
        n_threads=os.cpu_count() or 4,
        n_gpu_layers=0,  # CPU by default
        verbose=False,
    )
    return _llm

def _apply_chat_template(messages: List[Dict[str, str]]) -> str:
    parts = []
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")
        if role == "system":
            parts.append(f"<|system|>\n{content}\n")
        elif role == "user":
            parts.append(f"<|user|>\n{content}\n")
        else:
            parts.append(f"<|assistant|>\n{content}\n")
    parts.append("<|assistant|>\n")
    return "\n".join(parts)

def _generate(messages: List[Dict[str, str]], temperature=0.3, max_tokens=320) -> str:
    llm = _get_local_llm()
    prompt = _apply_chat_template(messages)
    out = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=0.9,
        repeat_penalty=1.1,
        stop=["<|user|>", "<|system|>", "<|assistant|>"],
    )
    return (out["choices"][0]["text"] or "").strip()

def respond_chat(
    history: List[Dict[str, str]],
    user_text: str,
    guard_state: Dict[str, Any] | None,
) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
    """
    LLM-only conversational brain.
    Returns: (assistant_text, new_guard_state, diag)
    guard_state: {"unrelated": int, "ended": int, "limit": int}
    """
    guard = dict(guard_state or {"unrelated": 0, "ended": 0, "limit": 3})
    if guard.get("ended"):
        return "(Conversation ended. Start a new chat for FutureCafe.)", guard, {}

    msgs: List[Dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    if history:
        msgs.extend(history[-10:])
    msgs.append({"role": "user", "content": user_text})

    reply = _generate(msgs)

    # A super-light off-topic guard without keywords: if the model signals ending, we respect it.
    # Otherwise, keep the conversation flowing; we do not hard-code keywords or intents here.
    # (We still maintain the 'unrelated' counter if you later want to nudge based on signals.)
    if "Let’s end" in reply or "Let's end" in reply:
        guard["ended"] = 1

    return reply, guard, {}  # no tool_result/diagnostics needed for this simpler flow
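A usage sketch; it requires a local GGUF model at LLAMACPP_MODEL_PATH (see utils/config.py), otherwise _get_local_llm raises:

from models.llm_chat import respond_chat

guard = {"unrelated": 0, "ended": 0, "limit": 3}
history = []
reply, guard, _diag = respond_chat(history, "Do you have vegan pizzas?", guard)
print(reply)
# the caller owns the history; append both turns before the next call
history += [{"role": "user", "content": "Do you have vegan pizzas?"},
            {"role": "assistant", "content": reply}]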
models/llm_router.py
ADDED

from utils.config import get_settings

def small_router(text: str) -> dict:
    t = (text or "").lower()
    if any(k in t for k in ["hour", "open", "close", "address", "location"]):
        return {"tool": "get_hours", "args": {}}
    if any(k in t for k in ["menu", "vegan", "gluten", "pizza", "salad", "special"]):
        flt = []
        for k in ["vegan", "gluten-free", "pizza", "salad"]:
            if k in t:
                flt.append(k)
        return {"tool": "menu_lookup", "args": {"filters": flt}}
    if any(k in t for k in ["reserve", "reservation", "book", "table"]):
        # naive hints
        party = 2 if ("2" in t or "two" in t) else None
        time = "19:00" if "7" in t else None
        return {"tool": "create_reservation", "args": {"party_size": party, "datetime_str": time}}
    if any(k in t for k in ["order", "buy"]):
        return {"tool": "create_order", "args": {"items": []}}
    return {"tool": None, "args": {}}

def nlg(intent: str, tool_result: dict, user_text: str) -> str:
    if intent == "get_hours":
        h = tool_result
        return f"We’re open {h['open']}–{h['close']} daily at {h['address']}."
    if intent == "menu_lookup":
        items = (tool_result or {}).get("items") or []
        if not items:
            return "We have a variety of options—anything specific you’d like?"
        tops = ", ".join(f"{it['name']} (${it['price']})" for it in items[:3])
        return f"Popular picks: {tops}."
    if intent == "create_reservation":
        if tool_result.get("ok"):
            return f"Reservation confirmed for {tool_result['party_size']} at {tool_result['when']}. Code {tool_result['reservation_id']}."
        return "I couldn't confirm that reservation—want me to try again?"
    if intent == "create_order":
        if tool_result.get("ok"):
            items = ", ".join(f"{it['qty']}× {it['name']}" for it in tool_result.get("items", []))
            return f"Got it: {items}. Total ${tool_result.get('total', 0)}."
        return "I couldn't place that order—want me to try again?"
    # small talk
    return "Hello, this is Marta, an AI agent for FutureCafe. How can I help you today?"

def respond(user_text: str) -> dict:
    # MVP: use the rule-based router; later, swap in real LLM function-calling
    route = small_router(user_text)
    return route
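The rule-based router on two sample utterances, showing the keyword hits and the naive party/time hints:

from models.llm_router import respond

print(respond("what are your hours?"))
# {'tool': 'get_hours', 'args': {}}
print(respond("book a table for two at 7"))
# {'tool': 'create_reservation', 'args': {'party_size': 2, 'datetime_str': '19:00'}}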
models/tts_router.py
ADDED

# models/tts_router.py
from __future__ import annotations

import os
import re
import uuid
import wave
import shutil
import subprocess
from shutil import which
from typing import Optional

RUNTIME_AUDIO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "runtime", "audio"))


def ensure_runtime_audio_dir() -> str:
    os.makedirs(RUNTIME_AUDIO_DIR, exist_ok=True)
    return RUNTIME_AUDIO_DIR


def _have(cmd: str) -> bool:
    return which(cmd) is not None


def _is_valid_wav(path: str) -> bool:
    try:
        with wave.open(path, "rb") as w:
            frames = w.getnframes()
            rate = w.getframerate()
            if frames <= 0 or rate <= 0:
                return False
    except Exception:
        return False
    return True


def _tts_with_piper(text: str) -> Optional[str]:
    """
    Use local Piper if available.
    Requires:
      - env PIPER_MODEL to point to models/piper/<voice>.onnx
      - `piper` binary in PATH (brew install piper or from releases)
    """
    model = os.getenv("PIPER_MODEL")
    if not model or not os.path.exists(model):
        return None
    if not _have("piper"):
        return None

    out_dir = ensure_runtime_audio_dir()
    out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    # Avoid stray control chars that can confuse some engines
    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
    try:
        # Simple one-shot pipe
        p = subprocess.Popen(
            ["piper", "--model", model, "--output_file", out_path],
            stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
        p.communicate(input=safe_text.encode("utf-8"), timeout=30)
        if p.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
            return out_path
    except Exception as e:
        print("[TTS] Piper error:", e)
    return None


def _tts_with_say(text: str) -> Optional[str]:
    """
    macOS `say` fallback. Produces WAV via afconvert or ffmpeg if present;
    otherwise returns the AIFF if WAV conversion fails.
    """
    if os.name != "posix":
        return None
    if not _have("say"):
        return None

    out_dir = ensure_runtime_audio_dir()
    aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
    wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
    try:
        # Basic AIFF
        subprocess.run(["say", "-o", aiff, safe_text], check=True)
    except Exception as e:
        print("[TTS] say failed:", e)
        return None

    converted = False
    # Prefer afconvert
    if which("afconvert"):
        try:
            subprocess.run(
                ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav],
                check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
            converted = True
        except Exception:
            converted = False
    # Else try ffmpeg
    if not converted and which("ffmpeg"):
        try:
            subprocess.run(
                ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav],
                check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
            converted = True
        except Exception:
            converted = False

    # Cleanup/return best
    if converted and os.path.exists(wav) and _is_valid_wav(wav):
        try:
            os.remove(aiff)
        except Exception:
            pass
        return wav

    # Fallback: return the AIFF if WAV conversion failed but the AIFF exists
    if os.path.exists(aiff):
        return aiff

    return None


def tts_synthesize(text: str) -> Optional[str]:
    """
    High-level TTS router:
      1) Piper (if configured)
      2) macOS 'say'
      3) None
    Always writes to runtime/audio.
    """
    if not (text and text.strip()):
        return None

    ensure_runtime_audio_dir()

    # 1) Piper
    out = _tts_with_piper(text)
    if out:
        return out

    # 2) macOS say
    out = _tts_with_say(text)
    if out:
        return out

    return None
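A usage sketch; the result is a file under runtime/audio, or None when neither Piper nor macOS `say` is available:

from models.tts_router import tts_synthesize

wav = tts_synthesize("Your order is confirmed. The total is sixteen dollars.")
print(wav)  # e.g. runtime/audio/tts_<hex>.wav, or an .aiff fallback on macOS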
requirements.txt
ADDED

gradio>5.0
pydantic>=2.8
pydantic-settings>=2.5
numpy>=1.26
soundfile>=0.12
webrtcvad>=2.0.10
faster-whisper>=1.0.0
llama-cpp-python>=0.2.90
pyttsx3>=2.90
openai>=1.44.0
runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav
ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:e96d0bc6697344d111427e3900cb71d28c54c5ff4fcc52b45819fa49da0b2f6c
size 370708

runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav
ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:3e3f8ea05d78887dd73e67e846efa7ff7f3afb9ba15a9c61dac69ed62f075025
size 216064
utils/__init__.py
ADDED
File without changes

utils/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (185 Bytes)

utils/__pycache__/config.cpython-312.pyc
ADDED
Binary file (1.97 kB)

utils/__pycache__/phone.cpython-312.pyc
ADDED
Binary file (1.12 kB)
utils/audio.py
ADDED

import soundfile as sf
import numpy as np

def load_audio_mono_16k(path: str):
    wav, sr = sf.read(path, dtype="float32", always_2d=False)
    if wav.ndim == 2:
        wav = wav.mean(axis=1)
    if sr != 16000:
        # lightweight linear-interpolation resample
        import math
        ratio = 16000 / sr
        n = int(math.floor(len(wav) * ratio))
        x_old = np.linspace(0, 1, len(wav), endpoint=False)
        x_new = np.linspace(0, 1, n, endpoint=False)
        wav = np.interp(x_new, x_old, wav).astype("float32")
        sr = 16000
    return wav, sr
utils/config.py
ADDED

from __future__ import annotations
import os
from pydantic_settings import BaseSettings
from pydantic import Field

class Settings(BaseSettings):
    BACKEND_LLM: str = Field(default="llamacpp")  # 'llamacpp' | 'openai' | 'groq'
    LLAMACPP_MODEL_PATH: str = Field(default="models/qwen2.5-1.5b-instruct-q4_k_m.gguf")

    N_CTX: int = 4096
    N_THREADS: int = 4
    N_GPU_LAYERS: int = 0

    ASR_DEVICE: str = "mps"       # 'mps' or 'cpu'
    TTS_ENGINE: str = "pyttsx3"   # 'pyttsx3' | 'say' | 'piper' (later)

    OPENAI_API_KEY: str | None = None
    GROQ_API_KEY: str | None = None

    IS_HF_SPACE: bool = False
    DEBUG: bool = True

    class Config:
        env_file = ".env"
        extra = "ignore"

    def pretty(self) -> dict:
        d = self.model_dump()
        if d.get("OPENAI_API_KEY"):
            d["OPENAI_API_KEY"] = True
        if d.get("GROQ_API_KEY"):
            d["GROQ_API_KEY"] = True
        return d

_settings: Settings | None = None

def get_settings() -> Settings:
    global _settings
    if _settings is None:
        _settings = Settings()
    return _settings
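Settings reads from the environment and an optional .env file. A sketch with an illustrative .env (field names come from the class above; the values are examples only) and the masked dump:

# .env (illustrative values):
#   BACKEND_LLM=llamacpp
#   LLAMACPP_MODEL_PATH=models/qwen2.5-1.5b-instruct-q4_k_m.gguf
#   ASR_DEVICE=cpu
#   TTS_ENGINE=piper
from utils.config import get_settings

s = get_settings()
print(s.pretty())  # API keys are masked to booleans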
utils/phone.py
ADDED

import re

def extract_phone(text: str) -> str | None:
    if not text:
        return None
    m = re.search(r"(\+?\d[\d\-\s]{8,}\d)", text)
    return m.group(1).replace(" ", "") if m else None

def looks_valid(phone: str | None) -> bool:
    if not phone:
        return False
    digits = "".join(ch for ch in phone if ch.isdigit())
    return len(digits) >= 10
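A quick sketch of the extraction and the 10-digit sanity check:

from utils.phone import extract_phone, looks_valid

p = extract_phone("call me at +1 555-010-0000 tonight")
print(p)               # +1555-010-0000 (spaces stripped, hyphens kept)
print(looks_valid(p))  # True (11 digits)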