Eyob-Sol committed
Commit 74bb5fe · verified · 1 Parent(s): af7a51d

Upload 38 files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav filter=lfs diff=lfs merge=lfs -text
+ runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,29 @@
  ---
- title: Futurecafe Voice Core
- emoji: 💻
- colorFrom: yellow
- colorTo: yellow
+ title: FutureCafe Voice Core (Private)
+ emoji: ☎️
+ colorFrom: indigo
+ colorTo: blue
  sdk: gradio
- sdk_version: 5.46.0
+ sdk_version: 4.44.0
  app_file: app.py
  pinned: false
+ license: mit
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # FutureCafe Voice Core (Private)
+
+ This Space runs the **full** Gradio app (voice + SMS). It’s **private** and will be called by a public wrapper Space via `gradio_client`.
+
+ ## Run
+
+ - Uses the **Piper** TTS model at `models/piper/en_US-amy-medium.onnx`
+ - Uses **faster-whisper** (tiny) for ASR
+
+ ### Environment variables (set in Space → Settings → Secrets)
+ - `BACKEND_LLM=openai` (or `groq`)
+ - If `openai`: `OPENAI_API_KEY=<your-key>`
+ - If `groq`: `GROQ_API_KEY=<your-key>`
+ - `TTS_ENGINE=piper`
+ - `PIPER_MODEL=models/piper/en_US-amy-medium.onnx`
+
+ This project writes generated audio files into `runtime/audio`.
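
For context, a public wrapper Space would reach this private Space through `gradio_client`, roughly as sketched below. The Space id, token, and `api_name` are illustrative placeholders, not values confirmed by this commit; the real endpoint names depend on how `app.py` registers its events.

```python
# Hypothetical wrapper-side call into this private Space via gradio_client.
# The Space id, hf_token value, and api_name are placeholders.
from gradio_client import Client

client = Client("your-org/futurecafe-voice-core", hf_token="hf_xxx")  # a private Space requires a token
reply = client.predict("Any vegan pizzas?", api_name="/chat")         # endpoint name is an assumption
print(reply)
```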
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (183 Bytes)
app/__pycache__/catalog.cpython-312.pyc ADDED
Binary file (5.51 kB)
app/__pycache__/gradio_app.cpython-312.pyc ADDED
Binary file (10.7 kB)
app/__pycache__/intent_schema.cpython-312.pyc ADDED
Binary file (2.18 kB)
app/__pycache__/orchestrator.cpython-312.pyc ADDED
Binary file (1.51 kB)
app/__pycache__/policy.cpython-312.pyc ADDED
Binary file (2.53 kB)
app/__pycache__/sim_api.cpython-312.pyc ADDED
Binary file (3.78 kB)
app/__pycache__/tools.cpython-312.pyc ADDED
Binary file (1.4 kB)
app/catalog.py ADDED
@@ -0,0 +1,110 @@
+ # app/catalog.py
+ from __future__ import annotations
+ import json, os
+ from typing import Dict, Any, List, Optional
+
+ _CATALOG: Dict[str, Any] | None = None
+
+ def get_catalog_path() -> str:
+     here = os.path.dirname(os.path.abspath(__file__))
+     root = os.path.dirname(here)
+     return os.path.join(root, "data", "menu_catalog.json")
+
+ def load_catalog() -> Dict[str, Any]:
+     global _CATALOG
+     if _CATALOG is not None:
+         return _CATALOG
+     path = get_catalog_path()
+     with open(path, "r", encoding="utf-8") as f:
+         _CATALOG = json.load(f)
+     return _CATALOG
+
+ def find_item_by_name(name: str) -> Optional[Dict[str, Any]]:
+     c = load_catalog()
+     name_l = (name or "").strip().lower()
+     for it in c["items"]:
+         if it["name"].lower() == name_l:
+             return it
+         # lightweight alias match
+         if name_l in it["name"].lower():
+             return it
+     return None
+
+ def find_item_by_sku(sku: str) -> Optional[Dict[str, Any]]:
+     c = load_catalog()
+     for it in c["items"]:
+         if it["sku"] == sku:
+             return it
+     return None
+
+ def required_fields_for_category(category: str) -> List[str]:
+     c = load_catalog()
+     schema = c["schema"].get(category) or {}
+     return list(schema.get("required_fields") or [])
+
+ def optional_fields_for_category(category: str) -> List[str]:
+     c = load_catalog()
+     schema = c["schema"].get(category) or {}
+     return list(schema.get("optional_fields") or [])
+
+ def compute_missing_fields(order_item: Dict[str, Any]) -> List[str]:
+     """
+     order_item: {"name": "...", "sku": optional, "qty": int, "<opts>": ...}
+     Uses catalog schema to see which fields are missing.
+     """
+     it = None
+     if "sku" in order_item:
+         it = find_item_by_sku(order_item["sku"])
+     if not it and "name" in order_item:
+         it = find_item_by_name(order_item["name"])
+     if not it:
+         return ["name"]  # we don’t even know the item yet
+
+     category = it["category"]
+     req = set(required_fields_for_category(category))
+     present = {k for k in order_item if k in req or k in ("qty", "name", "sku")}
+
+     # qty normalization: consider qty present if >= 1
+     if "qty" in req and (order_item.get("qty") is None or int(order_item.get("qty", 0)) < 1):
+         # keep qty “missing”
+         pass
+     else:
+         present.add("qty")
+
+     missing = [f for f in req if f not in present]
+     return missing
+
+ def friendly_requirements_prompt(order_item: Dict[str, Any]) -> str:
+     it = None
+     if "sku" in order_item:
+         it = find_item_by_sku(order_item["sku"])
+     if not it and "name" in order_item:
+         it = find_item_by_name(order_item["name"])
+     if not it:
+         return "Which item would you like to order?"
+
+     category = it["category"]
+     req = required_fields_for_category(category)
+     opt = optional_fields_for_category(category)
+
+     parts = []
+     opt_txt = ""
+     if opt:
+         opt_txt = f" Optional: {', '.join(opt)}."
+     if req:
+         parts.append(f"I need {', '.join(req)} for {it['name']}.{opt_txt}")
+     else:
+         parts.append(f"Please specify quantity for {it['name']}.{opt_txt}")
+
+     # Also list choices for required options
+     # e.g., size choices
+     opts = it.get("options") or {}
+     choice_bits = []
+     for k, spec in opts.items():
+         if spec.get("required"):
+             choices = spec.get("choices") or []
+             if choices:
+                 choice_bits.append(f"{k}: {', '.join(choices)}")
+     if choice_bits:
+         parts.append("Choices → " + " | ".join(choice_bits))
+     return " ".join(parts)
app/gradio_app.py ADDED
@@ -0,0 +1,299 @@
+ # app/gradio_app.py
+ from __future__ import annotations
+
+ import os
+ import time
+ import shutil
+ import uuid
+ from typing import List, Dict, Any, Tuple
+
+ import gradio as gr
+
+ # ---- External modules we rely on (light, stable) ----
+ # - ASR: faster-whisper wrapper you already have
+ # - TTS: local Piper / macOS `say` via models/tts_router.py
+ # - LLM: optional local model; if missing, we fall back to a safe canned reply
+ try:
+     from models.asr_whisper import get_asr
+ except Exception:
+     get_asr = None
+
+ try:
+     from models.llm_chat import respond_chat as llm_respond_chat
+ except Exception:
+     llm_respond_chat = None
+
+ from models.tts_router import tts_synthesize, ensure_runtime_audio_dir
+
+
+ # =============================================================================
+ # Helpers (pure, modular)
+ # =============================================================================
+
+ def _safe_llm_reply(history: List[Dict[str, str]], user_text: str) -> str:
+     """
+     Ask the chat LLM for a response. If it's not available, use a reasonable fallback.
+     """
+     if llm_respond_chat is not None:
+         try:
+             # policy guard is optional; pass an empty dict
+             bot_text, _guard, _diag = llm_respond_chat(history or [], user_text, {})
+             if isinstance(bot_text, str) and bot_text.strip():
+                 return bot_text.strip()
+         except Exception as e:
+             print("[LLM] fallback due to error:", e)
+     # Fallback (LLM unavailable or failed)
+     return "Hello! How can I assist you today? Would you like to place an order or inquire about the menu?"
+
+
+ def _asr_transcribe(aud_path: str) -> str:
+     """
+     Transcribe audio to text. If ASR is unavailable, return a safe message.
+     """
+     if not aud_path:
+         return "(no audio)"
+     if get_asr is None:
+         return "(ASR unavailable)"
+     try:
+         asr = get_asr()
+         out = asr.transcribe(aud_path)
+         return (out.get("text") or "").strip() or "(no speech detected)"
+     except Exception as e:
+         print("[ASR] error:", e)
+         return "(transcription failed)"
+
+
+ def _tts_from_text(text: str) -> str | None:
+     """
+     Synthesize assistant text to a WAV in runtime/audio.
+     Returns a file path or None.
+     """
+     if not (text and text.strip()):
+         return None
+     path = tts_synthesize(text.strip())
+     if path and os.path.exists(path):
+         return path
+     # always attempt one more minimal fallback to avoid an empty path
+     return tts_synthesize("How can I help with FutureCafe?")
+
+
+ def _append_chat(history: List[Dict[str, str]] | None,
+                  role: str, content: str) -> List[Dict[str, str]]:
+     hist = list(history or [])
+     hist.append({"role": role, "content": content})
+     return hist
+
+
+ def _startup_clean_runtime_audio():
+     """
+     On app start, clean previous session audio artifacts.
+     """
+     audio_dir = ensure_runtime_audio_dir()
+     try:
+         for name in os.listdir(audio_dir):
+             p = os.path.join(audio_dir, name)
+             if os.path.isfile(p):
+                 os.remove(p)
+     except Exception as e:
+         print("[RUNTIME] Cannot clean runtime/audio:", e)
+
+
+ # =============================================================================
+ # Voice handlers (modular)
+ # =============================================================================
+
+ def handle_voice_turn(
+     user_audio_path: str,
+     voice_history: List[Dict[str, str]] | None
+ ) -> Tuple[List[Dict[str, str]], str | None, Dict[str, Any]]:
+     """
+     Single voice turn:
+       1) Transcribe user audio
+       2) Ask LLM for a reply (text)
+       3) TTS the reply to a WAV
+       4) Append both transcript and assistant text to the voice chat history
+
+     Returns: (new_voice_history, assistant_audio_path, diag_json)
+     """
+     t0 = time.time()
+
+     transcript = _asr_transcribe(user_audio_path)
+     hist1 = _append_chat(voice_history, "user", transcript)
+
+     bot_text = _safe_llm_reply(hist1, transcript)
+     hist2 = _append_chat(hist1, "assistant", bot_text)
+
+     tts_path = _tts_from_text(bot_text)
+
+     diag = {
+         "intent": None,
+         "slots": {},
+         "tool_selected": None,
+         "tool_result": {
+             "transcript": transcript,
+             "llm_response": bot_text
+         },
+         "latency_ms": int((time.time() - t0) * 1000),
+     }
+
+     return hist2, tts_path, diag
+
+
+ # =============================================================================
+ # Text handlers (modular)
+ # =============================================================================
+
+ def handle_text_turn(
+     user_text: str,
+     chat_history: List[Dict[str, str]] | None
+ ) -> Tuple[List[Dict[str, str]], Dict[str, Any], str]:
+     """
+     Single text turn:
+       1) Append user text
+       2) Ask LLM for a reply
+       3) Append assistant text
+       4) Prepare diagnostics
+     Returns: (new_chat_history, diag_json, clear_text_value)
+     """
+     t0 = time.time()
+     user_text = (user_text or "").strip()
+     if not user_text:
+         return (chat_history or []), {"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0}, ""
+
+     hist1 = _append_chat(chat_history, "user", user_text)
+     bot_text = _safe_llm_reply(hist1, user_text)
+     hist2 = _append_chat(hist1, "assistant", bot_text)
+
+     diag = {
+         "intent": None,
+         "slots": {},
+         "tool_selected": None,
+         "tool_result": {"user": user_text, "llm_response": bot_text},
+         "latency_ms": int((time.time() - t0) * 1000),
+     }
+
+     return hist2, diag, ""
+
+
+ # =============================================================================
+ # Fixed UI (as requested) + wiring
+ # =============================================================================
+
+ def build_demo():
+     """
+     Fixed UI layout:
+       LEFT (Voice Call):
+         - voice_in (mic recorder)
+         - assistant_audio (autoplay)
+         - voice_chat (transcript chat)
+         - call_diag (JSON)
+       RIGHT (SMS/Chat):
+         - chat_box
+         - text_in (enter to send)
+         - chat_diag (JSON)
+     """
+     _startup_clean_runtime_audio()
+
+     with gr.Blocks(title="FutureCafe Call/SMS Agent (MVP)") as demo:
+         gr.Markdown("### ☎️ FutureCafe AI Agent (MVP)\n**Call (voice)** on the left · **SMS/Chat** on the right")
+
+         # States
+         voice_state = gr.State([])  # list of {"role","content"} for voice transcript chat
+         chat_state = gr.State([])   # list of {"role","content"} for SMS chat
+
+         with gr.Row():
+             # ---------------- LEFT: VOICE ----------------
+             with gr.Column(scale=1, min_width=430):
+                 gr.Markdown("#### 📞 Voice Call")
+                 voice_in = gr.Audio(
+                     label="Press Record → Speak → Stop (auto-sends)",
+                     sources=["microphone"],
+                     type="filepath",
+                     format="wav",
+                     interactive=True,
+                     editable=False,
+                     waveform_options={"show_recording_waveform": True},
+                 )
+
+                 assistant_audio = gr.Audio(
+                     label="Assistant Response (auto-play)",
+                     autoplay=True,
+                     type="filepath",
+                     interactive=False
+                 )
+
+                 voice_chat = gr.Chatbot(value=[], type="messages", height=220, label="Voice Chat (transcripts)")
+
+                 call_diag = gr.JSON(
+                     value={"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0},
+                     label="Voice Diagnostics"
+                 )
+
+             # ---------------- RIGHT: SMS / CHAT ----------------
+             with gr.Column(scale=1, min_width=430):
+                 gr.Markdown("#### 💬 SMS / Chat")
+                 chat_box = gr.Chatbot(value=[], type="messages", height=360, label=None)
+                 text_in = gr.Textbox(
+                     placeholder="Type here… e.g., “Any vegan pizzas?”, “Book a table for 2 at 7.” (Enter to send)",
+                     label=None, lines=1
+                 )
+                 chat_diag = gr.JSON(
+                     value={"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0},
+                     label="Chat Diagnostics"
+                 )
+
+         # ---------- Handlers (thin wrappers that call modular functions) ----------
+         def _clear_recorder():
+             # Only clears the recorder input; leaves assistant audio + transcripts intact
+             return gr.update(value=None, interactive=True)
+
+         def on_voice_change(aud_path: str | None, vhist: List[Dict[str, str]]):
+             if not aud_path:
+                 # no audio; keep everything as-is
+                 return vhist or [], None, {"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0}
+
+             new_vhist, tts_path, diag = handle_voice_turn(aud_path, vhist or [])
+             return new_vhist, tts_path, diag
+
+         def on_text_send(txt: str, hist: List[Dict[str, str]]):
+             new_hist, diag, clear_text = handle_text_turn(txt, hist or [])
+             return new_hist, diag, clear_text
+
+         # ---------- Wiring ----------
+         # Voice lane: update (voice_chat, assistant_audio, call_diag); clear the recorder only AFTER outputs are set.
+         # Try to fire on explicit Stop; fall back to the generic change event if not supported.
+         rec_event = getattr(voice_in, "stop_recording", None)
+         if callable(rec_event):
+             rec_event(
+                 on_voice_change,
+                 inputs=[voice_in, voice_state],
+                 outputs=[voice_chat, assistant_audio, call_diag],
+             ).then(
+                 _clear_recorder,  # runs AFTER outputs are set → autoplay isn’t interrupted
+                 inputs=None,
+                 outputs=[voice_in],
+             )
+         else:
+             voice_in.change(
+                 on_voice_change,
+                 inputs=[voice_in, voice_state],
+                 outputs=[voice_chat, assistant_audio, call_diag],
+             ).then(
+                 _clear_recorder,
+                 inputs=None,
+                 outputs=[voice_in],
+             )
+
+         # Keep voice_state in sync with what's shown in voice_chat
+         voice_chat.change(lambda x: x, inputs=[voice_chat], outputs=[voice_state])
+
+         # Text lane: Enter to send
+         text_in.submit(
+             on_text_send,
+             inputs=[text_in, chat_state],
+             outputs=[chat_box, chat_diag, text_in],
+         )
+         # Keep chat_state in sync with what's shown in chat_box
+         chat_box.change(lambda x: x, inputs=[chat_box], outputs=[chat_state])
+
+     return demo
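
The Space's `app_file: app.py` entry point is not part of the visible diff; presumably it is little more than a launcher around `build_demo()`, something like this hypothetical sketch:

```python
# Hypothetical app.py (not shown in this commit): build and launch the Blocks demo.
from app.gradio_app import build_demo

if __name__ == "__main__":
    demo = build_demo()
    demo.launch()
```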
app/intent_schema.py ADDED
@@ -0,0 +1,31 @@
+ # app/intent_schema.py
+ from __future__ import annotations
+ from typing import List, Optional, Literal
+ from pydantic import BaseModel, Field
+
+ IntentName = Literal["reservation.create", "order.create", "hours.get", "menu.search", "smalltalk", "other"]
+
+ class ReservationSlots(BaseModel):
+     name: Optional[str] = None
+     party_size: Optional[int] = Field(default=None, ge=1, le=20)
+     date: Optional[str] = None   # ISO preferred (YYYY-MM-DD) or “today”
+     time: Optional[str] = None   # “19:00” or “7 pm”
+     phone: Optional[str] = None
+
+ class OrderItem(BaseModel):
+     name: str
+     qty: int = Field(default=1, ge=1)
+
+ class OrderSlots(BaseModel):
+     items: List[OrderItem] = Field(default_factory=list)
+     notes: Optional[str] = None
+
+ class MenuSlots(BaseModel):
+     query: Optional[str] = None
+     dietary: List[str] = Field(default_factory=list)  # e.g., ["vegan","gluten-free"]
+
+ class IntentEnvelope(BaseModel):
+     intent: IntentName
+     need_more_info: bool = False
+     ask_user: Optional[str] = None   # a single, polite follow-up question if info is missing
+     slots: dict = Field(default_factory=dict)  # raw dict; we validate per intent
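
For illustration, a routed payload would be validated in two steps: parse the envelope, then validate its raw `slots` dict with the model matching the intent (values below are made up):

```python
# Illustrative two-step validation using the models above.
from app.intent_schema import IntentEnvelope, ReservationSlots

env = IntentEnvelope(intent="reservation.create",
                     slots={"party_size": 2, "time": "19:00"})
slots = ReservationSlots(**env.slots)  # per-intent validation happens here
print(slots.party_size)                # -> 2
```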
app/orchestrator.py ADDED
@@ -0,0 +1,34 @@
+ import re
+ from typing import Dict, Any
+ from models.llm_router import respond as route_fn, nlg
+ from app.tools import dispatch_tool
+ from utils.phone import extract_phone, looks_valid
+
+ def llm_route_and_execute(user_text: str) -> Dict[str, Any]:
+     route = route_fn(user_text)  # {"tool": "get_hours"|..., "args": {...}}
+     tool = route.get("tool")
+     args = route.get("args") or {}
+
+     # enrich reservation with a phone number if present in the text
+     if tool == "create_reservation":
+         phone = extract_phone(user_text)
+         if looks_valid(phone):
+             args["phone"] = phone
+         if not args.get("name"):
+             # naive default name if the user included "my name is ..."
+             m = re.search(r"(?:my name is|i am|i'm)\s+([A-Z][a-z]+)", user_text, re.I)
+             if m: args["name"] = m.group(1)
+
+     tool_result = None
+     if tool:
+         tool_result = dispatch_tool(tool, args)
+
+     reply = nlg(tool or "", tool_result or {}, user_text)
+
+     return {
+         "intent": tool or "smalltalk",
+         "slots": args,
+         "tool_selected": tool,
+         "tool_result": tool_result,
+         "response": reply,
+     }
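
End to end, one routed turn looks like the sketch below. Note that `dispatch_tool` imports `mock_api.service`, which is not part of the visible diff, so the `tool_result` values here are only indicative:

```python
# Illustrative turn; requires the mock_api package that app.tools imports.
from app.orchestrator import llm_route_and_execute

turn = llm_route_and_execute("Book a table for 2 at 7, my name is Ada, 555-010-0000")
print(turn["tool_selected"])  # -> "create_reservation"
print(turn["slots"])          # party_size/datetime_str from the router, plus phone and name
```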
app/policy.py ADDED
@@ -0,0 +1,45 @@
+ # app/policy.py
+ from __future__ import annotations
+ import os, re
+
+ # --- Topic detection (very lightweight, fast) ---
+ CAFE_KEYWORDS = [
+     "menu","order","item","dish","pizza","burger","salad","pasta","vegan","gluten",
+     "price","special","deal","offer","hours","open","close","time","location","address",
+     "book","reserve","reservation","table","party","pickup","delivery","takeout","payment",
+     "futurecafe","future cafe","future-cafe","café","coffee","drinks","beverage","side"
+ ]
+ _kw_re = re.compile(r"|".join([re.escape(k) for k in CAFE_KEYWORDS]), re.I)
+
+ SMALLTALK = r"\b(hi|hello|hey|good\s+(morning|afternoon|evening)|thanks|thank you|bye|goodbye)\b"
+ _smalltalk_re = re.compile(SMALLTALK, re.I)
+
+ def is_cafe_topic(text: str) -> bool:
+     return bool(text and _kw_re.search(text))
+
+ def is_smalltalk(text: str) -> bool:
+     return bool(text and _smalltalk_re.search(text))
+
+ def unrelated_limit() -> int:
+     """How many off-topic turns are allowed before ending."""
+     try:
+         n = int(os.getenv("CAFE_UNRELATED_LIMIT", "3"))
+         return max(1, min(5, n))
+     except Exception:
+         return 3
+
+ # --- Messages ---
+ POLITE_REFUSAL = (
+     "I’m here to help with FutureCafe—menu, hours, reservations, and orders. "
+     "Could you ask something about the restaurant?"
+ )
+
+ POLITE_REFUSAL_2 = (
+     "To keep things focused, I can only help with FutureCafe. "
+     "Ask me about our menu, hours, or booking a table."
+ )
+
+ def end_message() -> str:
+     return ("I’m only able to help with FutureCafe topics. "
+             "Let’s end this chat for now. If you need menu, hours, or reservations, "
+             "message me again anytime.")
app/sim_api.py ADDED
@@ -0,0 +1,80 @@
+ # app/sim_api.py
+ from __future__ import annotations
+ from typing import Dict, Any, List, Tuple
+ from app.catalog import find_item_by_name, find_item_by_sku
+
+ def _pick_item(order_it: Dict[str, Any]) -> Dict[str, Any] | None:
+     it = None
+     if "sku" in order_it:
+         it = find_item_by_sku(order_it["sku"])
+     if not it and "name" in order_it:
+         it = find_item_by_name(order_it["name"])
+     return it
+
+ def check_item_availability(order_it: Dict[str, Any]) -> Tuple[bool, Dict[str, Any]]:
+     """
+     Returns (is_available, info).
+     info contains {"reason": "...", "alternatives": [...]} when not available.
+     For size-based items, verify stock for the requested size.
+     """
+     it = _pick_item(order_it)
+     if not it:
+         return False, {"reason": "unknown_item", "alternatives": []}
+
+     qty = int(order_it.get("qty", 0) or 0)
+     if qty < 1:
+         return False, {"reason": "qty_missing", "alternatives": []}
+
+     # size key heuristics
+     size = order_it.get("size")
+     stock_map = it.get("stock") or {}
+
+     if "one_size" in stock_map:
+         avail = stock_map["one_size"]
+         if avail >= qty:
+             return True, {"price_each": (it.get("price") or {}).get("one_size", 0.0)}
+         else:
+             return False, {"reason": "insufficient_stock", "have": avail, "alternatives": []}
+
+     if size:
+         have = int(stock_map.get(size, 0))
+         if have >= qty:
+             return True, {"price_each": (it.get("price") or {}).get(size, 0.0)}
+         else:
+             # propose other sizes with stock
+             alts = []
+             for s, have_s in stock_map.items():
+                 if have_s >= qty:
+                     alts.append({"size": s, "have": have_s, "price_each": (it.get("price") or {}).get(s, 0.0)})
+             return False, {"reason": "size_out_of_stock", "have": have, "alternatives": alts}
+     else:
+         # missing required option — let schema enforcement ask; if the user skipped it, treat as not available
+         return False, {"reason": "size_missing", "alternatives": [{"hint": "provide size"}]}
+
+ def place_order(order_items: List[Dict[str, Any]]) -> Dict[str, Any]:
+     """
+     Verifies each item and (if all are available) returns a summary.
+     We do not mutate stock here (sim).
+     """
+     lines = []
+     total = 0.0
+     for it in order_items:
+         item_def = _pick_item(it)
+         if not item_def:
+             return {"ok": False, "reason": "unknown_item", "item": it}
+         avail, info = check_item_availability(it)
+         if not avail:
+             return {"ok": False, "reason": info.get("reason"), "item": it, "alternatives": info.get("alternatives", [])}
+         qty = int(it["qty"])
+         unit = info.get("price_each", 0.0)
+         line_total = unit * qty
+         total += line_total
+         lines.append({
+             "sku": item_def["sku"],
+             "name": item_def["name"],
+             "qty": qty,
+             "options": {k: v for k, v in it.items() if k not in ("name", "sku", "qty")},
+             "unit": unit,
+             "line_total": line_total
+         })
+     return {"ok": True, "total": round(total, 2), "lines": lines}
app/tools.py ADDED
@@ -0,0 +1,18 @@
+ from typing import Any, Dict
+ from mock_api import service as svc
+
+ def dispatch_tool(tool: str, args: Dict[str, Any]) -> Dict[str, Any]:
+     if tool == "get_hours":
+         return svc.get_hours()
+     if tool == "menu_lookup":
+         return {"items": svc.menu_lookup(args.get("filters") or [])}
+     if tool == "create_reservation":
+         return svc.create_reservation(
+             name=args.get("name") or "Guest",
+             phone=args.get("phone"),
+             party_size=int(args.get("party_size") or 2),
+             datetime_str=args.get("datetime_str") or "",
+         )
+     if tool == "create_order":
+         return svc.create_order(args.get("items") or [])
+     raise ValueError(f"unknown tool: {tool}")
data/menu_catalog.json ADDED
@@ -0,0 +1,57 @@
+ {
+   "items": [
+     {
+       "sku": "pizza.margherita",
+       "name": "Margherita Pizza",
+       "category": "pizza",
+       "options": {
+         "size": { "required": true, "choices": ["small", "medium", "large"] },
+         "crust": { "required": false, "choices": ["thin", "regular"] },
+         "toppings": { "required": false, "choices": ["extra cheese", "basil", "olives"], "multi": true }
+       },
+       "price": { "small": 9.0, "medium": 12.0, "large": 14.0 },
+       "stock": { "small": 10, "medium": 6, "large": 0 },
+       "tags": ["vegetarian"]
+     },
+     {
+       "sku": "pizza.pepperoni",
+       "name": "Pepperoni Pizza",
+       "category": "pizza",
+       "options": {
+         "size": { "required": true, "choices": ["small", "medium", "large"] },
+         "crust": { "required": false, "choices": ["thin", "regular"] },
+         "toppings": { "required": false, "choices": ["extra cheese", "jalapeno"], "multi": true }
+       },
+       "price": { "small": 10.0, "medium": 13.5, "large": 15.5 },
+       "stock": { "small": 3, "medium": 0, "large": 2 },
+       "tags": []
+     },
+     {
+       "sku": "salad.house",
+       "name": "House Salad",
+       "category": "salad",
+       "options": {
+         "dressing": { "required": false, "choices": ["vinaigrette", "ranch", "no dressing"] }
+       },
+       "price": { "one_size": 7.5 },
+       "stock": { "one_size": 15 },
+       "tags": ["vegetarian", "vegan"]
+     },
+     {
+       "sku": "drink.cola",
+       "name": "Cola",
+       "category": "drink",
+       "options": {
+         "size": { "required": true, "choices": ["can", "bottle"] }
+       },
+       "price": { "can": 2.0, "bottle": 3.5 },
+       "stock": { "can": 20, "bottle": 4 },
+       "tags": []
+     }
+   ],
+   "schema": {
+     "pizza": { "required_fields": ["size", "qty"], "optional_fields": ["crust", "toppings"] },
+     "salad": { "required_fields": ["qty"], "optional_fields": ["dressing"] },
+     "drink": { "required_fields": ["size", "qty"], "optional_fields": [] }
+   }
+ }
models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (186 Bytes)
models/__pycache__/asr_whisper.cpython-312.pyc ADDED
Binary file (1.99 kB)
models/__pycache__/llm_chat.cpython-312.pyc ADDED
Binary file (4.85 kB)
models/__pycache__/llm_router.cpython-312.pyc ADDED
Binary file (4.08 kB)
models/__pycache__/tts_router.cpython-312.pyc ADDED
Binary file (6.28 kB)
models/asr_whisper.py ADDED
@@ -0,0 +1,27 @@
+ # models/asr_whisper.py
+ from faster_whisper import WhisperModel
+ from utils.config import get_settings
+
+ _asr_singleton = None
+
+ class WhisperASR:
+     def __init__(self):
+         s = get_settings()
+         # faster-whisper supports: 'cpu' or 'cuda' (no 'mps')
+         requested = (s.ASR_DEVICE or "cpu").lower()
+         device = "cpu" if requested not in ("cpu", "cuda") else requested
+         if requested == "mps":
+             print("[ASR] 'mps' not supported by faster-whisper; falling back to CPU.")
+         compute_type = "int8" if device == "cpu" else "float16"
+         self.model = WhisperModel("tiny", device=device, compute_type=compute_type)
+
+     def transcribe(self, path: str) -> dict:
+         segments, info = self.model.transcribe(path, beam_size=1, language="en")
+         text = " ".join(seg.text.strip() for seg in segments)
+         return {"text": text, "language": info.language, "segments": []}
+
+ def get_asr():
+     global _asr_singleton
+     if _asr_singleton is None:
+         _asr_singleton = WhisperASR()
+     return _asr_singleton
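
Typical use mirrors what the Gradio app does: grab the singleton once, then transcribe file paths (the WAV path below is hypothetical):

```python
# Example: one-off transcription through the singleton wrapper above.
from models.asr_whisper import get_asr

asr = get_asr()                                   # loads the 'tiny' model on first call
out = asr.transcribe("runtime/audio/sample.wav")  # hypothetical input path
print(out["text"], out["language"])
```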
models/llm_chat.py ADDED
@@ -0,0 +1,116 @@
+ # models/llm_chat.py
+ from __future__ import annotations
+ from typing import List, Dict, Any, Tuple
+ import os
+
+ from utils.config import get_settings
+
+ # --- Small, readable menu JSON kept in the system prompt for now ---
+ MENU_JSON = """
+ {
+   "pizzas": [
+     {"name": "Margherita Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 8.5, "medium": 11.0, "large": 13.5}},
+     {"name": "Pepperoni Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 9.5, "medium": 12.0, "large": 14.5}}
+   ],
+   "salads": [
+     {"name": "House Salad", "sizes": ["regular"], "price": {"regular": 6.0}}
+   ],
+   "drinks": [
+     {"name": "Cola", "sizes": ["can"], "price": {"can": 2.0}}
+   ],
+   "hours": "11:00–22:00 daily",
+   "address": "123 Main St",
+   "phone": "+1 (555) 010-0000"
+ }
+ """
+
+ SYSTEM_PROMPT = f"""You are Marta, the AI call/SMS assistant for FutureCafe.
+ You talk naturally and help with:
+ - Menu questions, placing orders, hours/location, and reservations (lightweight).
+ - If the user asks for pizza/order: list choices from the MENU and ask for missing details (size, quantity, etc.).
+ - If the user provides all details, confirm the order in words (no need to return JSON) and include a brief total using MENU prices.
+ - For hours/location, reply from the MENU.
+ - For unrelated topics, gently steer back to FutureCafe; if the user remains off-topic for 3 turns total, politely end.
+ - Keep replies concise and friendly. No long explanations.
+
+ MENU (JSON you can read from for options & prices):
+ {MENU_JSON}
+ """
+
+ # ---------------- llama.cpp singleton ----------------
+ _llm = None
+
+ def _get_local_llm():
+     """Singleton llama.cpp model loader (GGUF)."""
+     global _llm
+     if _llm is not None:
+         return _llm
+     from llama_cpp import Llama
+     s = get_settings()
+     model_path = os.getenv("LLAMACPP_MODEL_PATH", getattr(s, "LLAMACPP_MODEL_PATH", None))
+     if not model_path or not os.path.exists(model_path):
+         raise RuntimeError(f"LLAMACPP_MODEL_PATH not found: {model_path}")
+     _llm = Llama(
+         model_path=model_path,
+         n_ctx=2048,
+         n_threads=os.cpu_count() or 4,
+         n_gpu_layers=0,  # CPU by default
+         verbose=False,
+     )
+     return _llm
+
+ def _apply_chat_template(messages: List[Dict[str, str]]) -> str:
+     parts = []
+     for m in messages:
+         role = m.get("role", "user")
+         content = m.get("content", "")
+         if role == "system":
+             parts.append(f"<|system|>\n{content}\n")
+         elif role == "user":
+             parts.append(f"<|user|>\n{content}\n")
+         else:
+             parts.append(f"<|assistant|>\n{content}\n")
+     parts.append("<|assistant|>\n")
+     return "\n".join(parts)
+
+ def _generate(messages: List[Dict[str, str]], temperature=0.3, max_tokens=320) -> str:
+     llm = _get_local_llm()
+     prompt = _apply_chat_template(messages)
+     out = llm(
+         prompt,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         top_p=0.9,
+         repeat_penalty=1.1,
+         stop=["<|user|>", "<|system|>", "<|assistant|>"],
+     )
+     return (out["choices"][0]["text"] or "").strip()
+
+ def respond_chat(
+     history: List[Dict[str, str]],
+     user_text: str,
+     guard_state: Dict[str, Any] | None,
+ ) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
+     """
+     LLM-only conversational brain.
+     Returns: (assistant_text, new_guard_state, diag)
+     guard_state: {"unrelated": int, "ended": int, "limit": int}
+     """
+     guard = dict(guard_state or {"unrelated": 0, "ended": 0, "limit": 3})
+     if guard.get("ended"):
+         return "(Conversation ended. Start a new chat for FutureCafe.)", guard, {}
+
+     msgs: List[Dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
+     if history:
+         msgs.extend(history[-10:])
+     msgs.append({"role": "user", "content": user_text})
+
+     reply = _generate(msgs)
+
+     # A super-light off-topic guard without keywords: if the model signals ending, we respect it.
+     # Otherwise, keep the conversation flowing; we do not hard-code keywords or intents here.
+     # (We still maintain the 'unrelated' counter in case we later want to nudge based on signals.)
+     if "Let’s end" in reply or "Let's end" in reply:
+         guard["ended"] = 1
+
+     return reply, guard, {}  # no tool_result/diagnostics needed for this simpler flow
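
A single text turn through this module would look as follows; it only runs if `LLAMACPP_MODEL_PATH` points at a local GGUF file, otherwise `_get_local_llm` raises:

```python
# Illustrative turn; requires a GGUF model at LLAMACPP_MODEL_PATH.
from models.llm_chat import respond_chat

reply, guard, _ = respond_chat([], "What are your hours?", None)
print(reply)  # model-dependent, e.g. something like "We're open 11:00-22:00 daily."
print(guard)  # -> {'unrelated': 0, 'ended': 0, 'limit': 3}
```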
models/llm_router.py ADDED
@@ -0,0 +1,46 @@
+ from utils.config import get_settings
+
+ def small_router(text: str) -> dict:
+     t = (text or "").lower()
+     if any(k in t for k in ["hour", "open", "close", "address", "location"]):
+         return {"tool": "get_hours", "args": {}}
+     if any(k in t for k in ["menu", "vegan", "gluten", "pizza", "salad", "special"]):
+         flt = []
+         for k in ["vegan", "gluten-free", "pizza", "salad"]:
+             if k in t: flt.append(k)
+         return {"tool": "menu_lookup", "args": {"filters": flt}}
+     if any(k in t for k in ["reserve", "reservation", "book", "table"]):
+         # naive hints
+         party = 2 if ("2" in t or "two" in t) else None
+         time = "19:00" if "7" in t else None
+         return {"tool": "create_reservation", "args": {"party_size": party, "datetime_str": time}}
+     if any(k in t for k in ["order", "buy"]):
+         return {"tool": "create_order", "args": {"items": []}}
+     return {"tool": None, "args": {}}
+
+ def nlg(intent: str, tool_result: dict, user_text: str) -> str:
+     if intent == "get_hours":
+         h = tool_result
+         return f"We’re open {h['open']}–{h['close']} daily at {h['address']}."
+     if intent == "menu_lookup":
+         items = (tool_result or {}).get("items") or []
+         if not items:
+             return "We have a variety of options—anything specific you’d like?"
+         tops = ", ".join(f"{it['name']} (${it['price']})" for it in items[:3])
+         return f"Popular picks: {tops}."
+     if intent == "create_reservation":
+         if tool_result.get("ok"):
+             return f"Reservation confirmed for {tool_result['party_size']} at {tool_result['when']}. Code {tool_result['reservation_id']}."
+         return "I couldn't confirm that reservation—want me to try again?"
+     if intent == "create_order":
+         if tool_result.get("ok"):
+             items = ", ".join(f"{it['qty']}× {it['name']}" for it in tool_result.get("items", []))
+             return f"Got it: {items}. Total ${tool_result.get('total', 0)}."
+         return "I couldn't place that order—want me to try again?"
+     # small talk
+     return "Hello, this is Marta, an AI agent for FutureCafe. How can I help you today?"
+
+ def respond(user_text: str) -> dict:
+     # MVP: use the rule-based router; later swap in real LLM function-calling
+     route = small_router(user_text)
+     return route
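
The router's output shape, for reference (both results follow directly from the keyword lists above):

```python
# Quick check of the keyword router.
from models.llm_router import small_router

print(small_router("What are your hours?"))
# -> {'tool': 'get_hours', 'args': {}}
print(small_router("Any vegan pizza?"))
# -> {'tool': 'menu_lookup', 'args': {'filters': ['vegan', 'pizza']}}
```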
models/tts_router.py ADDED
@@ -0,0 +1,151 @@
+ # models/tts_router.py
+ from __future__ import annotations
+
+ import os
+ import re
+ import uuid
+ import wave
+ import shutil
+ import subprocess
+ from shutil import which
+ from typing import Optional
+
+ RUNTIME_AUDIO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "runtime", "audio"))
+
+
+ def ensure_runtime_audio_dir() -> str:
+     os.makedirs(RUNTIME_AUDIO_DIR, exist_ok=True)
+     return RUNTIME_AUDIO_DIR
+
+
+ def _have(cmd: str) -> bool:
+     return which(cmd) is not None
+
+
+ def _is_valid_wav(path: str) -> bool:
+     try:
+         with wave.open(path, "rb") as w:
+             frames = w.getnframes()
+             rate = w.getframerate()
+             if frames <= 0 or rate <= 0:
+                 return False
+     except Exception:
+         return False
+     return True
+
+
+ def _tts_with_piper(text: str) -> Optional[str]:
+     """
+     Use local Piper if available.
+     Requires:
+       - env PIPER_MODEL to point to models/piper/<voice>.onnx
+       - the `piper` binary in PATH (brew install piper, or from releases)
+     """
+     model = os.getenv("PIPER_MODEL")
+     if not model or not os.path.exists(model):
+         return None
+     if not _have("piper"):
+         return None
+
+     out_dir = ensure_runtime_audio_dir()
+     out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
+
+     # Avoid stray control chars that can confuse some engines
+     safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
+     try:
+         # Simple one-shot pipe
+         p = subprocess.Popen(
+             ["piper", "--model", model, "--output_file", out_path],
+             stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+         )
+         p.communicate(input=safe_text.encode("utf-8"), timeout=30)
+         if p.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
+             return out_path
+     except Exception as e:
+         print("[TTS] Piper error:", e)
+     return None
+
+
+ def _tts_with_say(text: str) -> Optional[str]:
+     """
+     macOS `say` fallback. Produces WAV via afconvert or ffmpeg if present;
+     else writes AIFF and returns it if WAV conversion fails.
+     """
+     if os.name != "posix":
+         return None
+     if not _have("say"):
+         return None
+
+     out_dir = ensure_runtime_audio_dir()
+     aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
+     wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
+
+     safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
+     try:
+         # Basic AIFF
+         subprocess.run(["say", "-o", aiff, safe_text], check=True)
+     except Exception as e:
+         print("[TTS] say failed:", e)
+         return None
+
+     converted = False
+     # Prefer afconvert
+     if which("afconvert"):
+         try:
+             subprocess.run(
+                 ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav],
+                 check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+             )
+             converted = True
+         except Exception:
+             converted = False
+     # Else try ffmpeg
+     if not converted and which("ffmpeg"):
+         try:
+             subprocess.run(
+                 ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav],
+                 check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+             )
+             converted = True
+         except Exception:
+             converted = False
+
+     # Cleanup/return best
+     if converted and os.path.exists(wav) and _is_valid_wav(wav):
+         try:
+             os.remove(aiff)
+         except Exception:
+             pass
+         return wav
+
+     # Fallback: return AIFF if WAV conversion failed but the AIFF exists
+     if os.path.exists(aiff):
+         return aiff
+
+     return None
+
+
+ def tts_synthesize(text: str) -> Optional[str]:
+     """
+     High-level TTS router:
+       1) Piper (if configured)
+       2) macOS 'say'
+       3) None
+     Always writes to runtime/audio.
+     """
+     if not (text and text.strip()):
+         return None
+
+     ensure_runtime_audio_dir()
+
+     # 1) Piper
+     out = _tts_with_piper(text)
+     if out:
+         return out
+
+     # 2) macOS say
+     out = _tts_with_say(text)
+     if out:
+         return out
+
+     return None
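
In practice callers only touch the top-level router; which engine produced the file is invisible to them:

```python
# Example: synthesize a reply and hand the resulting path to a Gradio Audio output.
from models.tts_router import tts_synthesize

path = tts_synthesize("Your order is confirmed: one medium Margherita.")
print(path)  # e.g. runtime/audio/tts_<uuid>.wav, or None if no engine is available
```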
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ gradio>5.0
+ pydantic>=2.8
+ pydantic-settings>=2.5
+ numpy>=1.26
+ soundfile>=0.12
+ webrtcvad>=2.0.10
+ faster-whisper>=1.0.0
+ llama-cpp-python>=0.2.90
+ pyttsx3>=2.90
+ openai>=1.44.0
runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e96d0bc6697344d111427e3900cb71d28c54c5ff4fcc52b45819fa49da0b2f6c
+ size 370708
runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e3f8ea05d78887dd73e67e846efa7ff7f3afb9ba15a9c61dac69ed62f075025
+ size 216064
utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (185 Bytes)
utils/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.97 kB)
utils/__pycache__/phone.cpython-312.pyc ADDED
Binary file (1.12 kB)
utils/audio.py ADDED
@@ -0,0 +1,17 @@
+ import soundfile as sf
+ import numpy as np
+ import math
+
+ def load_audio_mono_16k(path: str):
+     wav, sr = sf.read(path, dtype="float32", always_2d=False)
+     if wav.ndim == 2:
+         wav = wav.mean(axis=1)
+     if sr != 16000:
+         # lightweight linear-interpolation resample to 16 kHz
+         ratio = 16000 / sr
+         n = int(math.floor(len(wav) * ratio))
+         x_old = np.linspace(0, 1, len(wav), endpoint=False)
+         x_new = np.linspace(0, 1, n, endpoint=False)
+         wav = np.interp(x_new, x_old, wav).astype("float32")
+         sr = 16000
+     return wav, sr
utils/config.py ADDED
@@ -0,0 +1,41 @@
+ from __future__ import annotations
+ import os
+ from pydantic_settings import BaseSettings
+ from pydantic import Field
+
+ class Settings(BaseSettings):
+     BACKEND_LLM: str = Field(default="llamacpp")  # 'llamacpp' | 'openai' | 'groq'
+     LLAMACPP_MODEL_PATH: str = Field(default="models/qwen2.5-1.5b-instruct-q4_k_m.gguf")
+
+     N_CTX: int = 4096
+     N_THREADS: int = 4
+     N_GPU_LAYERS: int = 0
+
+     ASR_DEVICE: str = "mps"       # 'mps' or 'cpu'
+     TTS_ENGINE: str = "pyttsx3"   # 'pyttsx3' | 'say' | 'piper' (later)
+
+     OPENAI_API_KEY: str | None = None
+     GROQ_API_KEY: str | None = None
+
+     IS_HF_SPACE: bool = False
+     DEBUG: bool = True
+
+     class Config:
+         env_file = ".env"
+         extra = "ignore"
+
+     def pretty(self) -> dict:
+         d = self.model_dump()
+         if d.get("OPENAI_API_KEY"):
+             d["OPENAI_API_KEY"] = True
+         if d.get("GROQ_API_KEY"):
+             d["GROQ_API_KEY"] = True
+         return d
+
+ _settings: Settings | None = None
+
+ def get_settings() -> Settings:
+     global _settings
+     if _settings is None:
+         _settings = Settings()
+     return _settings
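
Settings resolve from the environment (or `.env`) at first use, so per-Space overrides are just environment variables; note the singleton means overrides must be in place before the first `get_settings()` call:

```python
# Example: environment variables override the defaults above.
import os
os.environ["BACKEND_LLM"] = "openai"  # must be set before the first get_settings() call

from utils.config import get_settings

s = get_settings()
print(s.BACKEND_LLM)  # -> "openai"
print(s.pretty())     # API keys are masked to booleans
```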
utils/phone.py ADDED
@@ -0,0 +1,13 @@
+ import re
+
+ def extract_phone(text: str) -> str | None:
+     if not text:
+         return None
+     m = re.search(r"(\+?\d[\d\-\s]{8,}\d)", text)
+     return m.group(1).replace(" ", "") if m else None
+
+ def looks_valid(phone: str | None) -> bool:
+     if not phone:
+         return False
+     digits = "".join(ch for ch in phone if ch.isdigit())
+     return len(digits) >= 10