kcrobot25 committed on
Commit e05484b · verified · 1 Parent(s): 43ae242

initial commit

Files changed (1): app.py +282 -0
app.py ADDED
# app.py — KC Robot AI v4.2 — Cloud Brain (Gradio + REST API)
# Features:
# - Gradio UI (chat, record, TTS)
# - HF Inference API for text generation & STT (requires HF_API_TOKEN in Secrets)
# - gTTS TTS (fallback)
# - Telegram notify (optional via TELEGRAM_TOKEN & TELEGRAM_CHATID)
# - Endpoints for ESP32: /api/ask, /api/tts, /api/stt, /api/presence, /api/display, /api/config
# Notes: Add HF_API_TOKEN (and optional TELEGRAM_TOKEN/TELEGRAM_CHATID) in Space Secrets.

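# A minimal sketch of the Space configuration this app expects (variable
# names are the ones read via os.getenv() below; values are placeholders):
#   HF_API_TOKEN   = hf_xxx...                (required for text gen & STT)
#   HF_MODEL       = google/flan-t5-large     (optional, default shown)
#   HF_STT_MODEL   = openai/whisper-small     (optional, default shown)
#   TELEGRAM_TOKEN / TELEGRAM_CHATID          (optional, enables Telegram)
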
import os, io, time, threading, logging, tempfile
from typing import Any, List, Tuple, Optional
import requests, gradio as gr
from gtts import gTTS
from fastapi import FastAPI, Request, UploadFile, File
from starlette.responses import JSONResponse, Response

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("kcrobot.v4.2.cloud")

HF_API_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
HF_MODEL = os.getenv("HF_MODEL", "google/flan-t5-large").strip()
HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip()

TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
TELEGRAM_CHATID = os.getenv("TELEGRAM_CHATID", "").strip()

HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

CONVERSATION: List[Tuple[str, str]] = []
DISPLAY_BUFFER: List[str] = []  # rolling buffer of short lines for the robot's display
DISPLAY_LIMIT = 16

def push_display(line: str):
    DISPLAY_BUFFER.append(line)
    if len(DISPLAY_BUFFER) > DISPLAY_LIMIT:
        DISPLAY_BUFFER.pop(0)

def detect_vi_or_en(text: str) -> str:
    # Heuristic: any Vietnamese diacritic marks the text as Vietnamese.
    if not text: return "en"
    vi_chars = "ăâđêôơưáàảãạắằẳẵặấầẩẫậéèẻẽẹíìỉĩịóòỏõọúùủũụýỳỷỹỵ"
    for ch in text.lower():
        if ch in vi_chars:
            return "vi"
    return "en"

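# Quick sanity checks of the heuristic:
#   detect_vi_or_en("Xin chào")  -> "vi"  (contains "à")
#   detect_vi_or_en("hello")     -> "en"
#   detect_vi_or_en("")          -> "en"  (default)
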
def _parse_hf_text_response(data: Any) -> str:
    try:
        if isinstance(data, list) and data and isinstance(data[0], dict):
            return data[0].get("generated_text", "") or str(data[0])
        if isinstance(data, dict) and "generated_text" in data:
            return data.get("generated_text", "")
        if isinstance(data, dict) and "text" in data:
            return data.get("text", "")
        if isinstance(data, dict) and "choices" in data:
            c0 = data["choices"][0]
            return c0.get("text") or c0.get("message", {}).get("content", "") or str(c0)
        return str(data)
    except Exception:
        return str(data)

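# The HF Inference API returns different JSON shapes depending on the task;
# the branches above cover, for example:
#   [{"generated_text": "..."}]          (text-generation / text2text)
#   {"text": "..."}                      (speech recognition output)
#   {"choices": [{"text" | "message"}]}  (OpenAI-style responses)
# Anything unrecognized falls back to str(data).
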
def hf_text_generate(prompt: str, model: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    if not HF_API_TOKEN:
        return "[ERROR] HF_API_TOKEN not configured in Space Secrets."
    model = model or HF_MODEL
    url = f"https://api-inference.huggingface.co/models/{model}"
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)}, "options": {"wait_for_model": True}}
    try:
        r = requests.post(url, headers=HF_HEADERS, json=payload, timeout=120)
        if r.status_code != 200:
            logger.error("HF text gen failed %s: %s", r.status_code, r.text[:400])
            return f"[ERROR] HF text gen {r.status_code}: {r.text[:300]}"
        return _parse_hf_text_response(r.json())
    except Exception as e:
        logger.exception("HF text exception")
        return f"[ERROR] HF text exception: {e}"

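# Minimal usage sketch (assumes HF_API_TOKEN is configured; the prompt is
# illustrative):
#   ans = hf_text_generate("Translate to Vietnamese: good morning",
#                          max_new_tokens=64, temperature=0.3)
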
def hf_stt_from_bytes(audio_bytes: bytes, model: Optional[str] = None) -> str:
    if not HF_API_TOKEN:
        return "[ERROR] HF_API_TOKEN not configured."
    model = model or HF_STT_MODEL
    url = f"https://api-inference.huggingface.co/models/{model}"
    headers = dict(HF_HEADERS); headers["Content-Type"] = "application/octet-stream"
    try:
        r = requests.post(url, headers=headers, data=audio_bytes, timeout=180)
        if r.status_code != 200:
            logger.error("HF STT failed %s: %s", r.status_code, r.text[:400])
            return f"[ERROR] HF STT {r.status_code}: {r.text[:300]}"
        out = r.json()
        if isinstance(out, dict) and "text" in out:
            return out["text"]
        return _parse_hf_text_response(out)
    except Exception as e:
        logger.exception("HF STT exception")
        return f"[ERROR] HF STT exception: {e}"

def tts_gtts_bytes(text: str) -> bytes:
    if not text: return b""
    lang = detect_vi_or_en(text)
    try:
        tts = gTTS(text=text, lang="vi" if lang == "vi" else "en")
        bio = io.BytesIO(); tts.write_to_fp(bio); bio.seek(0)
        return bio.read()
    except Exception:
        logger.exception("gTTS error")
        return b""

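# Quick local test (writes the MP3 produced by gTTS to disk):
#   with open("hello.mp3", "wb") as f:
#       f.write(tts_gtts_bytes("Xin chào"))  # "Xin chào" = "Hello"
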
def send_telegram_message(text: str):
    if not TELEGRAM_TOKEN or not TELEGRAM_CHATID:
        logger.debug("Telegram not configured")
        return
    try:
        url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
        requests.post(url, json={"chat_id": TELEGRAM_CHATID, "text": text}, timeout=10)
    except Exception:
        logger.exception("send_telegram_message failed")

def _start_telegram_poller():
    if not TELEGRAM_TOKEN:
        logger.info("Telegram poll disabled"); return
    base = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}"; offset = None
    logger.info("Telegram poller started")
    while True:
        try:
            params = {"timeout": 30}
            if offset: params["offset"] = offset
            r = requests.get(base + "/getUpdates", params=params, timeout=35)
            if r.status_code != 200:
                time.sleep(2); continue
            data = r.json()
            for upd in data.get("result", []):
                offset = upd.get("update_id", 0) + 1
                msg = upd.get("message") or {}
                chat = msg.get("chat", {}); chat_id = chat.get("id"); text = (msg.get("text") or "").strip()
                if not text: continue
                logger.info("TG msg: %s", text)
                if text.lower().startswith("/ask "):
                    q = text[5:].strip(); ans = hf_text_generate(q)
                    requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10)
                elif text.lower().startswith("/say "):
                    phrase = text[5:].strip()
                    audio = tts_gtts_bytes(phrase)
                    if audio:
                        files = {"audio": ("reply.mp3", audio, "audio/mpeg")}
                        requests.post(base + "/sendAudio", files=files, data={"chat_id": chat_id}, timeout=30)
                    else:
                        requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "[TTS failed]"}, timeout=10)
                elif text.lower().startswith("/status"):
                    requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "KC Robot brain running"}, timeout=10)
                else:
                    requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "Commands: /ask <q> | /say <text> | /status"}, timeout=10)
        except Exception:
            logger.exception("Telegram poller exception")
            time.sleep(3)

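# Note on the loop above: getUpdates is Telegram's long-polling API — the
# request blocks for up to `timeout` seconds waiting for new messages, and
# passing offset = last update_id + 1 acknowledges processed updates so they
# are not redelivered on the next call.
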
if TELEGRAM_TOKEN:
    t = threading.Thread(target=_start_telegram_poller, daemon=True); t.start()

# Gradio UI
with gr.Blocks(title="KC Robot AI v4.2 — Cloud Brain") as demo:
    gr.Markdown("## 🤖 KC Robot AI v4.2 — Cloud Brain\n(Requires HF_API_TOKEN in Secrets for full AI/STT)")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=440, type="messages", elem_id="chatbot")
            text_in = gr.Textbox(lines=2, placeholder="Nhập câu (VN/EN)...", label="Text input")  # "Nhập câu" = "Enter a sentence"
            # Gradio 4+: "sources" (a list) replaces the old "source" argument
            mic = gr.Audio(sources=["microphone"], type="filepath", label="Record voice (browser mic)")
            send = gr.Button("Send")
            with gr.Row():
                temp = gr.Slider(0.0, 1.0, value=0.7, label="Temperature")
                tokens = gr.Slider(32, 1024, value=256, step=16, label="Max tokens")
            model_override = gr.Textbox(label="HF model override (optional)")
        with gr.Column(scale=1):
            gr.Markdown("### TTS / STT")
            tts_box = gr.Textbox(lines=2, label="Text → TTS")
            tts_btn = gr.Button("Create TTS")
            # type="filepath" so tts_fn can return a path to the generated MP3
            tts_audio = gr.Audio(label="TTS audio", interactive=False, type="filepath")
            gr.Markdown("Upload audio for STT")
            up = gr.Audio(sources=["upload"], type="filepath", label="Upload audio")
            stt_btn = gr.Button("Transcribe")
            stt_out = gr.Textbox(label="Transcription")

    def chat_fn(audio_file, typed_text, temperature, max_tokens, model_override_val, history):
        user_text = (typed_text or "").strip()
        if audio_file:
            try:
                with open(audio_file, "rb") as f: b = f.read()
                stt = hf_stt_from_bytes(b)
                if stt and not stt.startswith("[ERROR]"): user_text = stt
            except Exception:
                logger.exception("STT from audio failed")
        if not user_text: return history or [], ""
        prompt = f"You are KC Robot AI, bilingual assistant. Answer in the same language as the user.\n\nUser: {user_text}\nAssistant:"
        model = model_override_val.strip() if model_override_val else HF_MODEL
        ans = hf_text_generate(prompt, model=model, max_new_tokens=int(max_tokens), temperature=float(temperature))
        CONVERSATION.append((user_text, ans)); push_display("YOU: " + user_text[:80]); push_display("BOT: " + ans[:80])
        if TELEGRAM_TOKEN and TELEGRAM_CHATID:
            try: send_telegram_message(f"You: {user_text}\nBot: {ans}")
            except Exception: logger.exception("telegram notify failed")
        # Chatbot uses type="messages", so history entries are role/content dicts
        history = history or []
        history.append({"role": "user", "content": user_text})
        history.append({"role": "assistant", "content": ans})
        return history, ""

    def tts_fn(text, model_override_val):
        if not text or not text.strip(): return None
        audio = tts_gtts_bytes(text)
        if audio == b"": raise gr.Error("TTS generation failed (gTTS).")
        # gr.Audio(type="filepath") expects a path, so write the MP3 bytes to a temp file
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(audio)
            return f.name

    def stt_fn(local_path, model_override_val):
        if not local_path: return ""
        with open(local_path, "rb") as f: b = f.read()
        txt = hf_stt_from_bytes(b); push_display("Voice: " + (txt[:80] if isinstance(txt, str) else str(txt)))
        return txt

    # chatbot is passed as an input so chat_fn receives the current history
    send.click(chat_fn, inputs=[mic, text_in, temp, tokens, model_override, chatbot], outputs=[chatbot, text_in])
    tts_btn.click(tts_fn, inputs=[tts_box, model_override], outputs=[tts_audio])
    stt_btn.click(stt_fn, inputs=[up, model_override], outputs=[stt_out])

# FastAPI endpoints for ESP32.
# Custom routes are registered on a plain FastAPI app; the Gradio UI is
# mounted onto it at "/" at the bottom of the file (routes registered here
# take precedence over the mount).
app = FastAPI()

@app.post("/api/ask")
async def api_ask(req: Request):
    try: j = await req.json()
    except Exception: return JSONResponse({"error": "invalid json"}, status_code=400)
    text = (j.get("text", "") or "").strip(); lang = (j.get("lang", "auto") or "auto").strip().lower()
    if not text: return JSONResponse({"error": "no text"}, status_code=400)
    if not HF_API_TOKEN: return JSONResponse({"error": "HF_API_TOKEN not configured in Space Secrets."}, status_code=500)
    if lang == "vi": prompt = "Bạn là trợ lý thông minh. Trả lời bằng tiếng Việt, rõ ràng:\n\n" + text  # "You are a smart assistant. Answer in clear Vietnamese:"
    elif lang == "en": prompt = "You are a helpful assistant. Answer in English:\n\n" + text
    else: prompt = "You are bilingual. Answer in the language of the question.\n\n" + text
    ans = hf_text_generate(prompt); CONVERSATION.append((text, ans)); push_display("YOU: " + text[:80]); push_display("BOT: " + ans[:80])
    return {"answer": ans}

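# Example call (Python requests shown for testing from a PC; the ESP32 makes
# the equivalent HTTP POST; "<space-host>" is a placeholder for the Space URL):
#   r = requests.post("https://<space-host>/api/ask",
#                     json={"text": "Xin chào", "lang": "vi"}, timeout=60)
#   r.json()  -> {"answer": "..."}
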
@app.post("/api/tts")
async def api_tts(req: Request):
    try: j = await req.json()
    except Exception: return JSONResponse({"error": "invalid json"}, status_code=400)
    text = (j.get("text", "") or "").strip()
    if not text: return JSONResponse({"error": "no text"}, status_code=400)
    audio = tts_gtts_bytes(text)
    if audio == b"": return JSONResponse({"error": "TTS generation failed (gTTS)."}, status_code=500)
    return Response(content=audio, media_type="audio/mpeg")

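# Example call (the response body is raw MP3 bytes; placeholder host as above):
#   r = requests.post("https://<space-host>/api/tts", json={"text": "Xin chào"})
#   open("reply.mp3", "wb").write(r.content)
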
@app.post("/api/stt")
async def api_stt(file: UploadFile = File(...)):
    try: content = await file.read()
    except Exception: return JSONResponse({"error": "file read error"}, status_code=400)
    if not content: return JSONResponse({"error": "no audio content"}, status_code=400)
    if not HF_API_TOKEN: return JSONResponse({"error": "HF_API_TOKEN not configured in Space Secrets."}, status_code=500)
    txt = hf_stt_from_bytes(content)
    CONVERSATION.append((f"[voice] {txt}", "")); push_display("Voice: " + (txt[:80] if isinstance(txt, str) else str(txt)))
    return {"text": txt}

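# Example call (multipart upload; the form field must be named "file"):
#   with open("clip.wav", "rb") as f:
#       r = requests.post("https://<space-host>/api/stt", files={"file": f})
#   r.json()  -> {"text": "..."}
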
@app.post("/api/presence")
async def api_presence(req: Request):
    try: j = await req.json()
    except Exception: return JSONResponse({"error": "invalid json"}, status_code=400)
    note = (j.get("note", "Có người phía trước") or "").strip()  # default note: "Someone is in front"
    greeting = f"Xin chào! {note}"  # "Xin chào!" = "Hello!"
    push_display("RADAR: " + note[:80]); CONVERSATION.append(("__presence__", greeting))
    if TELEGRAM_TOKEN and TELEGRAM_CHATID:
        try: send_telegram_message(f"⚠️ Robot: Phát hiện người - {note}")  # "Phát hiện người" = "Person detected"
        except Exception: logger.exception("telegram notify failed")
    # Also produce a friendly greeting for the robot to play.
    # Return the greeting so the ESP32 can fetch audio via /api/tts if desired.
    return {"greeting": greeting}

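# Intended flow (per the comments above): the ESP32 POSTs {"note": ...} here
# when its radar detects a person, reads back {"greeting": ...}, then POSTs
# that greeting to /api/tts to obtain MP3 audio to play.
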
@app.get("/api/display")
async def api_display():
    return {"lines": DISPLAY_BUFFER.copy(), "conv_len": len(CONVERSATION)}

@app.post("/api/config")
async def api_config(req: Request):
    global HF_MODEL, HF_STT_MODEL
    try: j = await req.json()
    except Exception: return JSONResponse({"error": "invalid json"}, status_code=400)
    changed = {}
    if "hf_model" in j: HF_MODEL = j["hf_model"]; changed["hf_model"] = HF_MODEL
    if "hf_stt_model" in j: HF_STT_MODEL = j["hf_stt_model"]; changed["hf_stt_model"] = HF_STT_MODEL
    return {"changed": changed}

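# Example call (the model name is illustrative):
#   requests.post("https://<space-host>/api/config",
#                 json={"hf_model": "google/flan-t5-xl"})
#   -> {"changed": {"hf_model": "google/flan-t5-xl"}}
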
# Mount the Gradio UI onto the FastAPI app and serve both with one server.
# (demo.app only exists after launch, so routes cannot be attached to it
# directly; gr.mount_gradio_app is the supported way to combine the two.)
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))