kcrobot25 committed on
Commit e05484b · verified · 1 Parent(s): 43ae242

initial commit

Files changed (1): app.py +282 -0
app.py ADDED
# app.py — KC Robot AI v4.2 — Cloud Brain (Gradio + REST API)
# Features:
# - Gradio UI (chat, record, TTS)
# - HF Inference API for text generation & STT (requires HF_API_TOKEN in Secrets)
# - gTTS TTS (fallback)
# - Telegram notify (optional via TELEGRAM_TOKEN & TELEGRAM_CHATID)
# - Endpoints for ESP32: /api/ask, /api/tts, /api/stt, /api/presence, /api/display, /api/config
# Notes: Add HF_API_TOKEN (and optional TELEGRAM_TOKEN/TELEGRAM_CHATID) in Space Secrets.

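# A minimal sketch of the Space configuration this app expects (variable
# names are the ones read via os.getenv() below; values are placeholders):
#   HF_API_TOKEN   = hf_xxx...                (required for text gen & STT)
#   HF_MODEL       = google/flan-t5-large     (optional, default shown)
#   HF_STT_MODEL   = openai/whisper-small     (optional, default shown)
#   TELEGRAM_TOKEN / TELEGRAM_CHATID          (optional, enables Telegram)
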
import os, io, time, threading, logging, tempfile
from typing import Any, List, Tuple, Optional
import requests, gradio as gr
from gtts import gTTS
from fastapi import FastAPI, Request, UploadFile, File
from starlette.responses import JSONResponse, Response

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("kcrobot.v4.2.cloud")

HF_API_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
HF_MODEL = os.getenv("HF_MODEL", "google/flan-t5-large").strip()
HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip()

TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
TELEGRAM_CHATID = os.getenv("TELEGRAM_CHATID", "").strip()

HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

CONVERSATION: List[Tuple[str, str]] = []
DISPLAY_BUFFER: List[str] = []  # rolling buffer of short lines for the robot's display
DISPLAY_LIMIT = 16

def push_display(line: str):
    DISPLAY_BUFFER.append(line)
    if len(DISPLAY_BUFFER) > DISPLAY_LIMIT:
        DISPLAY_BUFFER.pop(0)

def detect_vi_or_en(text: str) -> str:
    # Heuristic: any Vietnamese diacritic marks the text as Vietnamese.
    if not text: return "en"
    vi_chars = "ăâđêôơưáàảãạắằẳẵặấầẩẫậéèẻẽẹíìỉĩịóòỏõọúùủũụýỳỷỹỵ"
    for ch in text.lower():
        if ch in vi_chars:
            return "vi"
    return "en"

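# Quick sanity checks of the heuristic:
#   detect_vi_or_en("Xin chào")  -> "vi"  (contains "à")
#   detect_vi_or_en("hello")     -> "en"
#   detect_vi_or_en("")          -> "en"  (default)
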
def _parse_hf_text_response(data: Any) -> str:
    try:
        if isinstance(data, list) and data and isinstance(data[0], dict):
            return data[0].get("generated_text", "") or str(data[0])
        if isinstance(data, dict) and "generated_text" in data:
            return data.get("generated_text", "")
        if isinstance(data, dict) and "text" in data:
            return data.get("text", "")
        if isinstance(data, dict) and "choices" in data:
            c0 = data["choices"][0]
            return c0.get("text") or c0.get("message", {}).get("content", "") or str(c0)
        return str(data)
    except Exception:
        return str(data)

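# The HF Inference API returns different JSON shapes depending on the task;
# the branches above cover, for example:
#   [{"generated_text": "..."}]          (text-generation / text2text)
#   {"text": "..."}                      (speech recognition output)
#   {"choices": [{"text" | "message"}]}  (OpenAI-style responses)
# Anything unrecognized falls back to str(data).
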
def hf_text_generate(prompt: str, model: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    if not HF_API_TOKEN:
        return "[ERROR] HF_API_TOKEN not configured in Space Secrets."
    model = model or HF_MODEL
    url = f"https://api-inference.huggingface.co/models/{model}"
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)}, "options": {"wait_for_model": True}}
    try:
        r = requests.post(url, headers=HF_HEADERS, json=payload, timeout=120)
        if r.status_code != 200:
            logger.error("HF text gen failed %s: %s", r.status_code, r.text[:400])
            return f"[ERROR] HF text gen {r.status_code}: {r.text[:300]}"
        return _parse_hf_text_response(r.json())
    except Exception as e:
        logger.exception("HF text exception")
        return f"[ERROR] HF text exception: {e}"

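# Minimal usage sketch (assumes HF_API_TOKEN is configured; the prompt is
# illustrative):
#   ans = hf_text_generate("Translate to Vietnamese: good morning",
#                          max_new_tokens=64, temperature=0.3)
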
def hf_stt_from_bytes(audio_bytes: bytes, model: Optional[str] = None) -> str:
    if not HF_API_TOKEN:
        return "[ERROR] HF_API_TOKEN not configured."
    model = model or HF_STT_MODEL
    url = f"https://api-inference.huggingface.co/models/{model}"
    headers = dict(HF_HEADERS); headers["Content-Type"] = "application/octet-stream"
    try:
        r = requests.post(url, headers=headers, data=audio_bytes, timeout=180)
        if r.status_code != 200:
            logger.error("HF STT failed %s: %s", r.status_code, r.text[:400])
            return f"[ERROR] HF STT {r.status_code}: {r.text[:300]}"
        out = r.json()
        if isinstance(out, dict) and "text" in out:
            return out["text"]
        return _parse_hf_text_response(out)
    except Exception as e:
        logger.exception("HF STT exception")
        return f"[ERROR] HF STT exception: {e}"

def tts_gtts_bytes(text: str) -> bytes:
    if not text: return b""
    lang = detect_vi_or_en(text)
    try:
        tts = gTTS(text=text, lang="vi" if lang == "vi" else "en")
        bio = io.BytesIO(); tts.write_to_fp(bio); bio.seek(0)
        return bio.read()
    except Exception:
        logger.exception("gTTS error")
        return b""

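# Quick local test (writes the MP3 produced by gTTS to disk):
#   with open("hello.mp3", "wb") as f:
#       f.write(tts_gtts_bytes("Xin chào"))  # "Xin chào" = "Hello"
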
def send_telegram_message(text: str):
    if not TELEGRAM_TOKEN or not TELEGRAM_CHATID:
        logger.debug("Telegram not configured")
        return
    try:
        url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
        requests.post(url, json={"chat_id": TELEGRAM_CHATID, "text": text}, timeout=10)
    except Exception:
        logger.exception("send_telegram_message failed")

def _start_telegram_poller():
    if not TELEGRAM_TOKEN:
        logger.info("Telegram poll disabled"); return
    base = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}"; offset = None
    logger.info("Telegram poller started")
    while True:
        try:
            params = {"timeout": 30}
            if offset: params["offset"] = offset
            r = requests.get(base + "/getUpdates", params=params, timeout=35)
            if r.status_code != 200:
                time.sleep(2); continue
            data = r.json()
            for upd in data.get("result", []):
                offset = upd.get("update_id", 0) + 1
                msg = upd.get("message") or {}
                chat = msg.get("chat", {}); chat_id = chat.get("id"); text = (msg.get("text") or "").strip()
                if not text: continue
                logger.info("TG msg: %s", text)
                if text.lower().startswith("/ask "):
                    q = text[5:].strip(); ans = hf_text_generate(q)
                    requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10)
                elif text.lower().startswith("/say "):
                    phrase = text[5:].strip()
                    audio = tts_gtts_bytes(phrase)
                    if audio:
                        files = {"audio": ("reply.mp3", audio, "audio/mpeg")}
                        requests.post(base + "/sendAudio", files=files, data={"chat_id": chat_id}, timeout=30)
                    else:
                        requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "[TTS failed]"}, timeout=10)
                elif text.lower().startswith("/status"):
                    requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "KC Robot brain running"}, timeout=10)
                else:
                    requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "Commands: /ask <q> | /say <text> | /status"}, timeout=10)
        except Exception:
            logger.exception("Telegram poller exception")
            time.sleep(3)

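# Note on the loop above: getUpdates is Telegram's long-polling API — the
# request blocks for up to `timeout` seconds waiting for new messages, and
# passing offset = last update_id + 1 acknowledges processed updates so they
# are not redelivered on the next call.
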
if TELEGRAM_TOKEN:
    t = threading.Thread(target=_start_telegram_poller, daemon=True); t.start()

# Gradio UI
with gr.Blocks(title="KC Robot AI v4.2 — Cloud Brain") as demo:
    gr.Markdown("## 🤖 KC Robot AI v4.2 — Cloud Brain\n(Requires HF_API_TOKEN in Secrets for full AI/STT)")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=440, type="messages", elem_id="chatbot")
            text_in = gr.Textbox(lines=2, placeholder="Nhập câu (VN/EN)...", label="Text input")  # "Nhập câu" = "Enter a sentence"
            # Gradio 4+: "sources" (a list) replaces the old "source" argument
            mic = gr.Audio(sources=["microphone"], type="filepath", label="Record voice (browser mic)")
            send = gr.Button("Send")
            with gr.Row():
                temp = gr.Slider(0.0, 1.0, value=0.7, label="Temperature")
                tokens = gr.Slider(32, 1024, value=256, step=16, label="Max tokens")
            model_override = gr.Textbox(label="HF model override (optional)")
        with gr.Column(scale=1):
            gr.Markdown("### TTS / STT")
            tts_box = gr.Textbox(lines=2, label="Text → TTS")
            tts_btn = gr.Button("Create TTS")
            # type="filepath" so tts_fn can return a path to the generated MP3
            tts_audio = gr.Audio(label="TTS audio", interactive=False, type="filepath")
            gr.Markdown("Upload audio for STT")
            up = gr.Audio(sources=["upload"], type="filepath", label="Upload audio")
            stt_btn = gr.Button("Transcribe")
            stt_out = gr.Textbox(label="Transcription")

    def chat_fn(audio_file, typed_text, temperature, max_tokens, model_override_val, history):
        user_text = (typed_text or "").strip()
        if audio_file:
            try:
                with open(audio_file, "rb") as f: b = f.read()
                stt = hf_stt_from_bytes(b)
                if stt and not stt.startswith("[ERROR]"): user_text = stt
            except Exception:
                logger.exception("STT from audio failed")
        if not user_text: return history or [], ""
        prompt = f"You are KC Robot AI, bilingual assistant. Answer in the same language as the user.\n\nUser: {user_text}\nAssistant:"
        model = model_override_val.strip() if model_override_val else HF_MODEL
        ans = hf_text_generate(prompt, model=model, max_new_tokens=int(max_tokens), temperature=float(temperature))
        CONVERSATION.append((user_text, ans)); push_display("YOU: " + user_text[:80]); push_display("BOT: " + ans[:80])
        if TELEGRAM_TOKEN and TELEGRAM_CHATID:
            try: send_telegram_message(f"You: {user_text}\nBot: {ans}")
            except Exception: logger.exception("telegram notify failed")
        # Chatbot uses type="messages", so history entries are role/content dicts
        history = history or []
        history.append({"role": "user", "content": user_text})
        history.append({"role": "assistant", "content": ans})
        return history, ""

    def tts_fn(text, model_override_val):
        if not text or not text.strip(): return None
        audio = tts_gtts_bytes(text)
        if audio == b"": raise gr.Error("TTS generation failed (gTTS).")
        # gr.Audio(type="filepath") expects a path, so write the MP3 bytes to a temp file
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(audio)
            return f.name

    def stt_fn(local_path, model_override_val):
        if not local_path: return ""
        with open(local_path, "rb") as f: b = f.read()
        txt = hf_stt_from_bytes(b); push_display("Voice: " + (txt[:80] if isinstance(txt, str) else str(txt)))
        return txt

    # chatbot is passed as an input so chat_fn receives the current history
    send.click(chat_fn, inputs=[mic, text_in, temp, tokens, model_override, chatbot], outputs=[chatbot, text_in])
    tts_btn.click(tts_fn, inputs=[tts_box, model_override], outputs=[tts_audio])
    stt_btn.click(stt_fn, inputs=[up, model_override], outputs=[stt_out])

# FastAPI endpoints for ESP32.
# Custom routes are registered on a plain FastAPI app; the Gradio UI is
# mounted onto it at "/" at the bottom of the file (routes registered here
# take precedence over the mount).
app = FastAPI()

@app.post("/api/ask")
async def api_ask(req: Request):
    try: j = await req.json()
    except Exception: return JSONResponse({"error": "invalid json"}, status_code=400)
    text = (j.get("text", "") or "").strip(); lang = (j.get("lang", "auto") or "auto").strip().lower()
    if not text: return JSONResponse({"error": "no text"}, status_code=400)
    if not HF_API_TOKEN: return JSONResponse({"error": "HF_API_TOKEN not configured in Space Secrets."}, status_code=500)
    if lang == "vi": prompt = "Bạn là trợ lý thông minh. Trả lời bằng tiếng Việt, rõ ràng:\n\n" + text  # "You are a smart assistant. Answer in clear Vietnamese:"
    elif lang == "en": prompt = "You are a helpful assistant. Answer in English:\n\n" + text
    else: prompt = "You are bilingual. Answer in the language of the question.\n\n" + text
    ans = hf_text_generate(prompt); CONVERSATION.append((text, ans)); push_display("YOU: " + text[:80]); push_display("BOT: " + ans[:80])
    return {"answer": ans}

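# Example call (Python requests shown for testing from a PC; the ESP32 makes
# the equivalent HTTP POST; "<space-host>" is a placeholder for the Space URL):
#   r = requests.post("https://<space-host>/api/ask",
#                     json={"text": "Xin chào", "lang": "vi"}, timeout=60)
#   r.json()  -> {"answer": "..."}
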
@app.post("/api/tts")
async def api_tts(req: Request):
    try: j = await req.json()
    except Exception: return JSONResponse({"error": "invalid json"}, status_code=400)
    text = (j.get("text", "") or "").strip()
    if not text: return JSONResponse({"error": "no text"}, status_code=400)
    audio = tts_gtts_bytes(text)
    if audio == b"": return JSONResponse({"error": "TTS generation failed (gTTS)."}, status_code=500)
    return Response(content=audio, media_type="audio/mpeg")

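# Example call (the response body is raw MP3 bytes; placeholder host as above):
#   r = requests.post("https://<space-host>/api/tts", json={"text": "Xin chào"})
#   open("reply.mp3", "wb").write(r.content)
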
@app.post("/api/stt")
async def api_stt(file: UploadFile = File(...)):
    try: content = await file.read()
    except Exception: return JSONResponse({"error": "file read error"}, status_code=400)
    if not content: return JSONResponse({"error": "no audio content"}, status_code=400)
    if not HF_API_TOKEN: return JSONResponse({"error": "HF_API_TOKEN not configured in Space Secrets."}, status_code=500)
    txt = hf_stt_from_bytes(content)
    CONVERSATION.append((f"[voice] {txt}", "")); push_display("Voice: " + (txt[:80] if isinstance(txt, str) else str(txt)))
    return {"text": txt}

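# Example call (multipart upload; the form field must be named "file"):
#   with open("clip.wav", "rb") as f:
#       r = requests.post("https://<space-host>/api/stt", files={"file": f})
#   r.json()  -> {"text": "..."}
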
@app.post("/api/presence")
async def api_presence(req: Request):
    try: j = await req.json()
    except Exception: return JSONResponse({"error": "invalid json"}, status_code=400)
    note = (j.get("note", "Có người phía trước") or "").strip()  # default note: "Someone is in front"
    greeting = f"Xin chào! {note}"  # "Xin chào!" = "Hello!"
    push_display("RADAR: " + note[:80]); CONVERSATION.append(("__presence__", greeting))
    if TELEGRAM_TOKEN and TELEGRAM_CHATID:
        try: send_telegram_message(f"⚠️ Robot: Phát hiện người - {note}")  # "Phát hiện người" = "Person detected"
        except Exception: logger.exception("telegram notify failed")
    # Also produce a friendly greeting for the robot to play.
    # Return the greeting so the ESP32 can fetch audio via /api/tts if desired.
    return {"greeting": greeting}

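# Intended flow (per the comments above): the ESP32 POSTs {"note": ...} here
# when its radar detects a person, reads back {"greeting": ...}, then POSTs
# that greeting to /api/tts to obtain MP3 audio to play.
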
@app.get("/api/display")
async def api_display():
    return {"lines": DISPLAY_BUFFER.copy(), "conv_len": len(CONVERSATION)}

@app.post("/api/config")
async def api_config(req: Request):
    global HF_MODEL, HF_STT_MODEL
    try: j = await req.json()
    except Exception: return JSONResponse({"error": "invalid json"}, status_code=400)
    changed = {}
    if "hf_model" in j: HF_MODEL = j["hf_model"]; changed["hf_model"] = HF_MODEL
    if "hf_stt_model" in j: HF_STT_MODEL = j["hf_stt_model"]; changed["hf_stt_model"] = HF_STT_MODEL
    return {"changed": changed}

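# Example call (the model name is illustrative):
#   requests.post("https://<space-host>/api/config",
#                 json={"hf_model": "google/flan-t5-xl"})
#   -> {"changed": {"hf_model": "google/flan-t5-xl"}}
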
# Mount the Gradio UI onto the FastAPI app and serve both with one server.
# (demo.app only exists after launch, so routes cannot be attached to it
# directly; gr.mount_gradio_app is the supported way to combine the two.)
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))