Spaces:

42Cummer
/

UofTearsBotAPI

Paused

App Files Files Community

42Cummer committed on Aug 24

Commit

b48d8e8

verified ·

1 Parent(s): e556488

Streaming Replies

Browse files

Files changed (3) hide show

AdviceGenerator.py +13 -14
UofTearsBot.py +15 -17
app.py +17 -10

AdviceGenerator.py CHANGED Viewed

@@ -34,7 +34,7 @@ class AdviceGenerator(object):
         max_tokens: int = 600,        # give enough headroom
         temperature: float = 0.6,
         top_p: float = 0.9,
-    ) -> Dict[str, str]:
         msgs = [self.role]
@@ -53,16 +53,15 @@ class AdviceGenerator(object):
                 "Follow the system instructions strictly. Do NOT ask vague questions first."
             ),
         })
-        try:
-            resp = self.llm.create_chat_completion(
-                messages=msgs,
-                temperature=temperature,
-                top_p=top_p,
-                max_tokens=max_tokens,
-                stream=False,
-            )
-            text = resp["choices"][0]["message"]["content"].strip()
-            return {"text": text}
-        except Exception as e:
-            return {"text": f"I'm here to listen. Could you tell me more about how \"{user_text}\" is affecting you?"}

         max_tokens: int = 600,        # give enough headroom
         temperature: float = 0.6,
         top_p: float = 0.9,
+    ):
         msgs = [self.role]
                 "Follow the system instructions strictly. Do NOT ask vague questions first."
             ),
         })
+        stream = self.llm.create_chat_completion(
+            messages=msgs,
+            temperature=temperature,
+            top_p=top_p,
+            max_tokens=max_tokens,
+            stream=True,
+        )
+        for chunk in stream:
+            if "choices" in chunk:
+                delta = chunk["choices"][0]["delta"].get("content", "")
+                if delta:
+                    yield delta

UofTearsBot.py CHANGED Viewed

@@ -4,10 +4,10 @@ from IllnessClassifier import IllnessClassifier
 from typing import List, Dict
 class UofTearsBot(object):
-    def __init__(self, llm, threshold: float = 0.86, max_history_msgs: int = 50):
         self.suicidality_detector = SIDetector()
         self.illness_classifier = IllnessClassifier()
-        self.chatbot = AdviceGenerator(llm)
         self.history: List[Dict[str, str]] = []
         self.FLAG = False   # suicidal crisis flag
         self.threshold = threshold
@@ -37,24 +37,22 @@ class UofTearsBot(object):
     def converse(self, user_text: str) -> str:
         disorder = self.safety_check(user_text)
-        # store user text into history
         self.history.append({"role": "user", "content": user_text})
         if self.FLAG:
-            # crisis flow: respond with fixed crisis message only
             crisis_msg = self.userCrisis()
             self.history.append({"role": "assistant", "content": crisis_msg})
-            return crisis_msg
-        # normal advice generation
-        pruned_history = self._prune_history()
-        advice = self.chatbot.generate_advice(
             disorder=disorder,
             user_text=user_text,
-            history=pruned_history,
-        )['text']
-        # add bot response to history
-        self.history.append({"role": "assistant", "content": advice})
-        return advice

 from typing import List, Dict
 class UofTearsBot(object):
+    def __init__(self, threshold: float = 0.86, max_history_msgs: int = 50):
         self.suicidality_detector = SIDetector()
         self.illness_classifier = IllnessClassifier()
+        self.chatbot = AdviceGenerator()
         self.history: List[Dict[str, str]] = []
         self.FLAG = False   # suicidal crisis flag
         self.threshold = threshold
     def converse(self, user_text: str) -> str:
         disorder = self.safety_check(user_text)
+        # store user input
         self.history.append({"role": "user", "content": user_text})
+        # crisis branch
         if self.FLAG:
             crisis_msg = self.userCrisis()
             self.history.append({"role": "assistant", "content": crisis_msg})
+            yield crisis_msg
+            return
+        # normal branch: stream advice tokens
+        reply_so_far = ""
+        for delta in self.chatbot.generate_advice(
             disorder=disorder,
             user_text=user_text,
+            history=self._prune_history(),
+        ):
+            reply_so_far += delta
+            yield delta   # stream to FastAPI as soon as a token arrives
+        # once stream is done, save full reply
+        self.history.append({"role": "assistant", "content": reply_so_far})

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import dotenv
 import torch
 from fastapi import FastAPI, HTTPException, Request
-from fastapi.responses import JSONResponse, HTMLResponse
 from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download, login
@@ -21,9 +21,9 @@ from transformers import (
 from UofTearsBot import UofTearsBot
-MODEL_REPO="bartowski/Mistral-7B-Instruct-v0.3-GGUF"
-MODEL_FILE="Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
-CHAT_FORMAT="mistral-instruct"
 dotenv.load_dotenv()
 login(token=os.getenv("HF_TOKEN"))
@@ -51,18 +51,25 @@ class ChatRequest(BaseModel):
     user_id: str
     user_text: str
 @app.post("/chat")
 async def chat(request: ChatRequest):
     try:
         if request.user_id not in chatbots:
             chatbots[request.user_id] = UofTearsBot(llm)
         current_bot = chatbots[request.user_id]
-        print("[INFO] Model is generating response...", flush=True)
-        response = current_bot.converse(request.user_text)
-        return JSONResponse(content={"response": response, "history": current_bot.history})
     except Exception as e:
         import traceback
-        traceback.print_exc()  # logs full stack trace to HF Logs
         return JSONResponse(
             status_code=500,
             content={"error": str(e)}
@@ -72,7 +79,7 @@ async def chat(request: ChatRequest):
 @app.get("/", response_class=HTMLResponse)
 async def home():
     return "<h1>App is running 🚀</h1>"
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860) # huggingface port

 import torch
 from fastapi import FastAPI, HTTPException, Request
+from fastapi.responses import JSONResponse, HTMLResponse, StreamingResponse
 from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download, login
 from UofTearsBot import UofTearsBot
+MODEL_REPO = "bartowski/Mistral-7B-Instruct-v0.3-GGUF"
+MODEL_FILE = "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
+CHAT_FORMAT = "mistral-instruct"
 dotenv.load_dotenv()
 login(token=os.getenv("HF_TOKEN"))
     user_id: str
     user_text: str
 @app.post("/chat")
 async def chat(request: ChatRequest):
     try:
         if request.user_id not in chatbots:
             chatbots[request.user_id] = UofTearsBot(llm)
         current_bot = chatbots[request.user_id]
+        def token_generator():
+            print("[INFO] Model is streaming response...", flush=True)
+            for token in current_bot.converse(request.user_text):
+                yield token
+            print("[INFO] Model finished streaming ✅", flush=True)
+        return StreamingResponse(token_generator(), media_type="text/plain")
     except Exception as e:
         import traceback
+        traceback.print_exc()  # logs to HF logs
         return JSONResponse(
             status_code=500,
             content={"error": str(e)}
 @app.get("/", response_class=HTMLResponse)
 async def home():
     return "<h1>App is running 🚀</h1>"
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)  # huggingface port