Update app.py
app.py CHANGED
@@ -1,12 +1,13 @@
 import os
+import httpx
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.responses import StreamingResponse
-from openai import AsyncOpenAI
 from collections import defaultdict
+from typing import AsyncGenerator
 
 app = FastAPI()
 
-#
+# Model list (unchanged)
 AVAILABLE_MODELS = {
     "openai/gpt-4.1": "OpenAI GPT-4.1",
     "openai/gpt-4.1-mini": "OpenAI GPT-4.1-mini",
@@ -55,53 +56,56 @@ AVAILABLE_MODELS = {
     "mistral-ai/mistral-small-2503": "Mistral Small 3.1"
 }
 
-# In-memory
+# In-memory history
 chat_histories = defaultdict(list)
 
-#
-async def generate_ai_response(chat_id: str, model: str):
+# Async generator for AI response
+async def generate_ai_response(chat_id: str, model: str) -> AsyncGenerator[str, None]:
     token = os.getenv("GITHUB_TOKEN")
     if not token:
         raise HTTPException(status_code=500, detail="GitHub token not configured")
 
-    endpoint = "https://models.github.ai/inference"
-
     if model not in AVAILABLE_MODELS:
-        raise HTTPException(status_code=400, detail=f"
-
-    client = AsyncOpenAI(base_url=endpoint, api_key=token)
+        raise HTTPException(status_code=400, detail=f"Invalid model. Choose from: {', '.join(AVAILABLE_MODELS.keys())}")
 
-
-
-
-
-        temperature=1.0,
-        top_p=1.0,
-        stream=True
-    )
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json"
+    }
 
-
-
-
-
-
-
+    payload = {
+        "model": model,
+        "messages": chat_histories[chat_id],
+        "stream": True,
+        "temperature": 1.0,
+        "top_p": 1.0
+    }
 
-
-
-
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        try:
+            async with client.stream("POST", "https://models.github.ai/inference", headers=headers, json=payload) as response:
+                async for line in response.aiter_lines():
+                    if line.startswith("data:"):
+                        data = line[len("data:"):].strip()
+                        if data == "[DONE]":
+                            break
+                        if data:
+                            yield f"{data}\n"
+                            # Optionally: append to chat history
+                            chat_histories[chat_id].append({"role": "assistant", "content": data})
+        except Exception as e:
+            yield f"Error: {str(e)}"
 
-#
+# Generate response endpoint
 @app.post("/generate")
 async def generate_response(
-    chat_id: str = Query(..., description="
-    prompt: str = Query(..., description="
-    model: str = Query("openai/gpt-4.1-mini", description="
+    chat_id: str = Query(..., description="Chat session ID"),
+    prompt: str = Query(..., description="User input message"),
+    model: str = Query("openai/gpt-4.1-mini", description="Model to use")
 ):
     if not prompt:
         raise HTTPException(status_code=400, detail="Prompt cannot be empty")
-
-    # Add user message to history
+
     chat_histories[chat_id].append({"role": "user", "content": prompt})
 
     return StreamingResponse(
@@ -109,14 +113,13 @@ async def generate_response(
         media_type="text/event-stream"
     )
 
-#
+# Reset chat history endpoint
 @app.post("/reset")
-async def reset_chat(chat_id: str = Query(
+async def reset_chat(chat_id: str = Query(...)):
     if chat_id in chat_histories:
         chat_histories[chat_id].clear()
         return {"message": f"Chat {chat_id} history reset."}
-
-    raise HTTPException(status_code=404, detail="Chat ID not found")
+    raise HTTPException(status_code=404, detail="Chat ID not found")
 
 def get_app():
     return app
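For reference, a minimal client sketch for the streaming /generate endpoint above. This is an illustration, not part of the commit: it assumes the app is served locally (for example with uvicorn app:app --port 8000), and the host, port, and chat_id are placeholder values.

    # Hypothetical client for POST /generate; host, port, and chat_id are assumptions.
    import asyncio
    import httpx

    async def main() -> None:
        params = {
            "chat_id": "demo",               # placeholder session key; history is kept per chat_id
            "prompt": "Hello!",
            "model": "openai/gpt-4.1-mini",  # must be a key of AVAILABLE_MODELS
        }
        async with httpx.AsyncClient(timeout=60.0) as client:
            # /generate responds with media_type="text/event-stream"; read it incrementally
            async with client.stream("POST", "http://localhost:8000/generate", params=params) as response:
                response.raise_for_status()
                async for line in response.aiter_lines():
                    if line:
                        print(line)  # each line is one chunk yielded by generate_ai_response

    asyncio.run(main())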
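Similarly, a hypothetical call to the /reset endpoint to clear a session's history, under the same local-server assumption:

    # Hypothetical client for POST /reset; host, port, and chat_id are assumptions.
    import asyncio
    import httpx

    async def reset_session() -> None:
        async with httpx.AsyncClient() as client:
            response = await client.post("http://localhost:8000/reset", params={"chat_id": "demo"})
            # Expect 200 with a confirmation message if the chat exists, 404 otherwise
            print(response.status_code, response.json())

    asyncio.run(reset_session())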