chats

Sleeping

App Files Files Community

abdullahalioo committed on May 4

Commit

b685be0

verified ·

1 Parent(s): 2a9b961

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -38

app.py CHANGED Viewed

@@ -56,79 +56,81 @@ AVAILABLE_MODELS = {
     "mistral-ai/mistral-small-2503": "Mistral Small 3.1"
 }
-# Chat memory (in-memory)
 chat_histories = defaultdict(list)
-MAX_HISTORY = 100  # limit memory to avoid crashes
-# Generate response stream
 async def generate_ai_response(chat_id: str, model: str):
     token = os.getenv("GITHUB_TOKEN")
     if not token:
         raise HTTPException(status_code=500, detail="GitHub token not configured")
-    endpoint = "https://models.github.ai/inference"
     if model not in AVAILABLE_MODELS:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Model not available. Choose from: {', '.join(AVAILABLE_MODELS.keys())}"
-        )
-    client = AsyncOpenAI(base_url=endpoint, api_key=token)
     try:
-        stream = await asyncio.wait_for(
-            client.chat.completions.create(
-                messages=chat_histories[chat_id],
-                model=model,
-                temperature=1.0,
-                top_p=1.0,
-                stream=True
-            ),
-            timeout=60  # Prevent hangs
-        )
         async for chunk in stream:
             if chunk.choices and chunk.choices[0].delta.content:
                 content = chunk.choices[0].delta.content
                 yield content
-                chat_histories[chat_id].append({"role": "assistant", "content": content})
-                chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
     except asyncio.TimeoutError:
         yield "Error: Response timed out."
-        raise HTTPException(status_code=504, detail="Model timed out.")
-    except Exception as err:
-        yield f"Error: {str(err)}"
         raise HTTPException(status_code=500, detail="AI generation failed")
-# Chat endpoint
 @app.post("/generate")
 async def generate_response(
     chat_id: str = Query(..., description="Unique chat ID"),
-    prompt: str = Query(..., description="User message"),
     model: str = Query("openai/gpt-4.1-mini", description="Model to use")
 ):
-    if not prompt:
-        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
-    chat_histories[chat_id].append({"role": "user", "content": prompt})
-    chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
     return StreamingResponse(
         generate_ai_response(chat_id, model),
         media_type="text/event-stream"
     )
-# Optional: reset chat history
 @app.post("/reset")
-async def reset_chat(chat_id: str = Query(..., description="ID of chat to reset")):
-    if chat_id in chat_histories:
         chat_histories[chat_id].clear()
-        return {"message": f"Chat {chat_id} history reset."}
-    else:
-        raise HTTPException(status_code=404, detail="Chat ID not found")
-# For ASGI servers like Uvicorn
 def get_app():
     return app

     "mistral-ai/mistral-small-2503": "Mistral Small 3.1"
 }
+# In-memory chat history and locks
 chat_histories = defaultdict(list)
+chat_locks = defaultdict(asyncio.Lock)
+MAX_HISTORY = 100
+# Streaming AI generation
 async def generate_ai_response(chat_id: str, model: str):
     token = os.getenv("GITHUB_TOKEN")
     if not token:
+        yield "Error: GitHub token not configured"
         raise HTTPException(status_code=500, detail="GitHub token not configured")
     if model not in AVAILABLE_MODELS:
+        yield f"Error: Invalid model {model}"
+        raise HTTPException(status_code=400, detail="Invalid model")
+    client = AsyncOpenAI(
+        base_url="https://models.github.ai/inference",
+        api_key=token
+    )
     try:
+        async with chat_locks[chat_id]:
+            stream = await asyncio.wait_for(
+                client.chat.completions.create(
+                    messages=chat_histories[chat_id],
+                    model=model,
+                    temperature=1.0,
+                    top_p=1.0,
+                    stream=True
+                ),
+                timeout=60
+            )
         async for chunk in stream:
             if chunk.choices and chunk.choices[0].delta.content:
                 content = chunk.choices[0].delta.content
                 yield content
+                async with chat_locks[chat_id]:
+                    chat_histories[chat_id].append({"role": "assistant", "content": content})
+                    chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
     except asyncio.TimeoutError:
         yield "Error: Response timed out."
+        raise HTTPException(status_code=504, detail="Timeout")
+    except Exception as e:
+        yield f"Error: {str(e)}"
         raise HTTPException(status_code=500, detail="AI generation failed")
+# POST /generate
 @app.post("/generate")
 async def generate_response(
     chat_id: str = Query(..., description="Unique chat ID"),
+    prompt: str = Query(..., description="User prompt"),
     model: str = Query("openai/gpt-4.1-mini", description="Model to use")
 ):
+    if not prompt.strip():
+        raise HTTPException(status_code=400, detail="Prompt is required")
+    async with chat_locks[chat_id]:
+        chat_histories[chat_id].append({"role": "user", "content": prompt})
+        chat_histories[chat_id] = chat_histories[chat_id][-MAX_HISTORY:]
     return StreamingResponse(
         generate_ai_response(chat_id, model),
         media_type="text/event-stream"
     )
+# POST /reset
 @app.post("/reset")
+async def reset_chat(chat_id: str = Query(..., description="Chat ID to reset")):
+    async with chat_locks[chat_id]:
         chat_histories[chat_id].clear()
+    return {"message": f"Chat history for {chat_id} cleared."}
+# For ASGI hosting
 def get_app():
     return app