Mark-Lasfar committed
Commit d28afad · Parent: 8c49d21

update main.py

Files changed (3):
  1. api/endpoints.py +30 -15
  2. main.py +60 -7
  3. utils/generation.py +62 -45
api/endpoints.py CHANGED

@@ -1,3 +1,7 @@
+# api/endpoints.py
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+
 import os
 import uuid
 from fastapi import APIRouter, Depends, HTTPException, Request, status, UploadFile, File

@@ -31,9 +35,9 @@ if not BACKUP_HF_TOKEN:
     logger.warning("BACKUP_HF_TOKEN is not set. Fallback to secondary model will not work if primary token fails.")

 ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
-API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co/v1")
+API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
 SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
 TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")
 CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")

@@ -141,7 +145,6 @@ async def performance_stats():
         "uptime": os.popen("uptime").read().strip()
     }

-
 @router.post("/api/chat")
 async def chat_endpoint(
     request: Request,

@@ -183,7 +186,7 @@ async def chat_endpoint(
     is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
     if not is_available:
         logger.warning(f"Model {model_name} is not available at {api_endpoint}, trying fallback model.")
-        model_name = SECONDARY_MODEL_NAME  # try the fallback model
+        model_name = SECONDARY_MODEL_NAME
         is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
         if not is_available:
             logger.error(f"Fallback model {model_name} is not available at {selected_endpoint}")

@@ -209,6 +212,7 @@ async def chat_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing audio chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -225,14 +229,14 @@ async def chat_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
-                if isinstance(chunk, str):
+                logger.debug(f"Processing text chunk: {chunk[:100]}...")
+                if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                     response_chunks.append(chunk)
                 else:
-                    logger.warning(f"Unexpected non-string chunk in text stream: {chunk}")
+                    logger.warning(f"Skipping chunk: {chunk}")
             response = "".join(response_chunks)
             if not response.strip():
                 logger.warning(f"Empty response from {model_name}. Trying fallback model {SECONDARY_MODEL_NAME}.")
-                # try the fallback model
                 model_name = SECONDARY_MODEL_NAME
                 is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
                 if not is_available:

@@ -254,10 +258,11 @@ async def chat_endpoint(
                 )
                 response_chunks = []
                 for chunk in stream:
-                    if isinstance(chunk, str):
+                    logger.debug(f"Processing fallback text chunk: {chunk[:100]}...")
+                    if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                         response_chunks.append(chunk)
                     else:
-                        logger.warning(f"Unexpected non-string chunk in text stream: {chunk}")
+                        logger.warning(f"Skipping fallback chunk: {chunk}")
                 response = "".join(response_chunks)
                 if not response.strip():
                     logger.error(f"Empty response from fallback model {model_name}.")

@@ -281,6 +286,7 @@ async def chat_endpoint(
         }

     return {"response": response}
+
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(
     request: Request,

@@ -338,6 +344,7 @@ async def audio_transcription_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing transcription chunk: {chunk[:100]}...")
                 if isinstance(chunk, str):
                     response_chunks.append(chunk)
                 else:

@@ -401,6 +408,7 @@ async def text_to_speech_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing TTS chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -460,6 +468,7 @@ async def code_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing code audio chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -476,10 +485,11 @@ async def code_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
-                if isinstance(chunk, str):
+                logger.debug(f"Processing code text chunk: {chunk[:100]}...")
+                if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                     response_chunks.append(chunk)
                 else:
-                    logger.warning(f"Unexpected non-string chunk in code stream: {chunk}")
+                    logger.warning(f"Skipping code chunk: {chunk}")
             response = "".join(response_chunks)
             if not response.strip():
                 logger.error("Empty code response generated.")

@@ -532,6 +542,7 @@ async def analysis_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing analysis audio chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -548,10 +559,11 @@ async def analysis_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
-                if isinstance(chunk, str):
+                logger.debug(f"Processing analysis text chunk: {chunk[:100]}...")
+                if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                     response_chunks.append(chunk)
                 else:
-                    logger.warning(f"Unexpected non-string chunk in analysis stream: {chunk}")
+                    logger.warning(f"Skipping analysis chunk: {chunk}")
             response = "".join(response_chunks)
             if not response.strip():
                 logger.error("Empty analysis response generated.")

@@ -624,6 +636,7 @@ async def image_analysis_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing image analysis audio chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -640,10 +653,11 @@ async def image_analysis_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
-                if isinstance(chunk, str):
+                logger.debug(f"Processing image analysis text chunk: {chunk[:100]}...")
+                if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                     response_chunks.append(chunk)
                 else:
-                    logger.warning(f"Unexpected non-string chunk in image analysis stream: {chunk}")
+                    logger.warning(f"Skipping image analysis chunk: {chunk}")
             response = "".join(response_chunks)
             if not response.strip():
                 logger.error("Empty image analysis response generated.")

@@ -681,6 +695,7 @@ async def test_model(model: str = MODEL_NAME, endpoint: str = API_ENDPOINT):
             messages=[{"role": "user", "content": "Test"}],
             max_tokens=50
         )
+        logger.debug(f"Test model response: {response.choices[0].message.content}")
        return {"status": "success", "response": response.choices[0].message.content}
     except Exception as e:
         logger.error(f"Test model failed: {e}")
main.py CHANGED

@@ -1,3 +1,4 @@
+# main.py
 # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
 # SPDX-License-Identifier: Apache-2.0

@@ -27,12 +28,13 @@ from hashlib import md5
 from datetime import datetime
 from httpx_oauth.exceptions import GetIdEmailError
 import re
-import anyio  # add this import
+import anyio

 # Setup logging
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.DEBUG)  # switched to DEBUG so everything can be traced
 logger = logging.getLogger(__name__)
-logger.info("Files in current dir: %s", os.listdir(os.getcwd()))
+logger.info("Starting application...")
+logger.debug("Files in current directory: %s", os.listdir(os.getcwd()))

 # Check environment variables
 HF_TOKEN = os.getenv("HF_TOKEN")

@@ -40,6 +42,10 @@ if not HF_TOKEN:
     logger.error("HF_TOKEN is not set in environment variables.")
     raise ValueError("HF_TOKEN is required for Inference API.")

+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
+if not BACKUP_HF_TOKEN:
+    logger.warning("BACKUP_HF_TOKEN is not set. Fallback to secondary model will not work if primary token fails.")
+
 MONGO_URI = os.getenv("MONGODB_URI")
 if not MONGO_URI:
     logger.error("MONGODB_URI is not set in environment variables.")

@@ -50,6 +56,9 @@ if not JWT_SECRET or len(JWT_SECRET) < 32:
     logger.error("JWT_SECRET is not set or too short.")
     raise ValueError("JWT_SECRET is required (at least 32 characters).")

+ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
+logger.debug(f"ROUTER_API_URL set to: {ROUTER_API_URL}")
+
 # MongoDB setup
 client = AsyncIOMotorClient(MONGO_URI)
 mongo_db = client["hager"]

@@ -57,9 +66,14 @@ session_message_counts = mongo_db["session_message_counts"]

 # Create MongoDB index
 async def setup_mongo_index():
-    await session_message_counts.create_index("session_id", unique=True)
+    try:
+        await session_message_counts.create_index("session_id", unique=True)
+        logger.info("MongoDB index created successfully for session_id")
+    except Exception as e:
+        logger.error(f"Failed to create MongoDB index: {e}")

 # Jinja2 setup
+os.makedirs("templates", exist_ok=True)  # make sure the templates directory exists
 templates = Jinja2Templates(directory="templates")
 templates.env.filters['markdown'] = lambda text: markdown2.markdown(text)

@@ -75,22 +89,27 @@ class BlogPost(BaseModel):
 # Application settings
 QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
 CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
+logger.debug(f"Application settings: QUEUE_SIZE={QUEUE_SIZE}, CONCURRENCY_LIMIT={CONCURRENCY_LIMIT}")

 # Initialize FastAPI app
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    await init_db()  # call init_db asynchronously
+    logger.info("Initializing database and MongoDB index...")
+    await init_db()
     await setup_mongo_index()
     yield
+    logger.info("Shutting down application...")

 app = FastAPI(title="MGZon Chatbot API", lifespan=lifespan)

 # Add SessionMiddleware
 app.add_middleware(SessionMiddleware, secret_key=JWT_SECRET)
+logger.debug("SessionMiddleware added with JWT_SECRET")

 # Mount static files
 os.makedirs("static", exist_ok=True)
 app.mount("/static", StaticFiles(directory="static"), name="static")
+logger.debug("Static files mounted at /static")

 # CORS setup
 app.add_middleware(

@@ -98,21 +117,25 @@ app.add_middleware(
     allow_origins=[
         "https://mgzon-mgzon-app.hf.space",
         "http://localhost:7860",
+        "http://localhost:8000",  # added for local testing
         "https://mgzon-mgzon-app.hf.space/auth/google/callback",
         "https://mgzon-mgzon-app.hf.space/auth/github/callback",
     ],
     allow_credentials=True,
-    allow_methods=["GET", "POST", "OPTIONS"],
-    allow_headers=["Accept", "Content-Type", "Authorization"],
+    allow_methods=["GET", "POST", "OPTIONS", "PUT", "DELETE"],
+    allow_headers=["Accept", "Content-Type", "Authorization", "X-Requested-With"],
 )
+logger.debug("CORS middleware configured with allowed origins")

 # Include routers
 app.include_router(api_router)
 get_auth_router(app)  # Add OAuth and auth routers
+logger.debug("API and auth routers included")

 # Add logout endpoint
 @app.get("/logout")
 async def logout(request: Request):
+    logger.info("User logout requested")
     request.session.clear()
     response = RedirectResponse("/login")
     response.delete_cookie("access_token")

@@ -121,6 +144,7 @@ async def logout(request: Request):
 # Debug routes endpoint
 @app.get("/debug/routes", response_class=PlainTextResponse)
 async def debug_routes():
+    logger.debug("Fetching debug routes")
     routes = []
     for route in app.routes:
         methods = getattr(route, "methods", [])

@@ -160,6 +184,7 @@ class NotFoundMiddleware(BaseHTTPMiddleware):
         )

 app.add_middleware(NotFoundMiddleware)
+logger.debug("NotFoundMiddleware added")

 # OAuth error handler
 @app.exception_handler(GetIdEmailError)

@@ -174,30 +199,37 @@ async def handle_oauth_error(request: Request, exc: GetIdEmailError):
 # Root endpoint
 @app.get("/", response_class=HTMLResponse)
 async def root(request: Request, user: User = Depends(current_active_user)):
+    logger.debug(f"Root endpoint accessed by user: {user.email if user else 'Anonymous'}")
     return templates.TemplateResponse("index.html", {"request": request, "user": user})

 # Google verification
 @app.get("/google97468ef1f6b6e804.html", response_class=PlainTextResponse)
 async def google_verification():
+    logger.debug("Google verification endpoint accessed")
     return "google-site-verification: google97468ef1f6b6e804.html"

 # Login page
 @app.get("/login", response_class=HTMLResponse)
 async def login_page(request: Request, user: User = Depends(current_active_user)):
     if user:
+        logger.debug(f"User {user.email} already logged in, redirecting to /chat")
         return RedirectResponse(url="/chat", status_code=302)
+    logger.debug("Login page accessed")
     return templates.TemplateResponse("login.html", {"request": request})

 # Register page
 @app.get("/register", response_class=HTMLResponse)
 async def register_page(request: Request, user: User = Depends(current_active_user)):
     if user:
+        logger.debug(f"User {user.email} already logged in, redirecting to /chat")
         return RedirectResponse(url="/chat", status_code=302)
+    logger.debug("Register page accessed")
     return templates.TemplateResponse("register.html", {"request": request})

 # Chat page
 @app.get("/chat", response_class=HTMLResponse)
 async def chat(request: Request, user: User = Depends(current_active_user)):
+    logger.debug(f"Chat page accessed by user: {user.email if user else 'Anonymous'}")
     return templates.TemplateResponse("chat.html", {"request": request, "user": user})

 # Specific conversation page

@@ -209,6 +241,7 @@ async def chat_conversation(
     db: AsyncSession = Depends(get_db)
 ):
     if not user:
+        logger.debug("Anonymous user attempted to access conversation page, redirecting to /login")
         return RedirectResponse(url="/login", status_code=302)

     conversation = await db.execute(

@@ -219,7 +252,10 @@ async def chat_conversation(
     )
     conversation = conversation.scalar_one_or_none()
     if not conversation:
+        logger.warning(f"Conversation {conversation_id} not found for user {user.email}")
         raise HTTPException(status_code=404, detail="Conversation not found")
+
+    logger.debug(f"Conversation page accessed: {conversation_id} by user: {user.email}")
     return templates.TemplateResponse(
         "chat.html",
         {

@@ -233,6 +269,7 @@ async def chat_conversation(
 # About page
 @app.get("/about", response_class=HTMLResponse)
 async def about(request: Request, user: User = Depends(current_active_user)):
+    logger.debug(f"About page accessed by user: {user.email if user else 'Anonymous'}")
     return templates.TemplateResponse("about.html", {"request": request, "user": user})

 # Serve static files

@@ -241,6 +278,7 @@ async def serve_static(path: str):
     clean_path = re.sub(r'\?.*', '', path)
     file_path = Path("static") / clean_path
     if not file_path.exists():
+        logger.warning(f"Static file not found: {file_path}")
         raise HTTPException(status_code=404, detail="File not found")
     cache_duration = 31536000 if not clean_path.endswith(('.js', '.css')) else 3600
     with open(file_path, "rb") as f:

@@ -250,35 +288,42 @@ async def serve_static(path: str):
         "ETag": file_hash,
         "Last-Modified": datetime.utcfromtimestamp(file_path.stat().st_mtime).strftime('%a, %d %b %Y %H:%M:%S GMT')
     }
+    logger.debug(f"Serving static file: {file_path}")
     return FileResponse(file_path, headers=headers)

 # Blog page
 @app.get("/blog", response_class=HTMLResponse)
 async def blog(request: Request, skip: int = Query(0, ge=0), limit: int = Query(10, ge=1, le=100)):
+    logger.debug(f"Blog page accessed with skip={skip}, limit={limit}")
     posts = await mongo_db.blog_posts.find().skip(skip).limit(limit).to_list(limit)
     return templates.TemplateResponse("blog.html", {"request": request, "posts": posts})

 # Individual blog post
 @app.get("/blog/{post_id}", response_class=HTMLResponse)
 async def blog_post(request: Request, post_id: str):
+    logger.debug(f"Blog post accessed: {post_id}")
     post = await mongo_db.blog_posts.find_one({"id": post_id})
     if not post:
+        logger.warning(f"Blog post not found: {post_id}")
         raise HTTPException(status_code=404, detail="Post not found")
     return templates.TemplateResponse("blog_post.html", {"request": request, "post": post})

 # Docs page
 @app.get("/docs", response_class=HTMLResponse)
 async def docs(request: Request):
+    logger.debug("Docs page accessed")
     return templates.TemplateResponse("docs.html", {"request": request})

 # Swagger UI
 @app.get("/swagger", response_class=HTMLResponse)
 async def swagger_ui():
+    logger.debug("Swagger UI accessed")
     return get_swagger_ui_html(openapi_url="/openapi.json", title="MGZon API Documentation")

 # Sitemap
 @app.get("/sitemap.xml", response_class=PlainTextResponse)
 async def sitemap():
+    logger.debug("Sitemap accessed")
     posts = await mongo_db.blog_posts.find().to_list(100)
     current_date = datetime.utcnow().strftime('%Y-%m-%d')
     xml = '<?xml version="1.0" encoding="UTF-8"?>\n'

@@ -338,7 +383,15 @@ async def sitemap():
 # Redirect /gradio to /chat
 @app.get("/gradio", response_class=RedirectResponse)
 async def launch_chatbot():
+    logger.debug("Redirecting /gradio to /chat")
     return RedirectResponse(url="/chat", status_code=302)

+# Health check endpoint
+@app.get("/health", response_class=PlainTextResponse)
+async def health_check():
+    logger.debug("Health check endpoint accessed")
+    return "OK"
+
 if __name__ == "__main__":
+    logger.info(f"Starting uvicorn server on port {os.getenv('PORT', 7860)}")
     uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
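
The new /health endpoint returns a plain-text "OK", which is convenient for container or Space health probes. A quick smoke test, assuming the requests package is installed; the base URL is taken from the CORS config above, so swap in http://localhost:7860 for local runs:

import requests

resp = requests.get("https://mgzon-mgzon-app.hf.space/health", timeout=10)
print(resp.status_code, resp.text)  # expected: 200 OK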
utils/generation.py CHANGED

@@ -1,3 +1,7 @@
+# utils/generation.py
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+
 import os
 import re
 import json

@@ -34,11 +38,11 @@ LATEX_DELIMS = [
 HF_TOKEN = os.getenv("HF_TOKEN")
 BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
-API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co/v1")
+API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
 SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
-TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")  # replaced Qwen with an available model
+TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")
 CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
 CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
 ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")

@@ -46,7 +50,7 @@ TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")

 # PROVIDER_ENDPOINTS disabled because we only use Hugging Face
 PROVIDER_ENDPOINTS = {
-    "huggingface": API_ENDPOINT  # use Hugging Face only
+    "huggingface": API_ENDPOINT
 }

 def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str, str]:

@@ -56,6 +60,7 @@ def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str,
         headers={"Authorization": f"Bearer {api_key}"},
         timeout=30
     )
+    logger.debug(f"Checking model {model_name}: {response.status_code} - {response.text}")
     if response.status_code == 200:
         logger.info(f"Model {model_name} is available at {API_ENDPOINT}")
         return True, api_key, API_ENDPOINT

@@ -76,7 +81,7 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
         model_name = MODEL_ALIASES[preferred_model]
         is_available, _, endpoint = check_model_availability(model_name, HF_TOKEN)
         if is_available:
-            logger.info(f"Selected preferred model {model_name} with endpoint {endpoint} for query: {query}")
+            logger.info(f"Selected preferred model {model_name} with endpoint {endpoint} for query: {query[:50]}...")
             return model_name, endpoint

     query_lower = query.lower()

@@ -92,7 +97,7 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
     ]
     for pattern in image_patterns:
         if re.search(pattern, query_lower, re.IGNORECASE):
-            logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
+            logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query[:50]}...")
             return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
     available_models = [
         (MODEL_NAME, API_ENDPOINT),

@@ -102,7 +107,7 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
     for model_name, api_endpoint in available_models:
         is_available, _, endpoint = check_model_availability(model_name, HF_TOKEN)
         if is_available:
-            logger.info(f"Selected {model_name} with endpoint {endpoint} for query: {query}")
+            logger.info(f"Selected {model_name} with endpoint {endpoint} for query: {query[:50]}...")
             return model_name, endpoint
     logger.error("No models available. Falling back to default.")
     return MODEL_NAME, API_ENDPOINT

@@ -150,7 +155,7 @@ def request_generation(
     client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
     task_type = "general"
     enhanced_system_prompt = system_prompt
-    buffer = ""  # define buffer here to avoid UnboundLocalError
+    buffer = ""

     if model_name == ASR_MODEL and audio_data:
         task_type = "audio_transcription"

@@ -166,6 +171,7 @@ def request_generation(
             file=audio_file,
             response_format="text"
         )
+        logger.debug(f"Transcription response: {transcription}")
         yield transcription
         cache[cache_key] = [transcription]
         return

@@ -185,6 +191,7 @@ def request_generation(
         torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
         audio_file.seek(0)
         audio_data = audio_file.read()
+        logger.debug(f"Generated audio data of length: {len(audio_data)} bytes")
         yield audio_data
         cache[cache_key] = [audio_data]
         return

@@ -204,6 +211,7 @@ def request_generation(
         logits_per_image = outputs.logits_per_image
         probs = logits_per_image.softmax(dim=1)
         result = f"Image analysis result: {probs.tolist()}"
+        logger.debug(f"Image analysis result: {result}")
         if output_format == "audio":
             model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
             processor = AutoProcessor.from_pretrained(TTS_MODEL)

@@ -267,16 +275,18 @@ def request_generation(

     cached_chunks = []
     try:
-        stream = client.chat.completions.create(
-            model=model_name,
-            messages=input_messages,
-            temperature=temperature,
-            max_tokens=max_new_tokens,
-            stream=True,
-            tools=tools,
-            tool_choice=tool_choice,
-        )
-
+        payload = {
+            "model": model_name,
+            "messages": input_messages,
+            "temperature": temperature,
+            "max_tokens": max_new_tokens,
+            "stream": True,
+            "tools": tools,
+            "tool_choice": tool_choice
+        }
+        logger.debug(f"Sending payload to {selected_endpoint}/chat/completions: {json.dumps(payload, indent=2, ensure_ascii=False)}")
+
+        stream = client.chat.completions.create(**payload)
         reasoning_started = False
         reasoning_closed = False
         saw_visible_output = False

@@ -284,7 +294,8 @@ def request_generation(
         last_tool_args = None

         for chunk in stream:
-            if chunk.choices[0].delta.content:
+            logger.debug(f"Received chunk: {chunk}")
+            if chunk.choices and chunk.choices[0].delta.content:
                 content = chunk.choices[0].delta.content
                 if content == "<|channel|>analysis<|message|>":
                     if not reasoning_started:

@@ -308,7 +319,7 @@ def request_generation(
                     buffer = ""
                 continue

-            if chunk.choices[0].delta.tool_calls and model_name in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
+            if chunk.choices and chunk.choices[0].delta.tool_calls and model_name in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
                 tool_call = chunk.choices[0].delta.tool_calls[0]
                 name = getattr(tool_call, "function", {}).get("name", None)
                 args = getattr(tool_call, "function", {}).get("arguments", None)

@@ -318,7 +329,7 @@ def request_generation(
                 last_tool_args = args
                 continue

-            if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
+            if chunk.choices and chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
                 if buffer:
                     cached_chunks.append(buffer)
                     yield buffer

@@ -404,18 +415,21 @@ def request_generation(
             yield f"Error: Fallback model {fallback_model} is not available."
             return
         client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
-        stream = client.chat.completions.create(
-            model=fallback_model,
-            messages=input_messages,
-            temperature=temperature,
-            max_tokens=max_new_tokens,
-            stream=True,
-            tools=[],
-            tool_choice="none",
-        )
-        buffer = ""  # define buffer for the fallback model
+        payload = {
+            "model": fallback_model,
+            "messages": input_messages,
+            "temperature": temperature,
+            "max_tokens": max_new_tokens,
+            "stream": True,
+            "tools": [],
+            "tool_choice": "none"
+        }
+        logger.debug(f"Sending payload to {selected_endpoint}/chat/completions: {json.dumps(payload, indent=2, ensure_ascii=False)}")
+        stream = client.chat.completions.create(**payload)
+        buffer = ""
         for chunk in stream:
-            if chunk.choices[0].delta.content:
+            logger.debug(f"Received chunk from fallback: {chunk}")
+            if chunk.choices and chunk.choices[0].delta.content:
                 content = chunk.choices[0].delta.content
                 if content == "<|channel|>analysis<|message|>":
                     if not reasoning_started:

@@ -439,7 +453,7 @@ def request_generation(
                     buffer = ""
                 continue

-            if chunk.choices[0].finish_reason in ("stop", "error", "length"):
+            if chunk.choices and chunk.choices[0].finish_reason in ("stop", "error", "length"):
                 if buffer:
                     cached_chunks.append(buffer)
                     yield buffer

@@ -487,18 +501,21 @@ def request_generation(
             yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
             return
         client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
-        stream = client.chat.completions.create(
-            model=TERTIARY_MODEL_NAME,
-            messages=input_messages,
-            temperature=temperature,
-            max_tokens=max_new_tokens,
-            stream=True,
-            tools=[],
-            tool_choice="none",
-        )
-        buffer = ""  # define buffer for the tertiary model
+        payload = {
+            "model": TERTIARY_MODEL_NAME,
+            "messages": input_messages,
+            "temperature": temperature,
+            "max_tokens": max_new_tokens,
+            "stream": True,
+            "tools": [],
+            "tool_choice": "none"
+        }
+        logger.debug(f"Sending payload to {selected_endpoint}/chat/completions: {json.dumps(payload, indent=2, ensure_ascii=False)}")
+        stream = client.chat.completions.create(**payload)
+        buffer = ""
         for chunk in stream:
-            if chunk.choices[0].delta.content:
+            logger.debug(f"Received chunk from tertiary: {chunk}")
+            if chunk.choices and chunk.choices[0].delta.content:
                 content = chunk.choices[0].delta.content
                 saw_visible_output = True
                 buffer += content

@@ -507,7 +524,7 @@ def request_generation(
                     yield buffer
                     buffer = ""
                 continue
-            if chunk.choices[0].finish_reason in ("stop", "error", "length"):
+            if chunk.choices and chunk.choices[0].finish_reason in ("stop", "error", "length"):
                 if buffer:
                     cached_chunks.append(buffer)
                     yield buffer
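
All three call sites (primary, fallback, tertiary) now share the same shape: build the request as a payload dict, log it, unpack it into client.chat.completions.create(**payload), and guard every chunk access with chunk.choices, since some providers emit keep-alive chunks with an empty choices list that would otherwise raise IndexError. A condensed sketch of the pattern, assuming an OpenAI-compatible endpoint; the token and prompt below are placeholders:

import json
from openai import OpenAI

client = OpenAI(api_key="hf_xxx", base_url="https://router.huggingface.co/v1")

payload = {
    "model": "openai/gpt-oss-120b:cerebras",
    "messages": [{"role": "user", "content": "Say hi"}],
    "temperature": 0.7,
    "max_tokens": 64,
    "stream": True,
}
# Logging the exact payload before the call makes provider-side 4xx errors
# far easier to reproduce with curl.
print(json.dumps(payload, indent=2, ensure_ascii=False))

stream = client.chat.completions.create(**payload)
for chunk in stream:
    # Empty chunk.choices would raise IndexError without this guard.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")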