Mark-Lasfar committed
Commit d28afad · Parent: 8c49d21

update main.py

Files changed (3):
  1. api/endpoints.py +30 -15
  2. main.py +60 -7
  3. utils/generation.py +62 -45
api/endpoints.py CHANGED

@@ -1,3 +1,7 @@
+# api/endpoints.py
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+
 import os
 import uuid
 from fastapi import APIRouter, Depends, HTTPException, Request, status, UploadFile, File

@@ -31,9 +35,9 @@ if not BACKUP_HF_TOKEN:
     logger.warning("BACKUP_HF_TOKEN is not set. Fallback to secondary model will not work if primary token fails.")

 ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
-API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co/v1")
+API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
 SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
 TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")
 CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")

@@ -141,7 +145,6 @@ async def performance_stats():
         "uptime": os.popen("uptime").read().strip()
     }

-
 @router.post("/api/chat")
 async def chat_endpoint(
     request: Request,

@@ -183,7 +186,7 @@ async def chat_endpoint(
     is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
     if not is_available:
         logger.warning(f"Model {model_name} is not available at {api_endpoint}, trying fallback model.")
-        model_name = SECONDARY_MODEL_NAME  # try the fallback model
+        model_name = SECONDARY_MODEL_NAME
         is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
         if not is_available:
             logger.error(f"Fallback model {model_name} is not available at {selected_endpoint}")

@@ -209,6 +212,7 @@ async def chat_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing audio chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -225,14 +229,14 @@ async def chat_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
-                if isinstance(chunk, str):
+                logger.debug(f"Processing text chunk: {chunk[:100]}...")
+                if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                     response_chunks.append(chunk)
                 else:
-                    logger.warning(f"Unexpected non-string chunk in text stream: {chunk}")
+                    logger.warning(f"Skipping chunk: {chunk}")
             response = "".join(response_chunks)
             if not response.strip():
                 logger.warning(f"Empty response from {model_name}. Trying fallback model {SECONDARY_MODEL_NAME}.")
-                # try the fallback model
                 model_name = SECONDARY_MODEL_NAME
                 is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
                 if not is_available:

@@ -254,10 +258,11 @@ async def chat_endpoint(
                 )
                 response_chunks = []
                 for chunk in stream:
-                    if isinstance(chunk, str):
+                    logger.debug(f"Processing fallback text chunk: {chunk[:100]}...")
+                    if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                         response_chunks.append(chunk)
                     else:
-                        logger.warning(f"Unexpected non-string chunk in text stream: {chunk}")
+                        logger.warning(f"Skipping fallback chunk: {chunk}")
                 response = "".join(response_chunks)
                 if not response.strip():
                     logger.error(f"Empty response from fallback model {model_name}.")

@@ -281,6 +286,7 @@ async def chat_endpoint(
         }

     return {"response": response}
+
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(
     request: Request,

@@ -338,6 +344,7 @@ async def audio_transcription_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing transcription chunk: {chunk[:100]}...")
                 if isinstance(chunk, str):
                     response_chunks.append(chunk)
                 else:

@@ -401,6 +408,7 @@ async def text_to_speech_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing TTS chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -460,6 +468,7 @@ async def code_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing code audio chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -476,10 +485,11 @@ async def code_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
-                if isinstance(chunk, str):
+                logger.debug(f"Processing code text chunk: {chunk[:100]}...")
+                if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                     response_chunks.append(chunk)
                 else:
-                    logger.warning(f"Unexpected non-string chunk in code stream: {chunk}")
+                    logger.warning(f"Skipping code chunk: {chunk}")
             response = "".join(response_chunks)
             if not response.strip():
                 logger.error("Empty code response generated.")

@@ -532,6 +542,7 @@ async def analysis_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing analysis audio chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -548,10 +559,11 @@ async def analysis_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
-                if isinstance(chunk, str):
+                logger.debug(f"Processing analysis text chunk: {chunk[:100]}...")
+                if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                     response_chunks.append(chunk)
                 else:
-                    logger.warning(f"Unexpected non-string chunk in analysis stream: {chunk}")
+                    logger.warning(f"Skipping analysis chunk: {chunk}")
             response = "".join(response_chunks)
             if not response.strip():
                 logger.error("Empty analysis response generated.")

@@ -624,6 +636,7 @@ async def image_analysis_endpoint(
         audio_chunks = []
         try:
             for chunk in stream:
+                logger.debug(f"Processing image analysis audio chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
                 if isinstance(chunk, bytes):
                     audio_chunks.append(chunk)
                 else:

@@ -640,10 +653,11 @@ async def image_analysis_endpoint(
         response_chunks = []
         try:
             for chunk in stream:
-                if isinstance(chunk, str):
+                logger.debug(f"Processing image analysis text chunk: {chunk[:100]}...")
+                if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
                     response_chunks.append(chunk)
                 else:
-                    logger.warning(f"Unexpected non-string chunk in image analysis stream: {chunk}")
+                    logger.warning(f"Skipping image analysis chunk: {chunk}")
             response = "".join(response_chunks)
             if not response.strip():
                 logger.error("Empty image analysis response generated.")

@@ -681,6 +695,7 @@ async def test_model(model: str = MODEL_NAME, endpoint: str = API_ENDPOINT):
             messages=[{"role": "user", "content": "Test"}],
             max_tokens=50
         )
+        logger.debug(f"Test model response: {response.choices[0].message.content}")
        return {"status": "success", "response": response.choices[0].message.content}
     except Exception as e:
         logger.error(f"Test model failed: {e}")
main.py CHANGED

@@ -1,3 +1,4 @@
+# main.py
 # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
 # SPDX-License-Identifier: Apache-2.0

@@ -27,12 +28,13 @@ from hashlib import md5
 from datetime import datetime
 from httpx_oauth.exceptions import GetIdEmailError
 import re
-import anyio  # add this import
+import anyio

 # Setup logging
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.DEBUG)  # switched to DEBUG so everything can be traced
 logger = logging.getLogger(__name__)
-logger.info("Files in current dir: %s", os.listdir(os.getcwd()))
+logger.info("Starting application...")
+logger.debug("Files in current directory: %s", os.listdir(os.getcwd()))

 # Check environment variables
 HF_TOKEN = os.getenv("HF_TOKEN")

@@ -40,6 +42,10 @@ if not HF_TOKEN:
     logger.error("HF_TOKEN is not set in environment variables.")
     raise ValueError("HF_TOKEN is required for Inference API.")

+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
+if not BACKUP_HF_TOKEN:
+    logger.warning("BACKUP_HF_TOKEN is not set. Fallback to secondary model will not work if primary token fails.")
+
 MONGO_URI = os.getenv("MONGODB_URI")
 if not MONGO_URI:
     logger.error("MONGODB_URI is not set in environment variables.")

@@ -50,6 +56,9 @@ if not JWT_SECRET or len(JWT_SECRET) < 32:
     logger.error("JWT_SECRET is not set or too short.")
     raise ValueError("JWT_SECRET is required (at least 32 characters).")

+ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
+logger.debug(f"ROUTER_API_URL set to: {ROUTER_API_URL}")
+
 # MongoDB setup
 client = AsyncIOMotorClient(MONGO_URI)
 mongo_db = client["hager"]

@@ -57,9 +66,14 @@ session_message_counts = mongo_db["session_message_counts"]

 # Create MongoDB index
 async def setup_mongo_index():
-    await session_message_counts.create_index("session_id", unique=True)
+    try:
+        await session_message_counts.create_index("session_id", unique=True)
+        logger.info("MongoDB index created successfully for session_id")
+    except Exception as e:
+        logger.error(f"Failed to create MongoDB index: {e}")

 # Jinja2 setup
+os.makedirs("templates", exist_ok=True)  # make sure the templates directory exists
 templates = Jinja2Templates(directory="templates")
 templates.env.filters['markdown'] = lambda text: markdown2.markdown(text)

@@ -75,22 +89,27 @@ class BlogPost(BaseModel):
 # Application settings
 QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
 CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
+logger.debug(f"Application settings: QUEUE_SIZE={QUEUE_SIZE}, CONCURRENCY_LIMIT={CONCURRENCY_LIMIT}")

 # Initialize FastAPI app
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    await init_db()  # call init_db asynchronously
+    logger.info("Initializing database and MongoDB index...")
+    await init_db()
     await setup_mongo_index()
     yield
+    logger.info("Shutting down application...")

 app = FastAPI(title="MGZon Chatbot API", lifespan=lifespan)

 # Add SessionMiddleware
 app.add_middleware(SessionMiddleware, secret_key=JWT_SECRET)
+logger.debug("SessionMiddleware added with JWT_SECRET")

 # Mount static files
 os.makedirs("static", exist_ok=True)
 app.mount("/static", StaticFiles(directory="static"), name="static")
+logger.debug("Static files mounted at /static")

 # CORS setup
 app.add_middleware(

@@ -98,21 +117,25 @@ app.add_middleware(
     allow_origins=[
         "https://mgzon-mgzon-app.hf.space",
         "http://localhost:7860",
+        "http://localhost:8000",  # added for local testing
         "https://mgzon-mgzon-app.hf.space/auth/google/callback",
         "https://mgzon-mgzon-app.hf.space/auth/github/callback",
     ],
     allow_credentials=True,
-    allow_methods=["GET", "POST", "OPTIONS"],
-    allow_headers=["Accept", "Content-Type", "Authorization"],
+    allow_methods=["GET", "POST", "OPTIONS", "PUT", "DELETE"],
+    allow_headers=["Accept", "Content-Type", "Authorization", "X-Requested-With"],
 )
+logger.debug("CORS middleware configured with allowed origins")

 # Include routers
 app.include_router(api_router)
 get_auth_router(app)  # Add OAuth and auth routers
+logger.debug("API and auth routers included")

 # Add logout endpoint
 @app.get("/logout")
 async def logout(request: Request):
+    logger.info("User logout requested")
     request.session.clear()
     response = RedirectResponse("/login")
     response.delete_cookie("access_token")

@@ -121,6 +144,7 @@ async def logout(request: Request):
 # Debug routes endpoint
 @app.get("/debug/routes", response_class=PlainTextResponse)
 async def debug_routes():
+    logger.debug("Fetching debug routes")
     routes = []
     for route in app.routes:
         methods = getattr(route, "methods", [])

@@ -160,6 +184,7 @@ class NotFoundMiddleware(BaseHTTPMiddleware):
         )

 app.add_middleware(NotFoundMiddleware)
+logger.debug("NotFoundMiddleware added")

 # OAuth error handler
 @app.exception_handler(GetIdEmailError)

@@ -174,30 +199,37 @@ async def handle_oauth_error(request: Request, exc: GetIdEmailError):
 # Root endpoint
 @app.get("/", response_class=HTMLResponse)
 async def root(request: Request, user: User = Depends(current_active_user)):
+    logger.debug(f"Root endpoint accessed by user: {user.email if user else 'Anonymous'}")
     return templates.TemplateResponse("index.html", {"request": request, "user": user})

 # Google verification
 @app.get("/google97468ef1f6b6e804.html", response_class=PlainTextResponse)
 async def google_verification():
+    logger.debug("Google verification endpoint accessed")
     return "google-site-verification: google97468ef1f6b6e804.html"

 # Login page
 @app.get("/login", response_class=HTMLResponse)
 async def login_page(request: Request, user: User = Depends(current_active_user)):
     if user:
+        logger.debug(f"User {user.email} already logged in, redirecting to /chat")
         return RedirectResponse(url="/chat", status_code=302)
+    logger.debug("Login page accessed")
     return templates.TemplateResponse("login.html", {"request": request})

 # Register page
 @app.get("/register", response_class=HTMLResponse)
 async def register_page(request: Request, user: User = Depends(current_active_user)):
     if user:
+        logger.debug(f"User {user.email} already logged in, redirecting to /chat")
         return RedirectResponse(url="/chat", status_code=302)
+    logger.debug("Register page accessed")
     return templates.TemplateResponse("register.html", {"request": request})

 # Chat page
 @app.get("/chat", response_class=HTMLResponse)
 async def chat(request: Request, user: User = Depends(current_active_user)):
+    logger.debug(f"Chat page accessed by user: {user.email if user else 'Anonymous'}")
     return templates.TemplateResponse("chat.html", {"request": request, "user": user})

 # Specific conversation page

@@ -209,6 +241,7 @@ async def chat_conversation(
     db: AsyncSession = Depends(get_db)
 ):
     if not user:
+        logger.debug("Anonymous user attempted to access conversation page, redirecting to /login")
         return RedirectResponse(url="/login", status_code=302)

     conversation = await db.execute(

@@ -219,7 +252,10 @@ async def chat_conversation(
     )
     conversation = conversation.scalar_one_or_none()
     if not conversation:
+        logger.warning(f"Conversation {conversation_id} not found for user {user.email}")
         raise HTTPException(status_code=404, detail="Conversation not found")
+
+    logger.debug(f"Conversation page accessed: {conversation_id} by user: {user.email}")
     return templates.TemplateResponse(
         "chat.html",
         {

@@ -233,6 +269,7 @@ async def chat_conversation(
 # About page
 @app.get("/about", response_class=HTMLResponse)
 async def about(request: Request, user: User = Depends(current_active_user)):
+    logger.debug(f"About page accessed by user: {user.email if user else 'Anonymous'}")
     return templates.TemplateResponse("about.html", {"request": request, "user": user})

 # Serve static files

@@ -241,6 +278,7 @@ async def serve_static(path: str):
     clean_path = re.sub(r'\?.*', '', path)
     file_path = Path("static") / clean_path
     if not file_path.exists():
+        logger.warning(f"Static file not found: {file_path}")
         raise HTTPException(status_code=404, detail="File not found")
     cache_duration = 31536000 if not clean_path.endswith(('.js', '.css')) else 3600
     with open(file_path, "rb") as f:

@@ -250,35 +288,42 @@ async def serve_static(path: str):
         "ETag": file_hash,
         "Last-Modified": datetime.utcfromtimestamp(file_path.stat().st_mtime).strftime('%a, %d %b %Y %H:%M:%S GMT')
     }
+    logger.debug(f"Serving static file: {file_path}")
     return FileResponse(file_path, headers=headers)

 # Blog page
 @app.get("/blog", response_class=HTMLResponse)
 async def blog(request: Request, skip: int = Query(0, ge=0), limit: int = Query(10, ge=1, le=100)):
+    logger.debug(f"Blog page accessed with skip={skip}, limit={limit}")
     posts = await mongo_db.blog_posts.find().skip(skip).limit(limit).to_list(limit)
     return templates.TemplateResponse("blog.html", {"request": request, "posts": posts})

 # Individual blog post
 @app.get("/blog/{post_id}", response_class=HTMLResponse)
 async def blog_post(request: Request, post_id: str):
+    logger.debug(f"Blog post accessed: {post_id}")
     post = await mongo_db.blog_posts.find_one({"id": post_id})
     if not post:
+        logger.warning(f"Blog post not found: {post_id}")
         raise HTTPException(status_code=404, detail="Post not found")
     return templates.TemplateResponse("blog_post.html", {"request": request, "post": post})

 # Docs page
 @app.get("/docs", response_class=HTMLResponse)
 async def docs(request: Request):
+    logger.debug("Docs page accessed")
     return templates.TemplateResponse("docs.html", {"request": request})

 # Swagger UI
 @app.get("/swagger", response_class=HTMLResponse)
 async def swagger_ui():
+    logger.debug("Swagger UI accessed")
     return get_swagger_ui_html(openapi_url="/openapi.json", title="MGZon API Documentation")

 # Sitemap
 @app.get("/sitemap.xml", response_class=PlainTextResponse)
 async def sitemap():
+    logger.debug("Sitemap accessed")
     posts = await mongo_db.blog_posts.find().to_list(100)
     current_date = datetime.utcnow().strftime('%Y-%m-%d')
     xml = '<?xml version="1.0" encoding="UTF-8"?>\n'

@@ -338,7 +383,15 @@ async def sitemap():
 # Redirect /gradio to /chat
 @app.get("/gradio", response_class=RedirectResponse)
 async def launch_chatbot():
+    logger.debug("Redirecting /gradio to /chat")
     return RedirectResponse(url="/chat", status_code=302)

+# Health check endpoint
+@app.get("/health", response_class=PlainTextResponse)
+async def health_check():
+    logger.debug("Health check endpoint accessed")
+    return "OK"
+
 if __name__ == "__main__":
+    logger.info(f"Starting uvicorn server on port {os.getenv('PORT', 7860)}")
     uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
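
The new /health endpoint returns a plain-text "OK", which is convenient for container or Space health probes. A quick smoke test, assuming the requests package is installed; the base URL is taken from the CORS config above, so swap in http://localhost:7860 for local runs:

import requests

resp = requests.get("https://mgzon-mgzon-app.hf.space/health", timeout=10)
print(resp.status_code, resp.text)  # expected: 200 OK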
utils/generation.py CHANGED

@@ -1,3 +1,7 @@
+# utils/generation.py
+# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
+# SPDX-License-Identifier: Apache-2.0
+
 import os
 import re
 import json

@@ -34,11 +38,11 @@ LATEX_DELIMS = [
 HF_TOKEN = os.getenv("HF_TOKEN")
 BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
-API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co/v1")
+API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
 SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
-TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")  # replaced Qwen with an available model
+TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")
 CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
 CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
 ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")

@@ -46,7 +50,7 @@ TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")

 # PROVIDER_ENDPOINTS disabled because we only use Hugging Face
 PROVIDER_ENDPOINTS = {
-    "huggingface": API_ENDPOINT  # use Hugging Face only
+    "huggingface": API_ENDPOINT
 }

 def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str, str]:

@@ -56,6 +60,7 @@ def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str,
         headers={"Authorization": f"Bearer {api_key}"},
         timeout=30
     )
+    logger.debug(f"Checking model {model_name}: {response.status_code} - {response.text}")
     if response.status_code == 200:
         logger.info(f"Model {model_name} is available at {API_ENDPOINT}")
         return True, api_key, API_ENDPOINT

@@ -76,7 +81,7 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
         model_name = MODEL_ALIASES[preferred_model]
         is_available, _, endpoint = check_model_availability(model_name, HF_TOKEN)
         if is_available:
-            logger.info(f"Selected preferred model {model_name} with endpoint {endpoint} for query: {query}")
+            logger.info(f"Selected preferred model {model_name} with endpoint {endpoint} for query: {query[:50]}...")
             return model_name, endpoint

     query_lower = query.lower()

@@ -92,7 +97,7 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
     ]
     for pattern in image_patterns:
         if re.search(pattern, query_lower, re.IGNORECASE):
-            logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
+            logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query[:50]}...")
             return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
     available_models = [
         (MODEL_NAME, API_ENDPOINT),

@@ -102,7 +107,7 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
     for model_name, api_endpoint in available_models:
         is_available, _, endpoint = check_model_availability(model_name, HF_TOKEN)
         if is_available:
-            logger.info(f"Selected {model_name} with endpoint {endpoint} for query: {query}")
+            logger.info(f"Selected {model_name} with endpoint {endpoint} for query: {query[:50]}...")
             return model_name, endpoint
     logger.error("No models available. Falling back to default.")
     return MODEL_NAME, API_ENDPOINT

@@ -150,7 +155,7 @@ def request_generation(
     client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
     task_type = "general"
     enhanced_system_prompt = system_prompt
-    buffer = ""  # define buffer here to avoid UnboundLocalError
+    buffer = ""

     if model_name == ASR_MODEL and audio_data:
         task_type = "audio_transcription"

@@ -166,6 +171,7 @@ def request_generation(
             file=audio_file,
             response_format="text"
         )
+        logger.debug(f"Transcription response: {transcription}")
         yield transcription
         cache[cache_key] = [transcription]
         return

@@ -185,6 +191,7 @@ def request_generation(
         torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
         audio_file.seek(0)
         audio_data = audio_file.read()
+        logger.debug(f"Generated audio data of length: {len(audio_data)} bytes")
         yield audio_data
         cache[cache_key] = [audio_data]
         return

@@ -204,6 +211,7 @@ def request_generation(
         logits_per_image = outputs.logits_per_image
         probs = logits_per_image.softmax(dim=1)
         result = f"Image analysis result: {probs.tolist()}"
+        logger.debug(f"Image analysis result: {result}")
         if output_format == "audio":
             model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
             processor = AutoProcessor.from_pretrained(TTS_MODEL)

@@ -267,16 +275,18 @@ def request_generation(

     cached_chunks = []
     try:
-        stream = client.chat.completions.create(
-            model=model_name,
-            messages=input_messages,
-            temperature=temperature,
-            max_tokens=max_new_tokens,
-            stream=True,
-            tools=tools,
-            tool_choice=tool_choice,
-        )
-
+        payload = {
+            "model": model_name,
+            "messages": input_messages,
+            "temperature": temperature,
+            "max_tokens": max_new_tokens,
+            "stream": True,
+            "tools": tools,
+            "tool_choice": tool_choice
+        }
+        logger.debug(f"Sending payload to {selected_endpoint}/chat/completions: {json.dumps(payload, indent=2, ensure_ascii=False)}")
+
+        stream = client.chat.completions.create(**payload)
         reasoning_started = False
         reasoning_closed = False
         saw_visible_output = False

@@ -284,7 +294,8 @@ def request_generation(
         last_tool_args = None

         for chunk in stream:
-            if chunk.choices[0].delta.content:
+            logger.debug(f"Received chunk: {chunk}")
+            if chunk.choices and chunk.choices[0].delta.content:
                 content = chunk.choices[0].delta.content
                 if content == "<|channel|>analysis<|message|>":
                     if not reasoning_started:

@@ -308,7 +319,7 @@ def request_generation(
                     buffer = ""
                 continue

-            if chunk.choices[0].delta.tool_calls and model_name in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
+            if chunk.choices and chunk.choices[0].delta.tool_calls and model_name in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
                 tool_call = chunk.choices[0].delta.tool_calls[0]
                 name = getattr(tool_call, "function", {}).get("name", None)
                 args = getattr(tool_call, "function", {}).get("arguments", None)

@@ -318,7 +329,7 @@ def request_generation(
                 last_tool_args = args
                 continue

-            if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
+            if chunk.choices and chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
                 if buffer:
                     cached_chunks.append(buffer)
                     yield buffer

@@ -404,18 +415,21 @@ def request_generation(
             yield f"Error: Fallback model {fallback_model} is not available."
             return
         client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
-        stream = client.chat.completions.create(
-            model=fallback_model,
-            messages=input_messages,
-            temperature=temperature,
-            max_tokens=max_new_tokens,
-            stream=True,
-            tools=[],
-            tool_choice="none",
-        )
-        buffer = ""  # define buffer for the fallback model
+        payload = {
+            "model": fallback_model,
+            "messages": input_messages,
+            "temperature": temperature,
+            "max_tokens": max_new_tokens,
+            "stream": True,
+            "tools": [],
+            "tool_choice": "none"
+        }
+        logger.debug(f"Sending payload to {selected_endpoint}/chat/completions: {json.dumps(payload, indent=2, ensure_ascii=False)}")
+        stream = client.chat.completions.create(**payload)
+        buffer = ""
         for chunk in stream:
-            if chunk.choices[0].delta.content:
+            logger.debug(f"Received chunk from fallback: {chunk}")
+            if chunk.choices and chunk.choices[0].delta.content:
                 content = chunk.choices[0].delta.content
                 if content == "<|channel|>analysis<|message|>":
                     if not reasoning_started:

@@ -439,7 +453,7 @@ def request_generation(
                     buffer = ""
                 continue

-            if chunk.choices[0].finish_reason in ("stop", "error", "length"):
+            if chunk.choices and chunk.choices[0].finish_reason in ("stop", "error", "length"):
                 if buffer:
                     cached_chunks.append(buffer)
                     yield buffer

@@ -487,18 +501,21 @@ def request_generation(
             yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
             return
         client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
-        stream = client.chat.completions.create(
-            model=TERTIARY_MODEL_NAME,
-            messages=input_messages,
-            temperature=temperature,
-            max_tokens=max_new_tokens,
-            stream=True,
-            tools=[],
-            tool_choice="none",
-        )
-        buffer = ""  # define buffer for the tertiary model
+        payload = {
+            "model": TERTIARY_MODEL_NAME,
+            "messages": input_messages,
+            "temperature": temperature,
+            "max_tokens": max_new_tokens,
+            "stream": True,
+            "tools": [],
+            "tool_choice": "none"
+        }
+        logger.debug(f"Sending payload to {selected_endpoint}/chat/completions: {json.dumps(payload, indent=2, ensure_ascii=False)}")
+        stream = client.chat.completions.create(**payload)
+        buffer = ""
         for chunk in stream:
-            if chunk.choices[0].delta.content:
+            logger.debug(f"Received chunk from tertiary: {chunk}")
+            if chunk.choices and chunk.choices[0].delta.content:
                 content = chunk.choices[0].delta.content
                 saw_visible_output = True
                 buffer += content

@@ -507,7 +524,7 @@ def request_generation(
                     yield buffer
                     buffer = ""
                 continue
-            if chunk.choices[0].finish_reason in ("stop", "error", "length"):
+            if chunk.choices and chunk.choices[0].finish_reason in ("stop", "error", "length"):
                 if buffer:
                     cached_chunks.append(buffer)
                     yield buffer
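
All three call sites (primary, fallback, tertiary) now share the same shape: build the request as a payload dict, log it, unpack it into client.chat.completions.create(**payload), and guard every chunk access with chunk.choices, since some providers emit keep-alive chunks with an empty choices list that would otherwise raise IndexError. A condensed sketch of the pattern, assuming an OpenAI-compatible endpoint; the token and prompt below are placeholders:

import json
from openai import OpenAI

client = OpenAI(api_key="hf_xxx", base_url="https://router.huggingface.co/v1")

payload = {
    "model": "openai/gpt-oss-120b:cerebras",
    "messages": [{"role": "user", "content": "Say hi"}],
    "temperature": 0.7,
    "max_tokens": 64,
    "stream": True,
}
# Logging the exact payload before the call makes provider-side 4xx errors
# far easier to reproduce with curl.
print(json.dumps(payload, indent=2, ensure_ascii=False))

stream = client.chat.completions.create(**payload)
for chunk in stream:
    # Empty chunk.choices would raise IndexError without this guard.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")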