rkihacker committed on
Commit
9e5e128
·
verified ·
1 Parent(s): 5e05fcc

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +50 -19
main.py CHANGED
@@ -7,6 +7,7 @@ import random
7
  import logging
8
  import time
9
  from contextlib import asynccontextmanager
 
10
 
11
  # --- Production-Ready Configuration ---
12
  LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
@@ -45,20 +46,17 @@ app = FastAPI(docs_url=None, redoc_url=None, lifespan=lifespan)
45
  # --- API Endpoints ---
46
 
47
  # 1. Health Check Route (Defined FIRST)
48
- # This specific route will be matched before the catch-all proxy route.
49
  @app.get("/")
50
  async def health_check():
51
  """Provides a basic health check endpoint."""
52
  return JSONResponse({"status": "ok", "target": TARGET_URL})
53
 
54
  # 2. Catch-All Reverse Proxy Route (Defined SECOND)
55
- # This will capture ALL other requests (e.g., /completions, /v1/models, etc.)
56
- # and forward them. This eliminates any redirect issues.
57
  @app.api_route("/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"])
58
  async def reverse_proxy_handler(request: Request):
59
  """
60
  A catch-all reverse proxy that forwards requests to the target URL with
61
- enhanced retry logic and latency logging.
62
  """
63
  start_time = time.monotonic()
64
 
@@ -84,6 +82,7 @@ async def reverse_proxy_handler(request: Request):
84
  body = await request.body()
85
 
86
  last_exception = None
 
87
  for attempt in range(MAX_RETRIES):
88
  try:
89
  rp_req = client.build_request(
@@ -92,30 +91,62 @@ async def reverse_proxy_handler(request: Request):
92
  rp_resp = await client.send(rp_req, stream=True)
93
 
94
  if rp_resp.status_code not in RETRY_STATUS_CODES or attempt == MAX_RETRIES - 1:
95
- duration_ms = (time.monotonic() - start_time) * 1000
96
- log_func = logging.info if rp_resp.is_success else logging.warning
97
- log_func(f"Request finished: {request.method} {request.url.path} status_code={rp_resp.status_code} latency={duration_ms:.2f}ms")
98
-
99
- return StreamingResponse(
100
- rp_resp.aiter_raw(),
101
- status_code=rp_resp.status_code,
102
- headers=rp_resp.headers,
103
- background=BackgroundTask(rp_resp.aclose),
104
- )
105
 
106
  logging.warning(
107
  f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with status {rp_resp.status_code}. Retrying..."
108
  )
109
  await rp_resp.aclose()
 
110
 
111
  except httpx.ConnectError as e:
112
  last_exception = e
113
  logging.warning(f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with connection error: {e}")
114
 
 
 
 
 
 
 
 
 
115
  duration_ms = (time.monotonic() - start_time) * 1000
116
- logging.critical(f"Request failed, cannot connect to target: {request.method} {request.url.path} status_code=502 latency={duration_ms:.2f}ms")
117
-
118
- raise HTTPException(
119
- status_code=502,
120
- detail=f"Bad Gateway: Cannot connect to target service after {MAX_RETRIES} attempts. {last_exception}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  )
 
7
  import logging
8
  import time
9
  from contextlib import asynccontextmanager
10
+ import asyncio
11
 
12
  # --- Production-Ready Configuration ---
13
  LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
 
46
  # --- API Endpoints ---
47
 
48
  # 1. Health Check Route (Defined FIRST)
 
49
  @app.get("/")
50
  async def health_check():
51
  """Provides a basic health check endpoint."""
52
  return JSONResponse({"status": "ok", "target": TARGET_URL})
53
 
54
  # 2. Catch-All Reverse Proxy Route (Defined SECOND)
 
 
55
  @app.api_route("/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"])
56
  async def reverse_proxy_handler(request: Request):
57
  """
58
  A catch-all reverse proxy that forwards requests to the target URL with
59
+ enhanced retry logic, latency logging, and an initial processing message on delay.
60
  """
61
  start_time = time.monotonic()
62
 
 
82
  body = await request.body()
83
 
84
  last_exception = None
85
+ rp_resp = None
86
  for attempt in range(MAX_RETRIES):
87
  try:
88
  rp_req = client.build_request(
 
91
  rp_resp = await client.send(rp_req, stream=True)
92
 
93
  if rp_resp.status_code not in RETRY_STATUS_CODES or attempt == MAX_RETRIES - 1:
94
+ break # Exit loop on success or last retry
 
 
 
 
 
 
 
 
 
95
 
96
  logging.warning(
97
  f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with status {rp_resp.status_code}. Retrying..."
98
  )
99
  await rp_resp.aclose()
100
+ rp_resp = None # Ensure response is not carried over
101
 
102
  except httpx.ConnectError as e:
103
  last_exception = e
104
  logging.warning(f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with connection error: {e}")
105
 
106
+ if rp_resp is None:
107
+ duration_ms = (time.monotonic() - start_time) * 1000
108
+ logging.critical(f"Request failed, cannot connect to target: {request.method} {request.url.path} status_code=502 latency={duration_ms:.2f}ms")
109
+ raise HTTPException(
110
+ status_code=502,
111
+ detail=f"Bad Gateway: Cannot connect to target service after {MAX_RETRIES} attempts. {last_exception}"
112
+ )
113
+
114
  duration_ms = (time.monotonic() - start_time) * 1000
115
+ log_func = logging.info if rp_resp.is_success else logging.warning
116
+ log_func(f"Request headers received: {request.method} {request.url.path} status_code={rp_resp.status_code} latency={duration_ms:.2f}ms")
117
+
118
+ async def body_generator(response: httpx.Response):
119
+ """
120
+ Streams the response body. If the first chunk takes too long,
121
+ it sends a processing message first.
122
+ """
123
+ response_iterator = response.aiter_raw()
124
+ first_chunk = None
125
+ try:
126
+ # Wait for the first chunk of the body with a timeout
127
+ first_chunk = await asyncio.wait_for(response_iterator.__anext__(), timeout=1.5)
128
+ except asyncio.TimeoutError:
129
+ # If timeout occurs, send the processing message
130
+ logging.warning(f"Response from target timed out. Sending processing message for {url.path}")
131
+ processing_message = ':NiansuhAI Proccessing:\n\n'
132
+ yield processing_message.encode('utf-8')
133
+ except StopAsyncIteration:
134
+ # The response body is empty
135
+ pass
136
+
137
+ if first_chunk is not None:
138
+ yield first_chunk
139
+
140
+ # Yield the rest of the body
141
+ async for chunk in response_iterator:
142
+ yield chunk
143
+
144
+ final_duration_ms = (time.monotonic() - start_time) * 1000
145
+ logging.info(f"Request finished streaming: {request.method} {request.url.path} status_code={response.status_code} total_latency={final_duration_ms:.2f}ms")
146
+
147
+ return StreamingResponse(
148
+ body_generator(rp_resp),
149
+ status_code=rp_resp.status_code,
150
+ headers=rp_resp.headers,
151
+ background=BackgroundTask(rp_resp.aclose),
152
  )