Update main.py
main.py (CHANGED)
@@ -7,7 +7,6 @@ import random
 import logging
 import time
 from contextlib import asynccontextmanager
-import asyncio
 
 # --- Production-Ready Configuration ---
 LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
@@ -46,17 +45,20 @@ app = FastAPI(docs_url=None, redoc_url=None, lifespan=lifespan)
 # --- API Endpoints ---
 
 # 1. Health Check Route (Defined FIRST)
+# This specific route will be matched before the catch-all proxy route.
 @app.get("/")
 async def health_check():
     """Provides a basic health check endpoint."""
     return JSONResponse({"status": "ok", "target": TARGET_URL})
 
 # 2. Catch-All Reverse Proxy Route (Defined SECOND)
+# This will capture ALL other requests (e.g., /completions, /v1/models, etc.)
+# and forward them. This eliminates any redirect issues.
 @app.api_route("/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"])
 async def reverse_proxy_handler(request: Request):
     """
     A catch-all reverse proxy that forwards requests to the target URL with
-    enhanced retry logic
+    enhanced retry logic and latency logging.
     """
     start_time = time.monotonic()
 
@@ -82,7 +84,6 @@ async def reverse_proxy_handler(request: Request):
     body = await request.body()
 
     last_exception = None
-    rp_resp = None
    for attempt in range(MAX_RETRIES):
         try:
             rp_req = client.build_request(
@@ -91,62 +92,30 @@ async def reverse_proxy_handler(request: Request):
             rp_resp = await client.send(rp_req, stream=True)
 
             if rp_resp.status_code not in RETRY_STATUS_CODES or attempt == MAX_RETRIES - 1:
-                break
+                duration_ms = (time.monotonic() - start_time) * 1000
+                log_func = logging.info if rp_resp.is_success else logging.warning
+                log_func(f"Request finished: {request.method} {request.url.path} status_code={rp_resp.status_code} latency={duration_ms:.2f}ms")
+
+                return StreamingResponse(
+                    rp_resp.aiter_raw(),
+                    status_code=rp_resp.status_code,
+                    headers=rp_resp.headers,
+                    background=BackgroundTask(rp_resp.aclose),
+                )
 
             logging.warning(
                 f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with status {rp_resp.status_code}. Retrying..."
             )
             await rp_resp.aclose()
-            rp_resp = None  # Ensure response is not carried over
 
         except httpx.ConnectError as e:
             last_exception = e
             logging.warning(f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with connection error: {e}")
 
-    if rp_resp is None:
-        duration_ms = (time.monotonic() - start_time) * 1000
-        logging.critical(f"Request failed, cannot connect to target: {request.method} {request.url.path} status_code=502 latency={duration_ms:.2f}ms")
-        raise HTTPException(
-            status_code=502,
-            detail=f"Bad Gateway: Cannot connect to target service after {MAX_RETRIES} attempts. {last_exception}"
-        )
-
     duration_ms = (time.monotonic() - start_time) * 1000
-
-
-
-    async def body_generator(response):
-        """
-        Streams the response body. If the first chunk takes too long,
-        it sends a processing message first.
-        """
-        response_iterator = response.aiter_raw()
-        first_chunk = None
-        try:
-            # Wait for the first chunk of the body with a timeout
-            first_chunk = await asyncio.wait_for(response_iterator.__anext__(), timeout=1.5)
-        except asyncio.TimeoutError:
-            # If timeout occurs, send the processing message
-            logging.warning(f"Response from target timed out. Sending processing message for {url.path}")
-            processing_message = ':NiansuhAI Proccessing:\n\n'
-            yield processing_message.encode('utf-8')
-        except StopAsyncIteration:
-            # The response body is empty
-            pass
-
-        if first_chunk is not None:
-            yield first_chunk
-
-        # Yield the rest of the body
-        async for chunk in response_iterator:
-            yield chunk
-
-        final_duration_ms = (time.monotonic() - start_time) * 1000
-        logging.info(f"Request finished streaming: {request.method} {request.url.path} status_code={response.status_code} total_latency={final_duration_ms:.2f}ms")
-
-    return StreamingResponse(
-        body_generator(rp_resp),
-        status_code=rp_resp.status_code,
-        headers=rp_resp.headers,
-        background=BackgroundTask(rp_resp.aclose),
+    logging.critical(f"Request failed, cannot connect to target: {request.method} {request.url.path} status_code=502 latency={duration_ms:.2f}ms")
+
+    raise HTTPException(
+        status_code=502,
+        detail=f"Bad Gateway: Cannot connect to target service after {MAX_RETRIES} attempts. {last_exception}"
     )
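
Note: the substance of this change is in the last hunk. Previously the handler broke out of the retry loop, raised 502 if rp_resp was still None, and otherwise streamed through a nested body_generator that used asyncio.wait_for to emit a ':NiansuhAI Proccessing:' keep-alive chunk when the first body chunk was slow; that generator was the only user of the now-removed import asyncio. The new code returns the StreamingResponse directly from inside the loop, so code after the loop is reachable only when every attempt failed with a connection error and can raise 502 unconditionally. A minimal self-contained sketch of the resulting retry-then-stream pattern (the TARGET_URL, MAX_RETRIES, and RETRY_STATUS_CODES values here are illustrative assumptions, not the ones configured in main.py):

import logging
import time

import httpx
from fastapi import FastAPI, HTTPException, Request
from starlette.background import BackgroundTask
from starlette.responses import StreamingResponse

TARGET_URL = "http://127.0.0.1:9000"        # assumed placeholder
MAX_RETRIES = 3                             # assumed value
RETRY_STATUS_CODES = {429, 502, 503, 504}   # assumed value

app = FastAPI()
client = httpx.AsyncClient(base_url=TARGET_URL)

@app.api_route("/{full_path:path}", methods=["GET", "POST"])
async def proxy(request: Request, full_path: str):
    start_time = time.monotonic()
    body = await request.body()
    last_exception = None

    for attempt in range(MAX_RETRIES):
        try:
            rp_req = client.build_request(request.method, "/" + full_path, content=body)
            rp_resp = await client.send(rp_req, stream=True)

            # Success, a non-retryable status, or the final attempt:
            # hand the still-open stream back to the caller and close it
            # only after the response body has been fully sent.
            if rp_resp.status_code not in RETRY_STATUS_CODES or attempt == MAX_RETRIES - 1:
                return StreamingResponse(
                    rp_resp.aiter_raw(),
                    status_code=rp_resp.status_code,
                    headers=rp_resp.headers,
                    background=BackgroundTask(rp_resp.aclose),
                )

            # Retryable status: close this response and try again.
            await rp_resp.aclose()
        except httpx.ConnectError as e:
            last_exception = e
            logging.warning("Attempt %d/%d failed: %s", attempt + 1, MAX_RETRIES, e)

    # Reachable only when every attempt raised ConnectError.
    duration_ms = (time.monotonic() - start_time) * 1000
    logging.critical("Cannot connect to target, giving up after %.2fms", duration_ms)
    raise HTTPException(status_code=502, detail=f"Bad Gateway: {last_exception}")

Returning from inside the loop is what lets the commit drop both the rp_resp = None sentinel and the post-loop if rp_resp is None: check.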
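Note: the comments added in the second hunk rely on FastAPI (Starlette) matching routes in registration order, which is why the specific "/" health check must be defined before the "/{full_path:path}" catch-all. A self-contained sketch of that ordering rule, using hypothetical toy routes rather than the ones in main.py:

from fastapi import FastAPI
from fastapi.testclient import TestClient

app = FastAPI()

# Specific route registered first, as in this commit.
@app.get("/")
async def health_check():
    return {"status": "ok"}

# Catch-all registered second; it receives everything else.
@app.api_route("/{full_path:path}", methods=["GET"])
async def catch_all(full_path: str):
    return {"proxied_path": full_path}

client = TestClient(app)
assert client.get("/").json() == {"status": "ok"}                        # health route wins
assert client.get("/v1/models").json() == {"proxied_path": "v1/models"}  # everything else is caught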