Update main.py
main.py CHANGED
@@ -1,6 +1,6 @@
 import httpx
 from fastapi import FastAPI, Request, HTTPException
-from starlette.responses import StreamingResponse
+from starlette.responses import StreamingResponse, JSONResponse
 from starlette.background import BackgroundTask
 import os
 import random
@@ -9,34 +9,23 @@ import time
 from contextlib import asynccontextmanager
 
 # --- Production-Ready Configuration ---
-# All key settings are now configurable via environment variables.
-
-# 1. Logging Configuration
 LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
 logging.basicConfig(
     level=LOG_LEVEL,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 
-# 2. Target URL
 TARGET_URL = os.getenv("TARGET_URL", "https://api.gmi-serving.com/v1/chat")
-
-# 3. Retry Logic Configuration
-# Default to 7 retries as requested.
 MAX_RETRIES = int(os.getenv("MAX_RETRIES", "7"))
-
-# Default retry codes now include 500. Configurable via a comma-separated string.
 DEFAULT_RETRY_CODES = "429,500,502,503,504"
 RETRY_CODES_STR = os.getenv("RETRY_CODES", DEFAULT_RETRY_CODES)
 try:
-    # Parse the comma-separated string into a set of integers.
     RETRY_STATUS_CODES = {int(code.strip()) for code in RETRY_CODES_STR.split(',')}
     logging.info(f"Will retry on the following status codes: {RETRY_STATUS_CODES}")
 except ValueError:
     logging.error(f"Invalid RETRY_CODES format: '{RETRY_CODES_STR}'. Falling back to default: {DEFAULT_RETRY_CODES}")
     RETRY_STATUS_CODES = {int(code.strip()) for code in DEFAULT_RETRY_CODES.split(',')}
 
-
 # --- Helper Function ---
 def generate_random_ip():
     """Generates a random, valid-looking IPv4 address."""
@@ -46,7 +35,6 @@ def generate_random_ip():
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Manages the lifecycle of the HTTPX client."""
-    # Using a longer timeout for the client itself, but no timeout per-request.
     async with httpx.AsyncClient(base_url=TARGET_URL, timeout=None) as client:
         app.state.http_client = client
         yield
@@ -54,23 +42,34 @@ async def lifespan(app: FastAPI):
 # Initialize the FastAPI app with the lifespan manager and disabled docs
 app = FastAPI(docs_url=None, redoc_url=None, lifespan=lifespan)
 
-# ---
-async def _reverse_proxy(request: Request):
+# --- API Endpoints ---
+
+# 1. Health Check Route (Defined FIRST)
+# This specific route will be matched before the catch-all proxy route.
+@app.get("/")
+async def health_check():
+    """Provides a basic health check endpoint."""
+    return JSONResponse({"status": "ok", "target": TARGET_URL})
+
+# 2. Catch-All Reverse Proxy Route (Defined SECOND)
+# This will capture ALL other requests (e.g., /completions, /v1/models, etc.)
+# and forward them. This eliminates any redirect issues.
+@app.api_route("/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"])
+async def reverse_proxy_handler(request: Request):
     """
-
+    A catch-all reverse proxy that forwards requests to the target URL with
+    enhanced retry logic and latency logging.
     """
-    # Start timer for latency tracking. time.monotonic is used for reliable duration measurement.
     start_time = time.monotonic()
 
     client: httpx.AsyncClient = request.app.state.http_client
     url = httpx.URL(path=request.url.path, query=request.url.query.encode("utf-8"))
 
-    # --- Header Processing ---
     request_headers = dict(request.headers)
     request_headers.pop("host", None)
 
     random_ip = generate_random_ip()
-    logging.info(f"Client '{request.client.host}' proxied with spoofed IP: {random_ip}")
+    logging.info(f"Client '{request.client.host}' proxied with spoofed IP: {random_ip} for path: {url.path}")
 
     specific_headers = {
         "accept": "application/json, text/plain, */*",
@@ -89,7 +88,6 @@ async def _reverse_proxy(request: Request):
 
     body = await request.body()
 
-    # --- Retry Logic ---
     last_exception = None
     for attempt in range(MAX_RETRIES):
         try:
@@ -98,23 +96,10 @@ async def _reverse_proxy(request: Request):
             )
             rp_resp = await client.send(rp_req, stream=True)
 
-
-            if rp_resp.status_code not in RETRY_STATUS_CODES:
-                # Log latency and success before returning
+            if rp_resp.status_code not in RETRY_STATUS_CODES or attempt == MAX_RETRIES - 1:
                 duration_ms = (time.monotonic() - start_time) * 1000
-                logging.info
-
-                return StreamingResponse(
-                    rp_resp.aiter_raw(),
-                    status_code=rp_resp.status_code,
-                    headers=rp_resp.headers,
-                    background=BackgroundTask(rp_resp.aclose),
-                )
-
-            # If we are on the last attempt, return the error response without retrying further.
-            if attempt == MAX_RETRIES - 1:
-                duration_ms = (time.monotonic() - start_time) * 1000
-                logging.error(f"Request failed after max retries: {request.method} {request.url.path} status_code={rp_resp.status_code} latency={duration_ms:.2f}ms")
+                log_func = logging.info if rp_resp.is_success else logging.warning
+                log_func(f"Request finished: {request.method} {request.url.path} status_code={rp_resp.status_code} latency={duration_ms:.2f}ms")
 
                 return StreamingResponse(
                     rp_resp.aiter_raw(),
@@ -123,35 +108,19 @@ async def _reverse_proxy(request: Request):
                 background=BackgroundTask(rp_resp.aclose),
             )
 
-            # Log the retry attempt before closing the response and looping again.
             logging.warning(
-                f"Attempt {attempt + 1}/{MAX_RETRIES} failed with status {rp_resp.status_code}. Retrying..."
+                f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with status {rp_resp.status_code}. Retrying..."
            )
             await rp_resp.aclose()
 
         except httpx.ConnectError as e:
             last_exception = e
-            logging.warning(f"Attempt {attempt + 1}/{MAX_RETRIES} failed with connection error: {e}")
+            logging.warning(f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with connection error: {e}")
 
-    # This block is reached if all attempts fail with a connection error.
     duration_ms = (time.monotonic() - start_time) * 1000
     logging.critical(f"Request failed, cannot connect to target: {request.method} {request.url.path} status_code=502 latency={duration_ms:.2f}ms")
 
     raise HTTPException(
         status_code=502,
         detail=f"Bad Gateway: Cannot connect to target service after {MAX_RETRIES} attempts. {last_exception}"
-    )
-
-# --- API Endpoints ---
-@app.api_route(
-    "/completions/{full_path:path}",
-    methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"]
-)
-async def chat_proxy_handler(request: Request):
-    """Captures all requests under /completions/ and forwards them."""
-    return await _reverse_proxy(request)
-
-@app.get("/")
-async def health_check():
-    """Provides a basic health check endpoint."""
-    return {"status": "ok", "proxying_endpoint": "/completions", "target": "TypeGPT"}
+    )
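The route-ordering comments in the new code can be checked directly: FastAPI matches routes in definition order, so a request to "/" is handled by health_check even though the catch-all "/{full_path:path}" pattern would also match it. A minimal sketch (not part of the commit), assuming the file above is saved as main.py and FastAPI's test client dependencies are installed:

from fastapi.testclient import TestClient
from main import app

# Entering the context manager runs the lifespan handler, which creates
# app.state.http_client; nothing connects to TARGET_URL until a request
# is actually proxied.
with TestClient(app) as client:
    resp = client.get("/")
    assert resp.status_code == 200
    assert resp.json()["status"] == "ok"  # served by health_check, not the proxy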
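The combined condition in the retry loop is the behavioral core of this change: a response is returned immediately when its status code is not retryable, and the response from the final attempt is returned even when it is. A pure-Python sketch of that decision, with the constants copied from the defaults above:

MAX_RETRIES = 7
RETRY_STATUS_CODES = {429, 500, 502, 503, 504}

def should_return(status_code: int, attempt: int) -> bool:
    # Mirrors the condition in reverse_proxy_handler's retry loop.
    return status_code not in RETRY_STATUS_CODES or attempt == MAX_RETRIES - 1

assert should_return(200, 0)      # success: stream it back immediately
assert not should_return(503, 0)  # retryable: close the response and loop
assert should_return(503, 6)      # last attempt: return the error response as-is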
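The RETRY_CODES parsing is all-or-nothing: one malformed entry makes the set comprehension raise ValueError, and the except branch falls back to the full default set rather than keeping the valid entries. A quick illustration:

# Whitespace around entries is tolerated by the .strip() call:
assert {int(c.strip()) for c in "429, 500,502".split(',')} == {429, 500, 502}

# A single bad entry rejects the whole string:
try:
    {int(c.strip()) for c in "429,5xx".split(',')}
except ValueError:
    print("falls back to DEFAULT_RETRY_CODES")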
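The commit does not show how the app is launched. A hypothetical runner (uvicorn, the host, and the port are all assumptions) would need to set environment overrides before main.py is imported, because every configuration value is read at import time:

import os

# Hypothetical overrides; the names match the os.getenv calls above.
os.environ["LOG_LEVEL"] = "DEBUG"
os.environ["MAX_RETRIES"] = "3"
os.environ["RETRY_CODES"] = "429,503"

import uvicorn

# uvicorn imports the "main:app" string lazily, after the overrides are set.
uvicorn.run("main:app", host="0.0.0.0", port=8000)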