Update main.py
Browse files
main.py
CHANGED
|
@@ -3,7 +3,8 @@ import httpx
|
|
| 3 |
import json
|
| 4 |
import time
|
| 5 |
import asyncio
|
| 6 |
-
from fastapi import FastAPI, HTTPException
|
|
|
|
| 7 |
from fastapi.responses import StreamingResponse
|
| 8 |
from pydantic import BaseModel, Field
|
| 9 |
from typing import List, Dict, Any, Optional, Union, Literal
|
|
@@ -12,11 +13,30 @@ from dotenv import load_dotenv
|
|
| 12 |
# Load environment variables
|
| 13 |
load_dotenv()
|
| 14 |
REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
|
|
|
|
|
|
|
| 15 |
if not REPLICATE_API_TOKEN:
|
| 16 |
raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# FastAPI Init
|
| 19 |
-
app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# --- Pydantic Models ---
|
| 22 |
class ModelCard(BaseModel):
|
|
@@ -200,7 +220,6 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
|
|
| 200 |
async with client.stream("GET", stream_url, headers={"Accept": "text/event-stream"}, timeout=None) as sse:
|
| 201 |
current_event = None
|
| 202 |
accumulated_content = ""
|
| 203 |
-
# first_token = True <- REMOVED THIS
|
| 204 |
|
| 205 |
async for line in sse.aiter_lines():
|
| 206 |
if not line: continue
|
|
@@ -216,14 +235,7 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
|
|
| 216 |
except (json.JSONDecodeError, TypeError):
|
| 217 |
content_token = raw_data
|
| 218 |
|
| 219 |
-
#
|
| 220 |
-
# The lstrip() logic has been COMPLETELY REMOVED
|
| 221 |
-
# to send the raw, unmodified token from Replicate.
|
| 222 |
-
#
|
| 223 |
-
# if first_token:
|
| 224 |
-
# content_token = content_token.lstrip()
|
| 225 |
-
# if content_token:
|
| 226 |
-
# first_token = False
|
| 227 |
|
| 228 |
accumulated_content += content_token
|
| 229 |
completion_tokens += 1
|
|
@@ -234,7 +246,6 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
|
|
| 234 |
chunk = ChatCompletionChunk(id=request_id, created=int(time.time()), model=replicate_model_id, choices=[ChoiceDelta(index=0, delta=DeltaMessage(tool_calls=[tool_call]), finish_reason=None)])
|
| 235 |
yield f"data: {chunk.json()}\n\n"
|
| 236 |
else:
|
| 237 |
-
# Only yield a chunk if there is content to send.
|
| 238 |
if content_token:
|
| 239 |
chunk = ChatCompletionChunk(id=request_id, created=int(time.time()), model=replicate_model_id, choices=[ChoiceDelta(index=0, delta=DeltaMessage(content=content_token), finish_reason=None)])
|
| 240 |
yield f"data: {chunk.json()}\n\n"
|
|
@@ -253,12 +264,18 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
|
|
| 253 |
yield "data: [DONE]\n\n"
|
| 254 |
|
| 255 |
# --- Endpoints ---
|
| 256 |
-
@app.get("/v1/models")
|
| 257 |
async def list_models():
|
|
|
|
|
|
|
|
|
|
| 258 |
return ModelList(data=[ModelCard(id=k) for k in SUPPORTED_MODELS.keys()])
|
| 259 |
|
| 260 |
-
@app.post("/v1/chat/completions")
|
| 261 |
async def create_chat_completion(request: ChatCompletionRequest):
|
|
|
|
|
|
|
|
|
|
| 262 |
if request.model not in SUPPORTED_MODELS:
|
| 263 |
raise HTTPException(status_code=404, detail=f"Model not found. Available models: {list(SUPPORTED_MODELS.keys())}")
|
| 264 |
|
|
@@ -332,7 +349,10 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
| 332 |
|
| 333 |
@app.get("/")
|
| 334 |
async def root():
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
@app.middleware("http")
|
| 338 |
async def add_performance_headers(request, call_next):
|
|
@@ -340,5 +360,5 @@ async def add_performance_headers(request, call_next):
|
|
| 340 |
response = await call_next(request)
|
| 341 |
process_time = time.time() - start_time
|
| 342 |
response.headers["X-Process-Time"] = str(round(process_time, 3))
|
| 343 |
-
response.headers["X-API-Version"] = "9.2.
|
| 344 |
return response
|
|
|
|
| 3 |
import json
|
| 4 |
import time
|
| 5 |
import asyncio
|
| 6 |
+
from fastapi import FastAPI, HTTPException, Security, Depends, status
|
| 7 |
+
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 8 |
from fastapi.responses import StreamingResponse
|
| 9 |
from pydantic import BaseModel, Field
|
| 10 |
from typing import List, Dict, Any, Optional, Union, Literal
|
|
|
|
| 13 |
# Load environment variables
|
| 14 |
load_dotenv()
|
| 15 |
REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
|
| 16 |
+
SERVER_API_KEY = os.getenv("SERVER_API_KEY") # <-- New key for server auth
|
| 17 |
+
|
| 18 |
if not REPLICATE_API_TOKEN:
|
| 19 |
raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
|
| 20 |
+
if not SERVER_API_KEY:
|
| 21 |
+
raise ValueError("SERVER_API_KEY environment variable not set. This is required to protect your server.")
|
| 22 |
|
| 23 |
# FastAPI Init
|
| 24 |
+
app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.2.4 (Server Auth Added)")
|
| 25 |
+
|
| 26 |
+
# --- Authentication ---
|
| 27 |
+
# Single HTTPBearer scheme instance, shared by every protected route.
security = HTTPBearer()

async def verify_api_key(credentials: HTTPAuthorizationCredentials = Security(security)) -> bool:
    """Validate the Bearer token supplied in the Authorization header.

    Args:
        credentials: Parsed ``Authorization`` header, injected by FastAPI's
            ``HTTPBearer`` security scheme.

    Returns:
        True when the presented token matches ``SERVER_API_KEY``.

    Raises:
        HTTPException: 401 when the scheme is not ``Bearer`` or the token
            does not match the configured server key.
    """
    import hmac  # stdlib; local import keeps the file-level import block untouched

    # hmac.compare_digest runs in constant time, so an attacker cannot
    # recover the key byte-by-byte from response-time differences the way
    # a plain `!=` string comparison would allow.
    token_matches = hmac.compare_digest(credentials.credentials, SERVER_API_KEY)
    if credentials.scheme != "Bearer" or not token_matches:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or missing API key",
            headers={"WWW-Authenticate": "Bearer"},
        )
    return True
|
| 40 |
|
| 41 |
# --- Pydantic Models ---
|
| 42 |
class ModelCard(BaseModel):
|
|
|
|
| 220 |
async with client.stream("GET", stream_url, headers={"Accept": "text/event-stream"}, timeout=None) as sse:
|
| 221 |
current_event = None
|
| 222 |
accumulated_content = ""
|
|
|
|
| 223 |
|
| 224 |
async for line in sse.aiter_lines():
|
| 225 |
if not line: continue
|
|
|
|
| 235 |
except (json.JSONDecodeError, TypeError):
|
| 236 |
content_token = raw_data
|
| 237 |
|
| 238 |
+
# Removed the lstrip() logic to send raw tokens
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
accumulated_content += content_token
|
| 241 |
completion_tokens += 1
|
|
|
|
| 246 |
chunk = ChatCompletionChunk(id=request_id, created=int(time.time()), model=replicate_model_id, choices=[ChoiceDelta(index=0, delta=DeltaMessage(tool_calls=[tool_call]), finish_reason=None)])
|
| 247 |
yield f"data: {chunk.json()}\n\n"
|
| 248 |
else:
|
|
|
|
| 249 |
if content_token:
|
| 250 |
chunk = ChatCompletionChunk(id=request_id, created=int(time.time()), model=replicate_model_id, choices=[ChoiceDelta(index=0, delta=DeltaMessage(content=content_token), finish_reason=None)])
|
| 251 |
yield f"data: {chunk.json()}\n\n"
|
|
|
|
| 264 |
yield "data: [DONE]\n\n"
|
| 265 |
|
| 266 |
# --- Endpoints ---
|
| 267 |
+
@app.get("/v1/models", dependencies=[Depends(verify_api_key)])
async def list_models():
    """Return the catalogue of models this proxy can serve.

    Requires a valid Bearer token, enforced by the ``verify_api_key``
    route dependency.
    """
    # Iterating the dict directly yields its keys (the model ids).
    cards = [ModelCard(id=model_id) for model_id in SUPPORTED_MODELS]
    return ModelList(data=cards)
|
| 273 |
|
| 274 |
+
@app.post("/v1/chat/completions", dependencies=[Depends(verify_api_key)])
|
| 275 |
async def create_chat_completion(request: ChatCompletionRequest):
|
| 276 |
+
"""
|
| 277 |
+
Protected endpoint to create a chat completion.
|
| 278 |
+
"""
|
| 279 |
if request.model not in SUPPORTED_MODELS:
|
| 280 |
raise HTTPException(status_code=404, detail=f"Model not found. Available models: {list(SUPPORTED_MODELS.keys())}")
|
| 281 |
|
|
|
|
| 349 |
|
| 350 |
@app.get("/")
async def root():
    """Unauthenticated root endpoint used for health checks."""
    payload = {
        "message": "Replicate to OpenAI Compatibility Layer API",
        "version": "9.2.4",
    }
    return payload
|
| 356 |
|
| 357 |
@app.middleware("http")
|
| 358 |
async def add_performance_headers(request, call_next):
|
|
|
|
| 360 |
response = await call_next(request)
|
| 361 |
process_time = time.time() - start_time
|
| 362 |
response.headers["X-Process-Time"] = str(round(process_time, 3))
|
| 363 |
+
response.headers["X-API-Version"] = "9.2.4"
|
| 364 |
return response
|