Spaces:

rkihacker
/

R2OAI

Paused

App Files Community

rkihacker committed on Oct 21

Commit

185d724

verified ·

1 Parent(s): 5d3f475

Update main.py

Browse files

Files changed (1) hide show

main.py +11 -11

main.py CHANGED Viewed

@@ -22,7 +22,7 @@ if not SERVER_API_KEY:
     raise ValueError("SERVER_API_KEY environment variable not set. This is required to protect your server.")
 # FastAPI Init
-app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.2.7 (Non-Stream Fix)")
 # --- Authentication ---
 security = HTTPBearer()
@@ -241,7 +241,8 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
                         except (json.JSONDecodeError, TypeError):
                             content_token = raw_data
-                        # Removed the lstrip() logic to send raw tokens
                         accumulated_content += content_token
                         completion_tokens += 1
@@ -294,7 +295,6 @@ async def create_chat_completion(request: ChatCompletionRequest):
         "top_p": request.top_p or 1.0
     }
-    # Only add max_new_tokens if the user *actually* provided it.
     if request.max_tokens is not None:
         replicate_input["max_new_tokens"] = request.max_tokens
@@ -320,8 +320,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
             resp.raise_for_status()
             pred = resp.json()
-            # ### MAJOR FIX HERE (Non-Streaming Join Error) ###
-            # Robustly handle the 'output' field which could be a list, string, or null
             raw_output = pred.get("output")
             if isinstance(raw_output, list):
@@ -329,10 +328,12 @@ async def create_chat_completion(request: ChatCompletionRequest):
             elif isinstance(raw_output, str):
                 output = raw_output          # Handle if it's just a single string
             else:
-                # Handle None, null, int, bool, or other unexpected types
                 output = ""
-            output = output.strip() # Clean up any leading/trailing whitespace
             end_time = time.time()
             prompt_tokens = len(replicate_input.get("prompt", "")) // 4
@@ -367,7 +368,6 @@ async def create_chat_completion(request: ChatCompletionRequest):
         except httpx.HTTPStatusError as e:
             raise HTTPException(status_code=e.response.status_code, detail=f"Error from Replicate API: {e.response.text}")
         except Exception as e:
-            # Catch the join error and any others
             raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
 @app.get("/")
@@ -375,7 +375,7 @@ async def root():
     """
     Root endpoint for health checks. Does not require authentication.
     """
-    return {"message": "Replicate to OpenAI Compatibility Layer API", "version": "9.2.7"}
 @app.middleware("http")
 async def add_performance_headers(request, call_next):
@@ -383,5 +383,5 @@ async def add_performance_headers(request, call_next):
     response = await call_next(request)
     process_time = time.time() - start_time
     response.headers["X-Process-Time"] = str(round(process_time, 3))
-    response.headers["X-API-Version"] = "9.2.7"
     return response

     raise ValueError("SERVER_API_KEY environment variable not set. This is required to protect your server.")
 # FastAPI Init
+app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.2.8 (Raw Output Fix)")
 # --- Authentication ---
 security = HTTPBearer()
                         except (json.JSONDecodeError, TypeError):
                             content_token = raw_data
+                        # There is NO lstrip() or strip() here.
+                        # This sends the raw, unmodified token.
                         accumulated_content += content_token
                         completion_tokens += 1
         "top_p": request.top_p or 1.0
     }
     if request.max_tokens is not None:
         replicate_input["max_new_tokens"] = request.max_tokens
             resp.raise_for_status()
             pred = resp.json()
+            # Handle the 'output' field which could be a list, string, or null
             raw_output = pred.get("output")
             if isinstance(raw_output, list):
             elif isinstance(raw_output, str):
                 output = raw_output          # Handle if it's just a single string
             else:
                 output = ""
+            # ### MAJOR FIX HERE (Non-Streaming) ###
+            # Removed output.strip() to return the raw response,
+            # even if it's just a space.
+            # output = output.strip() # <-- REMOVED
             end_time = time.time()
             prompt_tokens = len(replicate_input.get("prompt", "")) // 4
         except httpx.HTTPStatusError as e:
             raise HTTPException(status_code=e.response.status_code, detail=f"Error from Replicate API: {e.response.text}")
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
 @app.get("/")
     """
     Root endpoint for health checks. Does not require authentication.
     """
+    return {"message": "Replicate to OpenAI Compatibility Layer API", "version": "9.2.8"}
 @app.middleware("http")
 async def add_performance_headers(request, call_next):
     response = await call_next(request)
     process_time = time.time() - start_time
     response.headers["X-Process-Time"] = str(round(process_time, 3))
+    response.headers["X-API-Version"] = "9.2.8"
     return response