Update main.py
main.py
CHANGED
@@ -16,7 +16,7 @@ if not REPLICATE_API_TOKEN:
     raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
 
 # FastAPI Init
-app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.2.
+app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.2.3 (Raw Stream)")
 
 # --- Pydantic Models ---
 class ModelCard(BaseModel):
@@ -200,7 +200,7 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
     async with client.stream("GET", stream_url, headers={"Accept": "text/event-stream"}, timeout=None) as sse:
         current_event = None
         accumulated_content = ""
-        first_token = True
+        # first_token = True <- REMOVED THIS
 
         async for line in sse.aiter_lines():
             if not line: continue
@@ -217,13 +217,13 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
                 content_token = raw_data
 
                 # ### MAJOR FIX HERE ###
-                #
-                # the
-
-
-
-
+                # The lstrip() logic has been COMPLETELY REMOVED
+                # to send the raw, unmodified token from Replicate.
+                #
+                # if first_token:
+                #     content_token = content_token.lstrip()
+                #     if content_token:
+                #         first_token = False
 
                 accumulated_content += content_token
                 completion_tokens += 1
@@ -332,7 +332,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
 
 @app.get("/")
 async def root():
-    return {"message": "Replicate to OpenAI Compatibility Layer API", "version": "9.2.
+    return {"message": "Replicate to OpenAI Compatibility Layer API", "version": "9.2.3"}
 
 @app.middleware("http")
 async def add_performance_headers(request, call_next):
@@ -340,5 +340,5 @@ async def add_performance_headers(request, call_next):
     response = await call_next(request)
     process_time = time.time() - start_time
     response.headers["X-Process-Time"] = str(round(process_time, 3))
-    response.headers["X-API-Version"] = "9.2.
+    response.headers["X-API-Version"] = "9.2.3"
     return response
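
Note on the main fix: the removed first_token / lstrip() logic stripped leading whitespace from streamed tokens until the first non-empty one arrived, which silently altered replies whose leading whitespace matters (for example, output that opens with an indented code line). The sketch below is illustrative only and is not code from main.py; the join_tokens helper and the token values are made up to show the difference between the old stripping behaviour and the new raw pass-through.

# Illustrative sketch only (not the actual main.py code): why stripping the
# first streamed token can corrupt output. Token values are hypothetical.

def join_tokens(tokens, strip_first=False):
    """Accumulate streamed tokens, optionally lstrip()-ing until the first non-empty one."""
    out = ""
    first_token = strip_first
    for tok in tokens:
        if first_token:
            tok = tok.lstrip()          # old behaviour: drop leading whitespace
            if tok:
                first_token = False     # stop stripping after the first non-empty token
        out += tok
    return out

if __name__ == "__main__":
    # Hypothetical token stream where leading whitespace is significant.
    tokens = ["    def", " add", "(a, b):", "\n        return a + b"]

    print(repr(join_tokens(tokens, strip_first=True)))   # 'def add(a, b):\n        return a + b'
    print(repr(join_tokens(tokens, strip_first=False)))  # '    def add(a, b):\n        return a + b'

With the raw pass-through, the indentation of the first line survives exactly as Replicate emitted it.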
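
For context, "send the raw, unmodified token" means the token is forwarded into the streaming response body untouched. The sketch below shows roughly how such a token could be framed as an OpenAI-style SSE chunk; the field names follow the public chat.completion.chunk format, and the id, model name, and helper are hypothetical — the payload main.py actually builds may differ.

# Illustrative sketch only: framing a raw token as an OpenAI-style streaming chunk.
import json
import time

def format_sse_chunk(token: str, model: str = "replicate-proxy") -> str:
    chunk = {
        "id": "chatcmpl-example",            # hypothetical id
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {"index": 0, "delta": {"content": token}, "finish_reason": None}
        ],
    }
    # The token goes into delta.content untouched -- no lstrip() -- so any
    # leading whitespace or newlines survive the round trip to the client.
    return f"data: {json.dumps(chunk)}\n\n"

if __name__ == "__main__":
    print(format_sse_chunk("    def"), end="")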