Update main.py
main.py
CHANGED
@@ -16,7 +16,7 @@ if not REPLICATE_API_TOKEN:
     raise ValueError("REPLICATE_API_TOKEN environment variable not set.")
 
 # FastAPI Init
-app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.2.
+app = FastAPI(title="Replicate to OpenAI Compatibility Layer", version="9.2.3 (Raw Stream)")
 
 # --- Pydantic Models ---
 class ModelCard(BaseModel):
@@ -200,7 +200,7 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
     async with client.stream("GET", stream_url, headers={"Accept": "text/event-stream"}, timeout=None) as sse:
         current_event = None
         accumulated_content = ""
-        first_token = True
+        # first_token = True <- REMOVED THIS
 
         async for line in sse.aiter_lines():
             if not line: continue
@@ -217,13 +217,13 @@ async def stream_replicate_response(replicate_model_id: str, input_payload: dict
                 content_token = raw_data
 
                 # ### MAJOR FIX HERE ###
-                #
-                # the
-
-
-
-
+                # The lstrip() logic has been COMPLETELY REMOVED
+                # to send the raw, unmodified token from Replicate.
+                #
+                # if first_token:
+                #     content_token = content_token.lstrip()
+                #     if content_token:
+                #         first_token = False
 
                 accumulated_content += content_token
                 completion_tokens += 1
@@ -332,7 +332,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
 
 @app.get("/")
 async def root():
-    return {"message": "Replicate to OpenAI Compatibility Layer API", "version": "9.2.
+    return {"message": "Replicate to OpenAI Compatibility Layer API", "version": "9.2.3"}
 
 @app.middleware("http")
 async def add_performance_headers(request, call_next):
@@ -340,5 +340,5 @@ async def add_performance_headers(request, call_next):
     response = await call_next(request)
     process_time = time.time() - start_time
     response.headers["X-Process-Time"] = str(round(process_time, 3))
-    response.headers["X-API-Version"] = "9.2.
+    response.headers["X-API-Version"] = "9.2.3"
     return response
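
Note on the main fix: the removed first_token / lstrip() logic stripped leading whitespace from streamed tokens until the first non-empty one arrived, which silently altered replies whose leading whitespace matters (for example, output that opens with an indented code line). The sketch below is illustrative only and is not code from main.py; the join_tokens helper and the token values are made up to show the difference between the old stripping behaviour and the new raw pass-through.

# Illustrative sketch only (not the actual main.py code): why stripping the
# first streamed token can corrupt output. Token values are hypothetical.

def join_tokens(tokens, strip_first=False):
    """Accumulate streamed tokens, optionally lstrip()-ing until the first non-empty one."""
    out = ""
    first_token = strip_first
    for tok in tokens:
        if first_token:
            tok = tok.lstrip()          # old behaviour: drop leading whitespace
            if tok:
                first_token = False     # stop stripping after the first non-empty token
        out += tok
    return out

if __name__ == "__main__":
    # Hypothetical token stream where leading whitespace is significant.
    tokens = ["    def", " add", "(a, b):", "\n        return a + b"]

    print(repr(join_tokens(tokens, strip_first=True)))   # 'def add(a, b):\n        return a + b'
    print(repr(join_tokens(tokens, strip_first=False)))  # '    def add(a, b):\n        return a + b'

With the raw pass-through, the indentation of the first line survives exactly as Replicate emitted it.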
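
For context, "send the raw, unmodified token" means the token is forwarded into the streaming response body untouched. The sketch below shows roughly how such a token could be framed as an OpenAI-style SSE chunk; the field names follow the public chat.completion.chunk format, and the id, model name, and helper are hypothetical — the payload main.py actually builds may differ.

# Illustrative sketch only: framing a raw token as an OpenAI-style streaming chunk.
import json
import time

def format_sse_chunk(token: str, model: str = "replicate-proxy") -> str:
    chunk = {
        "id": "chatcmpl-example",            # hypothetical id
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {"index": 0, "delta": {"content": token}, "finish_reason": None}
        ],
    }
    # The token goes into delta.content untouched -- no lstrip() -- so any
    # leading whitespace or newlines survive the round trip to the client.
    return f"data: {json.dumps(chunk)}\n\n"

if __name__ == "__main__":
    print(format_sse_chunk("    def"), end="")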