Spaces:

rkihacker
/

R2OAI

Paused

App Files Files Community

rkihacker committed on Oct 21

Commit

bff4d10

verified ·

1 Parent(s): e58046c

Update main.py

Browse files

Files changed (1) hide show

main.py +26 -44

main.py CHANGED Viewed

@@ -23,7 +23,7 @@ POLLING_INTERVAL_SECONDS = 1  # How often to poll for updates
 # --- FastAPI App Initialization ---
 app = FastAPI(
     title="Replicate to OpenAI Compatibility Layer",
-    version="1.1.1 (SyntaxError Fixed)",
 )
 # --- Pydantic Models for OpenAI Compatibility ---
@@ -71,12 +71,9 @@ SUPPORTED_MODELS = {
 # --- Helper Functions ---
 def format_tools_for_prompt(tools: List[Tool]) -> str:
-    """Converts OpenAI tools to a string for the system prompt."""
     if not tools:
         return ""
     prompt = "You have access to the following tools. To use a tool, respond with a JSON object in the following format:\n"
-    # *** THIS IS THE CORRECTED LINE ***
     prompt += '{"type": "tool_call", "name": "tool_name", "arguments": {"arg_name": "value"}}\n\n'
     prompt += "Available tools:\n"
     for tool in tools:
@@ -84,7 +81,6 @@ def format_tools_for_prompt(tools: List[Tool]) -> str:
     return prompt
 def prepare_replicate_input(request: OpenAIChatCompletionRequest) -> Dict[str, Any]:
-    """Prepares the input payload for the Replicate API."""
     input_data = {}
     prompt_parts = []
     system_prompt = ""
@@ -127,13 +123,14 @@ def prepare_replicate_input(request: OpenAIChatCompletionRequest) -> Dict[str, A
 async def stream_replicate_with_polling(model_id: str, payload: dict):
     """
-    Creates a prediction and then polls the 'get' URL to stream back results.
     """
     url = f"https://api.replicate.com/v1/models/{model_id}/predictions"
     headers = {"Authorization": f"Bearer {REPLICATE_API_TOKEN}", "Content-Type": "application/json"}
     async with httpx.AsyncClient(timeout=300) as client:
-        # 1. Start the prediction
         try:
             response = await client.post(url, headers=headers, json={"input": payload})
             response.raise_for_status()
@@ -142,13 +139,14 @@ async def stream_replicate_with_polling(model_id: str, payload: dict):
             if not get_url:
                 error_detail = prediction.get("detail", "Failed to start prediction.")
-                yield f"data: {json.dumps({'error': error_detail})}\n\n"
                 return
         except httpx.HTTPStatusError as e:
-            yield f"data: {json.dumps({'error': str(e.response.text)})}\n\n"
             return
-        # 2. Poll the prediction 'get' URL for updates
         previous_output = ""
         status = ""
         while status not in ["succeeded", "failed", "canceled"]:
@@ -161,53 +159,44 @@ async def stream_replicate_with_polling(model_id: str, payload: dict):
                 if status == "failed":
                     error_detail = prediction_update.get("error", "Prediction failed.")
-                    yield f"data: {json.dumps({'error': error_detail})}\n\n"
                     break
                 if "output" in prediction_update and prediction_update["output"] is not None:
                     current_output = "".join(prediction_update["output"])
-                    new_chunk = current_output[len(previous_output):]
-                    if new_chunk:
                         chunk = {
-                            "id": prediction["id"],
-                            "object": "chat.completion.chunk",
-                            "created": int(time.time()),
-                            "model": model_id,
-                            "choices": [{"index": 0, "delta": {"content": new_chunk}, "finish_reason": None}]
                         }
-                        yield f"data: {json.dumps(chunk)}\n\n"
                         previous_output = current_output
-            except httpx.HTTPStatusError as e:
-                print(f"Warning: Polling failed with status {e.response.status_code}, retrying...")
             except Exception as e:
-                yield f"data: {json.dumps({'error': f'Polling error: {str(e)}'})}\n\n"
                 break
     # Send the final done signal
     done_chunk = {
-        "id": prediction["id"],
-        "object": "chat.completion.chunk",
-        "created": int(time.time()),
-        "model": model_id,
         "choices": [{"index": 0, "delta": {}, "finish_reason": "stop" if status == "succeeded" else "error"}]
     }
-    yield f"data: {json.dumps(done_chunk)}\n\n"
-    yield "data: [DONE]\n\n"
 # --- API Endpoints ---
 @app.get("/v1/models", response_model=ModelList)
 async def list_models():
-    """Lists the available models."""
-    model_cards = [ModelCard(id=model_name) for model_name in SUPPORTED_MODELS.keys()]
-    return ModelList(data=model_cards)
 @app.post("/v1/chat/completions")
 async def create_chat_completion(request: OpenAIChatCompletionRequest):
-    """Creates a chat completion."""
     model_key = request.model
     if model_key not in SUPPORTED_MODELS:
         raise HTTPException(status_code=404, detail=f"Model not found. Supported models: {list(SUPPORTED_MODELS.keys())}")
@@ -228,11 +217,8 @@ async def create_chat_completion(request: OpenAIChatCompletionRequest):
             response.raise_for_status()
             prediction = response.json()
-            output = prediction.get("output", "")
-            if isinstance(output, list):
-                output = "".join(output)
-            # Basic tool call detection
             try:
                 tool_call_data = json.loads(output)
                 if tool_call_data.get("type") == "tool_call":
@@ -242,15 +228,11 @@ async def create_chat_completion(request: OpenAIChatCompletionRequest):
             except (json.JSONDecodeError, TypeError):
                 message_content, tool_calls = output, None
-            completion_response = {
-                "id": prediction["id"],
-                "object": "chat.completion",
-                "created": int(time.time()),
-                "model": model_key,
                 "choices": [{"index": 0, "message": {"role": "assistant", "content": message_content, "tool_calls": tool_calls}, "finish_reason": "stop"}],
                 "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-            }
-            return JSONResponse(content=completion_response)
         except httpx.HTTPStatusError as e:
             raise HTTPException(status_code=e.response.status_code, detail=e.response.text)

 # --- FastAPI App Initialization ---
 app = FastAPI(
     title="Replicate to OpenAI Compatibility Layer",
+    version="1.2.0 (Streaming Fixed)",
 )
 # --- Pydantic Models for OpenAI Compatibility ---
 # --- Helper Functions ---
 def format_tools_for_prompt(tools: List[Tool]) -> str:
     if not tools:
         return ""
     prompt = "You have access to the following tools. To use a tool, respond with a JSON object in the following format:\n"
     prompt += '{"type": "tool_call", "name": "tool_name", "arguments": {"arg_name": "value"}}\n\n'
     prompt += "Available tools:\n"
     for tool in tools:
     return prompt
 def prepare_replicate_input(request: OpenAIChatCompletionRequest) -> Dict[str, Any]:
     input_data = {}
     prompt_parts = []
     system_prompt = ""
 async def stream_replicate_with_polling(model_id: str, payload: dict):
     """
+    Creates a prediction and polls the 'get' URL to stream back results.
+    Yields raw JSON strings for EventSourceResponse to handle.
     """
     url = f"https://api.replicate.com/v1/models/{model_id}/predictions"
     headers = {"Authorization": f"Bearer {REPLICATE_API_TOKEN}", "Content-Type": "application/json"}
     async with httpx.AsyncClient(timeout=300) as client:
+        prediction = None
         try:
             response = await client.post(url, headers=headers, json={"input": payload})
             response.raise_for_status()
             if not get_url:
                 error_detail = prediction.get("detail", "Failed to start prediction.")
+                error_chunk = {"error": {"message": error_detail, "type": "api_error", "code": 500}}
+                yield json.dumps(error_chunk)
                 return
         except httpx.HTTPStatusError as e:
+            error_chunk = {"error": {"message": e.response.text, "type": "api_error", "code": e.response.status_code}}
+            yield json.dumps(error_chunk)
             return
         previous_output = ""
         status = ""
         while status not in ["succeeded", "failed", "canceled"]:
                 if status == "failed":
                     error_detail = prediction_update.get("error", "Prediction failed.")
+                    chunk = {"choices": [{"delta": {"content": f"\n\n[ERROR: {error_detail}]"}, "finish_reason": "error"}]}
+                    yield json.dumps(chunk)
                     break
                 if "output" in prediction_update and prediction_update["output"] is not None:
                     current_output = "".join(prediction_update["output"])
+                    new_chunk_text = current_output[len(previous_output):]
+                    if new_chunk_text:
                         chunk = {
+                            "id": prediction["id"], "object": "chat.completion.chunk", "created": int(time.time()), "model": model_id,
+                            "choices": [{"index": 0, "delta": {"content": new_chunk_text}, "finish_reason": None}]
                         }
+                        yield json.dumps(chunk) # *** FIX: Yield raw JSON string
                         previous_output = current_output
             except Exception as e:
+                error_chunk = {"error": {"message": f"Polling error: {str(e)}", "type": "internal_error", "code": 500}}
+                yield json.dumps(error_chunk)
                 break
     # Send the final done signal
     done_chunk = {
+        "id": prediction["id"], "object": "chat.completion.chunk", "created": int(time.time()), "model": model_id,
         "choices": [{"index": 0, "delta": {}, "finish_reason": "stop" if status == "succeeded" else "error"}]
     }
+    yield json.dumps(done_chunk) # *** FIX: Yield raw JSON string
+    yield "[DONE]" # *** FIX: Yield the special [DONE] marker
 # --- API Endpoints ---
 @app.get("/v1/models", response_model=ModelList)
 async def list_models():
+    return ModelList(data=[ModelCard(id=model_name) for model_name in SUPPORTED_MODELS.keys()])
 @app.post("/v1/chat/completions")
 async def create_chat_completion(request: OpenAIChatCompletionRequest):
     model_key = request.model
     if model_key not in SUPPORTED_MODELS:
         raise HTTPException(status_code=404, detail=f"Model not found. Supported models: {list(SUPPORTED_MODELS.keys())}")
             response.raise_for_status()
             prediction = response.json()
+            output = "".join(prediction.get("output", []))
             try:
                 tool_call_data = json.loads(output)
                 if tool_call_data.get("type") == "tool_call":
             except (json.JSONDecodeError, TypeError):
                 message_content, tool_calls = output, None
+            return JSONResponse(content={
+                "id": prediction["id"], "object": "chat.completion", "created": int(time.time()), "model": model_key,
                 "choices": [{"index": 0, "message": {"role": "assistant", "content": message_content, "tool_calls": tool_calls}, "finish_reason": "stop"}],
                 "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+            })
         except httpx.HTTPStatusError as e:
             raise HTTPException(status_code=e.response.status_code, detail=e.response.text)