Sync from GitHub 82cd055
main.py CHANGED
@@ -23,11 +23,11 @@ import re
 import base64
 import tempfile
 import contextlib
-from typing import Any, Dict, List, Optional, Tuple, Deque
+from typing import Any, Dict, List, Optional, Tuple, Deque, Literal
 
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict, Field
 from starlette.responses import JSONResponse
 from fastapi.responses import StreamingResponse, Response
 import json
@@ -330,12 +330,84 @@ def load_video_frames_from_any(src: Dict[str, Any], max_frames: int = MAX_VIDEO_
 
 
 class ChatRequest(BaseModel):
-    model: Optional[str] = None
-    messages: List[Dict[str, Any]]
-    max_tokens: Optional[int] = None
-    temperature: Optional[float] = None
-    stream: Optional[bool] = None
-    session_id: Optional[str] = None
+    """OpenAI-compatible Chat Completions request body."""
+    model: Optional[str] = Field(default=None, description="Model id (defaults to env MODEL_REPO_ID).")
+    messages: List[Dict[str, Any]] = Field(description="OpenAI-style messages array. Supports text, image_url/input_image, video_url/input_video parts.")
+    max_tokens: Optional[int] = Field(default=None, description="Max new tokens to generate.")
+    temperature: Optional[float] = Field(default=None, description="Sampling temperature.")
+    stream: Optional[bool] = Field(default=None, description="When true, returns Server-Sent Events stream.")
+    session_id: Optional[str] = Field(default=None, description="Optional session id for resumable SSE.")
+    # Pydantic v2 schema extras with rich examples
+    model_config = ConfigDict(
+        json_schema_extra={
+            "examples": [
+                {
+                    "summary": "Text-only",
+                    "value": {
+                        "messages": [
+                            {"role": "user", "content": "Hello, summarize the benefits of multimodal LLMs."}
+                        ],
+                        "max_tokens": 128
+                    }
+                },
+                {
+                    "summary": "Image by URL",
+                    "value": {
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "text", "text": "What is in this image?"},
+                                    {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}}
+                                ]
+                            }
+                        ],
+                        "max_tokens": 128
+                    }
+                },
+                {
+                    "summary": "Video by URL (streaming SSE)",
+                    "value": {
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "text", "text": "Describe this clip briefly."},
+                                    {"type": "video_url", "video_url": {"url": "https://example.com/clip.mp4"}}
+                                ]
+                            }
+                        ],
+                        "stream": True,
+                        "max_tokens": 128
+                    }
+                }
+            ]
+        }
+    )
+
+class MessageModel(BaseModel):
+    role: Literal["system", "user", "assistant"]
+    content: str
+
+class ChoiceModel(BaseModel):
+    index: int
+    message: MessageModel
+    finish_reason: Optional[str] = None
+
+class UsageModel(BaseModel):
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+class ChatCompletionResponse(BaseModel):
+    """Non-streaming Chat Completions response (when stream=false)."""
+    id: str
+    object: str
+    created: int
+    model: str
+    choices: List[ChoiceModel]
+    usage: UsageModel
+    context: Dict[str, Any] = {}
 
 
 class Engine:
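The models above define the non-streaming contract of /v1/chat/completions. As a quick sanity check of that schema, a minimal client call could look like the following sketch; it assumes the service is reachable at http://localhost:8000 and that the third-party `requests` package is installed, and the image URL is the placeholder from the schema examples, so treat everything except the endpoint path and field names as illustrative.

# Sketch: POST a multimodal ChatRequest and read the ChatCompletionResponse.
# Assumptions: server at http://localhost:8000, `requests` installed.
import requests

body = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
            ],
        }
    ],
    "max_tokens": 128,
}

resp = requests.post("http://localhost:8000/v1/chat/completions", json=body, timeout=120)
resp.raise_for_status()
data = resp.json()

# With stream unset, the JSON body should match ChatCompletionResponse above,
# so choices/message/usage can be read directly.
print(data["choices"][0]["message"]["content"])
print(data["usage"]["total_tokens"])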
@@ -896,7 +968,27 @@ def health():
     return JSONResponse({"ok": True, "modelReady": ready, "modelId": model_id, "error": err, "context": ctx})
 
 
-@app.post(
+@app.post(
+    "/v1/chat/completions",
+    tags=["chat"],
+    response_model=ChatCompletionResponse,
+    responses={
+        200: {
+            "description": "When stream=true, the response is text/event-stream (SSE). When stream=false, JSON body matches ChatCompletionResponse.",
+            "content": {
+                "text/event-stream": {
+                    "schema": {"type": "string"},
+                    "examples": {
+                        "sse": {
+                            "summary": "SSE stream example",
+                            "value": "id: sess-123:0\ndata: {\"id\":\"sess-123\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\"}}]}\n\n"
+                        }
+                    }
+                }
+            },
+        }
+    },
+)
 def chat_completions(request: Request, body: ChatRequest):
     # Ensure engine is loaded
     try:
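When stream is true, the same route returns text/event-stream instead, as declared in the responses block above. A minimal consumer sketch under the same local-server assumption: the chunk layout follows the SSE example embedded in the OpenAPI metadata, and the "[DONE]" sentinel is an assumption rather than something this diff shows.

# Sketch: consume the SSE stream (stream=true).
# Assumptions: server at http://localhost:8000; OpenAI-style
# chat.completion.chunk payloads; "[DONE]" terminator is assumed.
import json
import requests

body = {
    "messages": [{"role": "user", "content": "Describe multimodal LLMs in one sentence."}],
    "stream": True,
    "max_tokens": 128,
}

with requests.post(
    "http://localhost:8000/v1/chat/completions", json=body, stream=True, timeout=120
) as resp:
    resp.raise_for_status()
    for raw in resp.iter_lines(decode_unicode=True):
        # SSE frames look like "id: sess-123:0" / "data: {...}" / a blank line.
        if not raw or not raw.startswith("data: "):
            continue
        payload = raw[len("data: "):]
        if payload.strip() == "[DONE]":
            break
        chunk = json.loads(payload)
        delta = chunk["choices"][0].get("delta", {})
        print(delta.get("content", ""), end="", flush=True)
print()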