KillerKing93 committed
Commit 01c78c5 · verified · 1 Parent(s): 9f99713

Sync from GitHub 82cd055

Files changed (1): main.py +101 −9
main.py CHANGED
@@ -23,11 +23,11 @@ import re
 import base64
 import tempfile
 import contextlib
-from typing import Any, Dict, List, Optional, Tuple, Deque
+from typing import Any, Dict, List, Optional, Tuple, Deque, Literal
 
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict, Field
 from starlette.responses import JSONResponse
 from fastapi.responses import StreamingResponse, Response
 import json
@@ -330,12 +330,84 @@ def load_video_frames_from_any(src: Dict[str, Any], max_frames: int = MAX_VIDEO_
 
 
 class ChatRequest(BaseModel):
-    model: Optional[str] = None
-    messages: List[Dict[str, Any]]
-    max_tokens: Optional[int] = None
-    temperature: Optional[float] = None
-    stream: Optional[bool] = None
-    session_id: Optional[str] = None
+    """OpenAI-compatible Chat Completions request body."""
+    model: Optional[str] = Field(default=None, description="Model id (defaults to env MODEL_REPO_ID).")
+    messages: List[Dict[str, Any]] = Field(description="OpenAI-style messages array. Supports text, image_url/input_image, video_url/input_video parts.")
+    max_tokens: Optional[int] = Field(default=None, description="Max new tokens to generate.")
+    temperature: Optional[float] = Field(default=None, description="Sampling temperature.")
+    stream: Optional[bool] = Field(default=None, description="When true, returns Server-Sent Events stream.")
+    session_id: Optional[str] = Field(default=None, description="Optional session id for resumable SSE.")
+    # Pydantic v2 schema extras with rich examples
+    model_config = ConfigDict(
+        json_schema_extra={
+            "examples": [
+                {
+                    "summary": "Text-only",
+                    "value": {
+                        "messages": [
+                            {"role": "user", "content": "Hello, summarize the benefits of multimodal LLMs."}
+                        ],
+                        "max_tokens": 128
+                    }
+                },
+                {
+                    "summary": "Image by URL",
+                    "value": {
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "text", "text": "What is in this image?"},
+                                    {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}}
+                                ]
+                            }
+                        ],
+                        "max_tokens": 128
+                    }
+                },
+                {
+                    "summary": "Video by URL (streaming SSE)",
+                    "value": {
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "text", "text": "Describe this clip briefly."},
+                                    {"type": "video_url", "video_url": {"url": "https://example.com/clip.mp4"}}
+                                ]
+                            }
+                        ],
+                        "stream": True,
+                        "max_tokens": 128
+                    }
+                }
+            ]
+        }
+    )
+
+class MessageModel(BaseModel):
+    role: Literal["system", "user", "assistant"]
+    content: str
+
+class ChoiceModel(BaseModel):
+    index: int
+    message: MessageModel
+    finish_reason: Optional[str] = None
+
+class UsageModel(BaseModel):
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+class ChatCompletionResponse(BaseModel):
+    """Non-streaming Chat Completions response (when stream=false)."""
+    id: str
+    object: str
+    created: int
+    model: str
+    choices: List[ChoiceModel]
+    usage: UsageModel
+    context: Dict[str, Any] = {}
 
 
 class Engine:
@@ -896,7 +968,27 @@ def health():
     return JSONResponse({"ok": True, "modelReady": ready, "modelId": model_id, "error": err, "context": ctx})
 
 
-@app.post("/v1/chat/completions", tags=["chat"])
+@app.post(
+    "/v1/chat/completions",
+    tags=["chat"],
+    response_model=ChatCompletionResponse,
+    responses={
+        200: {
+            "description": "When stream=true, the response is text/event-stream (SSE). When stream=false, JSON body matches ChatCompletionResponse.",
+            "content": {
+                "text/event-stream": {
+                    "schema": {"type": "string"},
+                    "examples": {
+                        "sse": {
+                            "summary": "SSE stream example",
+                            "value": "id: sess-123:0\ndata: {\"id\":\"sess-123\",\"object\":\"chat.completion.chunk\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\"}}]}\n\n"
+                        }
+                    }
+                }
+            },
+        }
+    },
+)
 def chat_completions(request: Request, body: ChatRequest):
     # Ensure engine is loaded
     try:
 
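For context on how the new models are exercised end to end, here is a minimal non-streaming client sketch (not part of the commit). The base URL is an assumption for a local deployment; the payload fields follow ChatRequest above and the response shape follows ChatCompletionResponse.

import requests

BASE_URL = "http://localhost:8000"  # assumption: local deployment

payload = {
    "messages": [
        {"role": "user", "content": "Hello, summarize the benefits of multimodal LLMs."}
    ],
    "max_tokens": 128,
    "stream": False,
}

resp = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
body = resp.json()

# Non-streaming bodies validate against ChatCompletionResponse:
# choices[0].message.content carries the assistant reply, usage the token counts.
print(body["choices"][0]["message"]["content"])
print(body["usage"]["total_tokens"])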
 
330
 
331
 
332
  class ChatRequest(BaseModel):
333
+ """OpenAI-compatible Chat Completions request body."""
334
+ model: Optional[str] = Field(default=None, description="Model id (defaults to env MODEL_REPO_ID).")
335
+ messages: List[Dict[str, Any]] = Field(description="OpenAI-style messages array. Supports text, image_url/input_image, video_url/input_video parts.")
336
+ max_tokens: Optional[int] = Field(default=None, description="Max new tokens to generate.")
337
+ temperature: Optional[float] = Field(default=None, description="Sampling temperature.")
338
+ stream: Optional[bool] = Field(default=None, description="When true, returns Server-Sent Events stream.")
339
+ session_id: Optional[str] = Field(default=None, description="Optional session id for resumable SSE.")
340
+ # Pydantic v2 schema extras with rich examples
341
+ model_config = ConfigDict(
342
+ json_schema_extra={
343
+ "examples": [
344
+ {
345
+ "summary": "Text-only",
346
+ "value": {
347
+ "messages": [
348
+ {"role": "user", "content": "Hello, summarize the benefits of multimodal LLMs."}
349
+ ],
350
+ "max_tokens": 128
351
+ }
352
+ },
353
+ {
354
+ "summary": "Image by URL",
355
+ "value": {
356
+ "messages": [
357
+ {
358
+ "role": "user",
359
+ "content": [
360
+ {"type": "text", "text": "What is in this image?"},
361
+ {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}}
362
+ ]
363
+ }
364
+ ],
365
+ "max_tokens": 128
366
+ }
367
+ },
368
+ {
369
+ "summary": "Video by URL (streaming SSE)",
370
+ "value": {
371
+ "messages": [
372
+ {
373
+ "role": "user",
374
+ "content": [
375
+ {"type": "text", "text": "Describe this clip briefly."},
376
+ {"type": "video_url", "video_url": {"url": "https://example.com/clip.mp4"}}
377
+ ]
378
+ }
379
+ ],
380
+ "stream": True,
381
+ "max_tokens": 128
382
+ }
383
+ }
384
+ ]
385
+ }
386
+ )
387
+
388
+ class MessageModel(BaseModel):
389
+ role: Literal["system", "user", "assistant"]
390
+ content: str
391
+
392
+ class ChoiceModel(BaseModel):
393
+ index: int
394
+ message: MessageModel
395
+ finish_reason: Optional[str] = None
396
+
397
+ class UsageModel(BaseModel):
398
+ prompt_tokens: int
399
+ completion_tokens: int
400
+ total_tokens: int
401
+
402
+ class ChatCompletionResponse(BaseModel):
403
+ """Non-streaming Chat Completions response (when stream=false)."""
404
+ id: str
405
+ object: str
406
+ created: int
407
+ model: str
408
+ choices: List[ChoiceModel]
409
+ usage: UsageModel
410
+ context: Dict[str, Any] = {}
411
 
412
 
413
  class Engine:
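A quick sketch for checking that the ConfigDict examples surface in the generated OpenAPI document (assuming the FastAPI app object is importable from main.py as app; json_schema_extra entries are merged into the model's JSON schema, so the examples list should sit under the ChatRequest component):

from fastapi.testclient import TestClient

from main import app  # assumption: app is the FastAPI instance in main.py

client = TestClient(app)
schema = client.get("/openapi.json").json()

# The examples list added via json_schema_extra should appear under
# components.schemas.ChatRequest in the OpenAPI document.
for example in schema["components"]["schemas"]["ChatRequest"].get("examples", []):
    print(example.get("summary"))
# Expected: Text-only, Image by URL, Video by URL (streaming SSE)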
 
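And a sketch of consuming the stream=true path. The wire format is inferred from the sse example in the route's responses block (an id: line plus a data: line holding an OpenAI-style chat.completion.chunk object); the [DONE] terminator is an assumption borrowed from the OpenAI convention and may not apply to this server.

import json
import requests

payload = {
    "messages": [{"role": "user", "content": "Describe SSE in one sentence."}],
    "stream": True,
    "session_id": "sess-123",  # optional; the field docs describe it as enabling resumable SSE
}

with requests.post(
    "http://localhost:8000/v1/chat/completions",  # assumption: local deployment
    json=payload,
    stream=True,
    timeout=300,
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data:"):
            continue  # skip id: lines and blank event separators
        data = line[len("data:"):].strip()
        if data == "[DONE]":  # assumption: OpenAI-style stream terminator
            break
        chunk = json.loads(data)
        delta = chunk["choices"][0].get("delta", {})
        print(delta.get("content", ""), end="", flush=True)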