Commit 0207752
Parent(s): dcac576
Update main.py
main.py CHANGED
@@ -6,7 +6,7 @@ from functools import partial
 import fastapi
 import uvicorn
 from fastapi import HTTPException, Depends, Request
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 from sse_starlette.sse import EventSourceResponse
 from anyio import create_memory_object_stream
@@ -87,7 +87,7 @@ async def chat(request: ChatCompletionRequest):
 
     return EventSourceResponse(generate_response(chat_chunks, llm))
 
-async def stream_response(tokens: Any) -> None:
+async def stream_response(tokens, llm):
     try:
         iterator: Generator = llm.generate(tokens)
         for chat_chunk in iterator:
@@ -102,22 +102,17 @@ async def stream_response(tokens: Any) -> None:
                     }
                 ]
             }
-            yield
-            yield
+            yield dict(data=json.dumps(response))
+            yield dict(data="[DONE]")
     except Exception as e:
         print(f"Exception in event publisher: {str(e)}")
 
-
-async def chatV2(request: Request, body: ChatCompletionRequest):
-    combined_messages = ' '.join([message.content for message in body.messages])
-    tokens = llm.tokenize(combined_messages)
-
-    return StreamingResponse(stream_response(tokens))
-
 @app.post("/v2/chat/completions")
 async def chatV2_endpoint(request: Request, body: ChatCompletionRequest):
-
+    combined_messages = ' '.join([message.content for message in body.messages])
+    tokens = llm.tokenize(combined_messages)
 
+    return EventSourceResponse(stream_response(tokens, llm))
 
 @app.post("/v0/chat/completions")
 async def chat(request: ChatCompletionRequestV0, response_mode=None):