# Hugging Face Spaces status shown when this file was captured: Runtime error
| import fastapi | |
| import json | |
| import markdown | |
| import uvicorn | |
| from fastapi.responses import HTMLResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from sse_starlette.sse import EventSourceResponse | |
| from ctransformers import AutoModelForCausalLM | |
| from ctransformers.langchain import CTransformers | |
| from pydantic import BaseModel | |
# Model identifiers are kept together so the load call below reads cleanly.
_MODEL_REPO = "TheBloke/gorilla-7B-GGML"
_MODEL_FILE = "Gorilla-7B.ggmlv3.q4_0.bin"
_MODEL_TYPE = "llama"

# The model is loaded once at import time and shared by every request handler.
llm = AutoModelForCausalLM.from_pretrained(
    _MODEL_REPO,
    model_file=_MODEL_FILE,
    model_type=_MODEL_TYPE,
)
app = fastapi.FastAPI()

# Any origin, method and header is accepted. Credentials are deliberately
# disabled: the CORS middleware documentation states that wildcard origins
# must not be combined with allow_credentials=True, and nothing in this file
# relies on cookies or Authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
async def index():
    """Return the demo page as an ``HTMLResponse`` with status 200.

    The page has a text input, a button and an output ``div``. Clicking the
    button opens an ``EventSource`` against the ``/stream`` URL with the
    input value as the ``prompt`` query parameter and appends every received
    message to the output ``div``.

    NOTE(review): no route decorator is visible on this function in this
    view of the file — confirm how it is registered on ``app``.
    """
    html_content = """
    <!DOCTYPE html>
    <html>
    <style>
    body {
        font-family: "Arial";
    }
    h1 {
        text-align: center;
    }
    </style>
    <body>
    <h1>gorilla</h1>
    <input id="prompt" type="text">
    <button id="search">I'm feeling lucky</button>
    <div id="content"></div>
    <script>
    document.getElementById("search").addEventListener("click", () => {
        let prompt = document.getElementById("prompt").value;
        // Encode the prompt so characters such as &, # or + survive the query string.
        let source = new EventSource(`https://matthoffner-gorilla.hf.space/stream?prompt=${encodeURIComponent(prompt)}`);
        source.onmessage = function(event) {
            console.log(event);
            let eventData = event.data;
            // Append as plain text: the stream echoes the user's prompt, so it must not be parsed as markup.
            document.getElementById("content").insertAdjacentText("beforeend", eventData);
        };
        // EventSource reconnects automatically once the server ends the stream,
        // which would request the same generation again; close it instead.
        source.onerror = function() {
            source.close();
        };
    });
    </script>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content, status_code=200)
async def chat(prompt = "I want to download a dataset from GCS"):
    """Stream a completion for ``prompt`` as server-sent events.

    The prompt is tokenized up front with the module-level ``llm``. The
    returned ``EventSourceResponse`` emits, in order: the prompt itself, one
    event per detokenized chunk produced by ``llm.generate``, and a final
    empty string.

    NOTE(review): no route decorator is visible here; the demo page requests
    ``/stream?prompt=...``, so this is presumably that handler — confirm.
    """
    prompt_tokens = llm.tokenize(prompt)

    async def event_stream(token_ids, model):
        # Echo the prompt first so the client shows it ahead of the completion.
        yield prompt
        for generated in model.generate(token_ids):
            yield model.detokenize(generated)
        # Trailing empty event, emitted after generation finishes.
        yield ""

    events = event_stream(prompt_tokens, llm)
    return EventSourceResponse(events)
class ChatCompletionRequest(BaseModel):
    # Request body model for the request-based chat handler.
    # `messages` is a single string holding the text to complete.
    messages: str
async def chat(request: ChatCompletionRequest, response_mode=None):
    """Stream a completion for ``request.messages`` as server-sent events.

    Args:
        request: Body model whose ``messages`` string is used as the prompt.
        response_mode: Accepted for interface compatibility; not used here.

    Returns:
        An ``EventSourceResponse`` that emits one event per detokenized chunk
        produced by ``llm.generate`` and then a final empty string.

    NOTE(review): this definition reuses the name ``chat`` and therefore
    rebinds the module-level name of the earlier prompt-based handler; no
    route decorator is visible in this view — confirm how both are registered.
    """
    # `messages` is declared as `str`. The previous `request.messages.join(' ')`
    # used the message as a *separator* for the one-character iterable ' ',
    # which always evaluates to ' ' and silently discarded the user's text.
    tokens = llm.tokenize(request.messages)

    async def server_sent_events(chat_chunks, llm):
        for chat_chunk in llm.generate(chat_chunks):
            yield llm.detokenize(chat_chunk)
        # Trailing empty event, emitted after generation finishes.
        yield ""

    return EventSourceResponse(server_sent_events(tokens, llm))
if __name__ == "__main__":
    # Only start the server when the file is executed as a script;
    # it listens on every interface on port 8000.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )