from llama_cpp import Llama
import gradio as gr
import random
import requests
import os
# Download the q5_1-quantized Pygmalion 7B weights on the first run; later
# runs reuse the local copy.
MODEL_PATH = "pygmalion-7b-q5_1-ggml-v5.bin"
if not os.path.exists(MODEL_PATH):
    # Stream to disk instead of buffering the multi-gigabyte file in memory.
    with requests.get(
        "https://huggingface.co/birdup/pygmalion-7b-q5_1-ggml-v5/resolve/main/pygmalion-7b-q5_1-ggml-v5.bin",
        stream=True,
    ) as response:
        response.raise_for_status()
        with open(MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)
else:
    print("Model already exists, skipping redownload")
| print("Loading model...") | |
| llm = Llama( | |
| model_path="ggml-model-q4_0.bin", | |
| seed=random.randint(1, 9999999), | |
| n_ctx=2048, | |
| n_threads=3, | |
| ) | |
| print("Model loaded.") | |
def generate(prompt, stop):
    # Gradio text boxes pass escape sequences such as "\n" through literally;
    # decode them so they act as real control characters in the prompt.
    output = llm(
        bytes(prompt, "utf-8").decode("unicode_escape"),
        max_tokens=64,
        temperature=0.75,
        top_p=0.7,
        # Unescape the stop string the same way; an empty box means no stop sequence.
        stop=[bytes(stop, "utf-8").decode("unicode_escape")] if stop else None,
    )
    print(output)  # log the raw completion object for debugging
    return output["choices"][0]["text"]
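
# Optional sanity check before launching the UI (hypothetical prompt/stop
# values, not from the original script): the doubled backslashes mimic the
# literal "\n" a user would type into the text boxes.
# print(generate("Bot: Hello!\\nYou: Hi, who are you?\\nBot:", "\\nYou:"))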
# Two text inputs: the prompt and an optional stop sequence.
app = gr.Interface(fn=generate, inputs=["text", "text"], outputs="text")
app.launch()
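# launch() serves on http://127.0.0.1:7860 by default; pass share=True for a
# temporary public Gradio link if the app needs to be reachable remotely.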