Create chat_demo.py
chat_demo.py ADDED (+119 -0)
@@ -0,0 +1,119 @@
import gradio as gr
from openai import OpenAI
import uuid
import json
import os
import tempfile
import subprocess
import threading

BASE_URL = "http://localhost:8080/v1"
MODEL_NAME = "bn"

def read_output(process):
    """Reads the output from the subprocess and prints it to the console."""
    for line in iter(process.stdout.readline, ""):
        print(line.rstrip())
    process.stdout.close()

def start_server(command):
    """Starts the server as a subprocess and captures its stdout."""
    # Start the server process
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # Redirect stderr to stdout
        text=True  # Automatically decode the output to text
    )

    # Start a thread to read the output
    output_thread = threading.Thread(target=read_output, args=(process,))
    output_thread.daemon = True  # Daemonize the thread so it exits when the main program does
    output_thread.start()

    return process

server_process = start_server(["./ik_llama.cpp/build/bin/llama-server", "-m", "./ik_llama.cpp/build/model-out.gguf", "--chat-template", "vicuna"])


cli = OpenAI(api_key="sk-nokey", base_url=BASE_URL)

def openai_call(message, history, system_prompt, max_new_tokens):
    #print(history) # DEBUG
    history.insert(0, {
        "role": "system",
        "content": system_prompt
    })
    history.append({
        "role": "user",
        "content": message
    })
    response = cli.chat.completions.create(
        model=MODEL_NAME,
        messages=history,
        max_tokens=max_new_tokens,
        stop=["<|im_end|>", "</s>"],
        stream=True
    )
    reply = ""
    for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            reply = reply + delta
            yield reply, None
    history.append({"role": "assistant", "content": reply})
    yield reply, gr.State(history)

def gen_file(conv_state):
    #print(conv_state) # DEBUG
    fname = f"{str(uuid.uuid4())}.json"
    #with tempfile.NamedTemporaryFile(prefix=str(uuid.uuid4()), suffix=".json", mode="w", encoding="utf-8", delete_on_close=False) as f:
    with open(fname, mode="w", encoding="utf-8") as f:
        json.dump(conv_state.value, f, indent=4, ensure_ascii=False)
    return gr.File(fname), gr.State(fname)

def rm_file_wrap(path: str):
    # Try to delete the file.
    try:
        os.remove(path)
    except OSError as e:
        # If it fails, inform the user.
        print("Error: %s - %s." % (e.filename, e.strerror))

def on_download(download_data: gr.DownloadData):
    print(f"deleting {download_data.file.path}")
    rm_file_wrap(download_data.file.path)

def clean_file(orig_path):
    print(f"Deleting {orig_path.value}")
    rm_file_wrap(orig_path.value)

with gr.Blocks() as demo:
    #download=gr.DownloadButton(label="Download Conversation", value=None)
    conv_state = gr.State()
    orig_path = gr.State()
    chat = gr.ChatInterface(
        openai_call,
        type="messages",
        additional_inputs=[
            gr.Textbox("You are a helpful AI assistant.", label="System Prompt"),
            gr.Slider(30, 2048, label="Max new tokens"),
        ],
        additional_outputs=[conv_state],
        title="Chat with bitnet using ik_llama",
        description="Warning: Do not input sensitive info - assume everything is public! Also note this is experimental: the ik_llama server doesn't seem to support arbitrary chat templates, and we're using vicuna as an approximate match - so there might be some intelligence degradation."
    )
    download_file = gr.File()
    download_btn = gr.Button("Export Conversation for Download") \
        .click(fn=gen_file, inputs=[conv_state], outputs=[download_file, orig_path]) \
        .success(fn=clean_file, inputs=[orig_path])
    download_file.download(on_download, None, None)

try:
    demo.queue(max_size=10, api_open=True).launch(server_name='0.0.0.0')
finally:
    # Stop the server
    server_process.terminate()
    server_process.wait()
    print("Server stopped.")
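Note that the script launches the Gradio app immediately after spawning llama-server, so the first chat request can race the model load. A minimal readiness-check sketch is shown below; it is not part of the committed file, and it assumes the ik_llama.cpp build exposes the usual llama.cpp /health route (an assumption - the fork may differ). The wait_for_server helper and its placement between start_server(...) and demo.launch(...) are hypothetical.

import time
import urllib.request
import urllib.error

def wait_for_server(base_url: str, timeout_s: float = 120.0) -> bool:
    """Poll the server's health endpoint until it answers 200 or the timeout expires."""
    # Assumes a llama.cpp-style /health route next to the /v1 API (hypothetical for ik_llama.cpp).
    health_url = base_url.replace("/v1", "") + "/health"
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(health_url, timeout=5) as resp:
                if resp.status == 200:
                    return True
        except (urllib.error.URLError, OSError):
            pass  # Server not up yet (or still loading the model); retry.
        time.sleep(1)
    return False

# Possible usage, after start_server(...) and before demo.launch(...):
# if not wait_for_server(BASE_URL):
#     raise RuntimeError("llama-server did not become ready in time")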