Create app.py
app.py ADDED
@@ -0,0 +1,41 @@
+import gradio as gr
+from ctransformers import AutoModelForCausalLM
+import time
+
+# Load the quantized GGUF model (optimized for CPU)
+llm = AutoModelForCausalLM.from_pretrained(
+    "TheBloke/WizardCoder-Python-13B-GGUF",  # You can change to CodeLlama, Phind, etc.
+    model_file="wizardcoder-python-13b.Q4_K_M.gguf",  # Use Q4_K_M for 16GB RAM
+    model_type="llama",
+    # Generation defaults, passed as keyword arguments to ctransformers
+    max_new_tokens=512,
+    temperature=0.7,
+    top_p=0.9,
+    stream=True,
+)
+
+def generate_response(message, history):
+    # Rebuild the conversation as a single prompt string
+    prompt = ""
+    for user, bot in history:
+        prompt += f"<user>: {user}\n<assistant>: {bot}\n"
+    prompt += f"<user>: {message}\n<assistant>:"
+
+    history.append([message, ""])
+    response = ""
+    # With stream=True, calling the model yields text chunks as they are generated
+    for chunk in llm(prompt):
+        response += chunk
+        history[-1][1] = response
+        time.sleep(0.01)
+        yield history
+
+# Gradio UI
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox(placeholder="Ask coding questions...", label="Your Message")
+    clear = gr.Button("Clear")
+
+    msg.submit(generate_response, [msg, chatbot], chatbot)
+    clear.click(lambda: [], None, chatbot)
+
+# Queueing is required for generator (streaming) outputs on Gradio 3.x;
+# it is enabled by default on newer releases.
+demo.queue()
+demo.launch()
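
For the Space to build, the repository also needs its Python dependencies declared. A minimal requirements.txt covering only the two libraries imported above (exact version pins left unspecified; the repo may already include one) could look like:

gradio
ctransformers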