jasvir-singh1021 committed on
Commit
dd7ea12
·
verified ·
1 Parent(s): acdae08

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from ctransformers import AutoModelForCausalLM
import time

# Load the quantized GGUF model (optimized for CPU).
# NOTE(review): from_pretrained presumably fetches the model file from the
# Hugging Face Hub on first run — confirm network/disk requirements before
# deploying. The config dict below sets generation defaults for llm(...).
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardCoder-Python-13B-GGUF",  # You can change to CodeLlama, Phind, etc.
    model_file="wizardcoder-python-13b.Q4_K_M.gguf",  # Use Q4_K_M for 16GB RAM
    model_type="llama",
    config={
        "max_new_tokens": 512,  # cap on tokens generated per response
        "temperature": 0.7,     # sampling temperature
        "top_p": 0.9,           # nucleus-sampling cutoff
        "stream": True          # iterate llm(prompt) to receive chunks incrementally
    }
)
+
18
+ def generate_response(message, history):
19
+ prompt = ""
20
+ for user, bot in history:
21
+ prompt += f"<user>: {user}\n<assistant>: {bot}\n"
22
+ prompt += f"<user>: {message}\n<assistant>:"
23
+
24
+ history.append([message, ""])
25
+ response = ""
26
+ for chunk in llm(prompt):
27
+ response += chunk
28
+ history[-1][1] = response
29
+ time.sleep(0.01)
30
+ yield history
31
+

# --- Gradio front-end -------------------------------------------------
# Minimal chat layout: a message history view, an input box, and a reset
# button, wired to the streaming generator above.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask coding questions...", label="Your Message")
    clear = gr.Button("Clear")

    def _reset_history():
        # Returning an empty list wipes the Chatbot component's state.
        return []

    # Pressing Enter in the textbox streams a model reply into the chat.
    msg.submit(generate_response, [msg, chatbot], chatbot)
    # The Clear button replaces the chat history with an empty list.
    clear.click(_reset_history, None, chatbot)

demo.launch()