import time

import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load the 4-bit quantized WizardCoder-Python-7B GGUF model.
# stream=True makes llm(prompt) return a generator of text chunks.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardCoder-Python-7B-V1.0-GGUF",
    model_file="wizardcoder-python-7b-v1.0.Q4_K_M.gguf",
    model_type="llama",
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
    stream=True,
)

def generate_response(message, history):
    # Flatten the chat history into a single prompt so the model
    # sees the full conversation on every turn.
    prompt = ""
    for user, bot in history:
        prompt += f"<user>: {user}\n<assistant>: {bot}\n"
    prompt += f"<user>: {message}\n<assistant>:"

    # Append an empty assistant turn, then fill it in chunk by chunk
    # so the chatbot renders the reply as it streams.
    history.append([message, ""])
    response = ""
    for chunk in llm(prompt):
        response += chunk
        history[-1][1] = response
        time.sleep(0.01)  # brief pause to smooth out UI updates
        yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask coding questions...", label="Your Message")
    clear = gr.Button("Clear")

    msg.submit(generate_response, [msg, chatbot], chatbot)
    clear.click(lambda: [], None, chatbot)

demo.launch()
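
Because llm(prompt) streams through a plain generator, it can be exercised without the UI. Below is a minimal standalone sanity-check sketch: the model arguments mirror the ones above, while the prompt text is just an illustration.

    from ctransformers import AutoModelForCausalLM

    llm = AutoModelForCausalLM.from_pretrained(
        "TheBloke/WizardCoder-Python-7B-V1.0-GGUF",
        model_file="wizardcoder-python-7b-v1.0.Q4_K_M.gguf",
        model_type="llama",
    )

    # Stream tokens straight to stdout to confirm the GGUF download
    # and token streaming work before wiring up the Gradio app.
    prompt = "<user>: Write a Python function that reverses a string\n<assistant>:"
    for chunk in llm(prompt, stream=True, max_new_tokens=64):
        print(chunk, end="", flush=True)
    print()

The only dependencies are gradio and ctransformers. Note that the first call to from_pretrained downloads the model file (roughly 4 GB for this quantization), so expect a long cold start.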