import time

import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load the 4-bit quantized WizardCoder-Python-7B GGUF model.
# stream=True makes llm(prompt) return a generator of text chunks.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/WizardCoder-Python-7B-V1.0-GGUF",
    model_file="wizardcoder-python-7b-v1.0.Q4_K_M.gguf",
    model_type="llama",
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
    stream=True,
)

def generate_response(message, history):
    # Flatten the chat history into a single prompt so the model
    # sees the full conversation on every turn.
    prompt = ""
    for user, bot in history:
        prompt += f"<user>: {user}\n<assistant>: {bot}\n"
    prompt += f"<user>: {message}\n<assistant>:"

    # Append an empty assistant turn, then fill it in chunk by chunk
    # so the chatbot renders the reply as it streams.
    history.append([message, ""])
    response = ""
    for chunk in llm(prompt):
        response += chunk
        history[-1][1] = response
        time.sleep(0.01)  # brief pause to smooth out UI updates
        yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask coding questions...", label="Your Message")
    clear = gr.Button("Clear")

    msg.submit(generate_response, [msg, chatbot], chatbot)
    clear.click(lambda: [], None, chatbot)

demo.launch()
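
Because llm(prompt) streams through a plain generator, it can be exercised without the UI. Below is a minimal standalone sanity-check sketch: the model arguments mirror the ones above, while the prompt text is just an illustration.

    from ctransformers import AutoModelForCausalLM

    llm = AutoModelForCausalLM.from_pretrained(
        "TheBloke/WizardCoder-Python-7B-V1.0-GGUF",
        model_file="wizardcoder-python-7b-v1.0.Q4_K_M.gguf",
        model_type="llama",
    )

    # Stream tokens straight to stdout to confirm the GGUF download
    # and token streaming work before wiring up the Gradio app.
    prompt = "<user>: Write a Python function that reverses a string\n<assistant>:"
    for chunk in llm(prompt, stream=True, max_new_tokens=64):
        print(chunk, end="", flush=True)
    print()

The only dependencies are gradio and ctransformers. Note that the first call to from_pretrained downloads the model file (roughly 4 GB for this quantization), so expect a long cold start.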