!pip install "transformers==4.40.0" --upgrade
!pip install -i https://pypi.org/simple/ bitsandbytes
!pip install accelerate
import transformers
import torch

# Load Llama 3 8B Instruct pre-quantized to 4-bit so it fits on a single
# consumer GPU (roughly 6 GB of VRAM for the weights).
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": {"load_in_4bit": True},
        "low_cpu_mem_usage": True,
    },
)
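Passing a plain dict as quantization_config works, but the explicit BitsAndBytesConfig object is easier to extend later (for example with a different compute dtype). A minimal sketch of the equivalent setup, assuming the same model_id as above:

from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for matmuls at inference
)

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={
        "quantization_config": bnb_config,
        "low_cpu_mem_usage": True,
    },
)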
messages = [
    {"role": "system",
     "content": "You are an interviewer assessing whether the user would make a good manager. When the user says 'hi there!', begin the interview."},
    {"role": "user",
     "content": "hi there!"},
]
# Render the chat history into Llama 3's prompt format. add_generation_prompt
# appends the assistant header so the model knows it is its turn to speak.
prompt = pipeline.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
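It can help to inspect what apply_chat_template actually produced. For Llama 3 Instruct the rendered string looks roughly like the following (special tokens included because tokenize=False):

print(prompt)
# <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#
# You are an interviewer ...<|eot_id|><|start_header_id|>user<|end_header_id|>
#
# hi there!<|eot_id|><|start_header_id|>assistant<|end_header_id|>
#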
# Llama 3 Instruct can end a turn with either the EOS token or <|eot_id|>,
# so treat both as stop tokens.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]
outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)
# generated_text contains the prompt plus the completion; slice off the
# prompt to keep only the model's reply.
print(outputs[0]["generated_text"][len(prompt):])
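If you prefer not to slice by character count, the text-generation pipeline accepts return_full_text=False, which makes generated_text contain only the completion. The same call with that flag:

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    return_full_text=False,  # drop the prompt from generated_text
)
print(outputs[0]["generated_text"])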
!pip install gradio
import gradio as gr

# Reset the conversation for the Gradio app. Note this module-level list is
# shared across all users of the app; fine for a single-user demo, but a
# multi-user deployment would keep per-session state (e.g. gr.State) instead.
messages = [
    {"role": "system",
     "content": "You are an interviewer assessing whether the user would make a good manager. When the user says 'hi there!', begin the interview."},
    {"role": "user",
     "content": "hi there!"},
]
def add_text(history, text):
    """Append the user's message to both the chat display and the model context."""
    global messages  # the module-level chat history defined above
    history = history + [[text, '']]  # use a list, not a tuple: generate() mutates it
    messages = messages + [{"role": "user", "content": text}]
    return history, ''  # the second value clears the textbox
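Before wiring add_text into the UI, it can be sanity-checked at the REPL; the second return value is the empty string that clears the textbox. A hypothetical check (note it also appends to the global messages list):

history, cleared = add_text([], "Tell me about your leadership style.")
print(history)   # [['Tell me about your leadership style.', '']]
print(cleared)   # ''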
def generate(history):
    """Run the model on the accumulated messages and stream the reply into the UI."""
    global messages
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    response_msg = outputs[0]["generated_text"][len(prompt):]
    # Record the assistant's reply so the next turn has the full context.
    messages = messages + [{"role": "assistant", "content": response_msg}]
    # Yield the reply one character at a time for a typing effect in the UI.
    for char in response_msg:
        history[-1][1] += char
        yield history
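The loop above only simulates streaming: the full completion is generated first, then replayed per character. For true token-level streaming you can pass a TextIteratorStreamer through the pipeline and run generation in a background thread. A sketch, assuming your transformers version forwards streamer through the pipeline to model.generate():

from threading import Thread
from transformers import TextIteratorStreamer

def generate_streaming(history):
    global messages
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    streamer = TextIteratorStreamer(
        pipeline.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Generation blocks, so run it in a thread and consume tokens as they arrive.
    thread = Thread(target=pipeline, args=(prompt,), kwargs={
        "max_new_tokens": 256,
        "eos_token_id": terminators,
        "do_sample": True,
        "temperature": 0.6,
        "top_p": 0.9,
        "streamer": streamer,
    })
    thread.start()
    for new_text in streamer:  # yields decoded text chunks as tokens arrive
        history[-1][1] += new_text
        yield history
    thread.join()
    messages = messages + [{"role": "assistant", "content": history[-1][1]}]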
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[], elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter",
        )
    # On submit: first append the user's text, then stream the model's reply.
    txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        generate, inputs=[chatbot], outputs=chatbot)

demo.queue()  # required because generate() is a generator callback
demo.launch(debug=True)
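If you are running this in Colab rather than locally, launching with share=True gives a public URL, and debug=True keeps the cell attached so tracebacks from the callbacks surface in the notebook output:

demo.launch(debug=True, share=True)  # share=True exposes a public gradio.live link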