Spaces:

ModularityAI
/

LLama3Chat

Runtime error

hanzla committed on Apr 18, 2024

Commit

145ecb9

1 Parent(s): 1b3204d

model added

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,28 +6,47 @@ import transformers
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-model_name = "meta-llama/Meta-Llama-3-8B"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16,device_map="auto")
 @spaces.GPU
-def yes_man(message, history):
-    input_ids = tokenizer(message, return_tensors="pt").input_ids.to(model.device)
-    output = model.generate(input_ids, max_length=512, num_return_sequences=1)
-    detailed_prompt = tokenizer.decode(output[0], skip_special_tokens=True)
-    return detailed_prompt
 gr.ChatInterface(
-    yes_man,
     chatbot=gr.Chatbot(height=300),
     textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
     title="LLAMA 3 8B Chat",
     description="Ask Yes Man any question",
     theme="soft",
-    examples=["Hello", "Am I cool?", "Are tomatoes vegetables?"],
-    cache_examples=True,
     retry_btn=None,
     undo_btn="Delete Previous",
     clear_btn="Clear",

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+# Hub id of the instruction-tuned Llama 3 8B checkpoint handed to the pipeline.
+model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
+# Build the text-generation pipeline once at import time; chat_function reuses
+# this module-level object (and its tokenizer) on every request.
+pipeline = transformers.pipeline(
+    "text-generation",
+    model=model_name,
+    # Forwarded to the model loader: load the weights in bfloat16.
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    # NOTE(review): this places the model on CUDA at import time, outside the
+    # @spaces.GPU-decorated handler; the Space is shown as "Runtime error", so
+    # confirm a GPU is actually available at this point.
+    device="cuda",
+)
 @spaces.GPU
+def chat_function(message, history):
+    """Generate the assistant's reply to ``message`` using the Llama 3 pipeline.
+
+    Args:
+        message: The latest user message from the chat textbox.
+        history: Earlier turns supplied by ``gr.ChatInterface``. Both
+            ``[user, assistant]`` pairs and ``{"role": ..., "content": ...}``
+            dicts are accepted; ``None`` or an empty list means a fresh chat.
+
+    Returns:
+        Only the newly generated text, with the echoed prompt removed.
+    """
+    messages = [{"role": "system", "content": "You are a helpful assistant!"}]
+    # Replay earlier turns so the model keeps conversational context; without
+    # this every message is answered as if it were the first one.
+    for turn in history or []:
+        if isinstance(turn, dict):
+            role, content = turn.get("role"), turn.get("content")
+            if role in ("user", "assistant") and isinstance(content, str) and content:
+                messages.append({"role": role, "content": content})
+            continue
+        user_text, assistant_text = turn
+        # Skip non-text or empty entries (e.g. a turn with no reply yet).
+        if isinstance(user_text, str) and user_text:
+            messages.append({"role": "user", "content": user_text})
+        if isinstance(assistant_text, str) and assistant_text:
+            messages.append({"role": "assistant", "content": assistant_text})
+    messages.append({"role": "user", "content": message})
+    # Render the conversation to a single prompt string ending with the
+    # assistant header so generation starts at the reply.
+    prompt = pipeline.tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+    # Stop on the regular EOS token or on the <|eot_id|> token, whichever
+    # comes first.
+    terminators = [
+        pipeline.tokenizer.eos_token_id,
+        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+    ]
+    outputs = pipeline(
+        prompt,
+        max_new_tokens=256,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+    # Slice the prompt prefix off generated_text so only the reply is returned.
+    return outputs[0]["generated_text"][len(prompt):]
 gr.ChatInterface(
+    chat_function,
     chatbot=gr.Chatbot(height=300),
     textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
     title="LLAMA 3 8B Chat",
     description="Ask Yes Man any question",
     theme="soft",
     retry_btn=None,
     undo_btn="Delete Previous",
     clear_btn="Clear",