olacode55 committed on
Commit
9f17d88
·
verified ·
1 Parent(s): 6a531fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -2
app.py CHANGED
@@ -1,24 +1,37 @@
 
 
1
  import gradio as gr
 
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  from peft import PeftModel
4
- import torch
5
 
 
 
 
 
 
 
 
6
  base_model = "meta-llama/Llama-2-7b-chat-hf"
7
  adapter_model = "olacode55/zimble-llama2"
8
 
9
  tokenizer = AutoTokenizer.from_pretrained(base_model)
 
10
  base = AutoModelForCausalLM.from_pretrained(
11
  base_model,
12
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
13
- device_map="auto"
 
14
  )
15
 
16
  model = PeftModel.from_pretrained(base, adapter_model)
17
 
 
18
  def generate(prompt):
19
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
20
  outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7)
21
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
22
 
 
23
  demo = gr.Interface(fn=generate, inputs="text", outputs="text", title="Zimble LLaMA 2 Fine-Tuned")
24
  demo.launch()
 
1
+ import os
2
+ import torch
3
  import gradio as gr
4
+ from huggingface_hub import login
5
  from transformers import AutoTokenizer, AutoModelForCausalLM
6
  from peft import PeftModel
 
7
 
8
# === STEP 1: Authenticate with Hugging Face ===
# The access token is read from the HF_TOKEN environment variable (for a
# Space, configure it as a secret). It must never be written into this file:
# anything committed here is publicly readable, and a token that has been
# committed should be revoked and replaced.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    # Fail fast with a clear message instead of letting login() fall back to
    # an interactive prompt or the model download fail with an opaque 401.
    raise RuntimeError(
        "HF_TOKEN is not set. Export a Hugging Face access token that has "
        "been granted access to meta-llama/Llama-2-7b-chat-hf."
    )
login(token=hf_token)

# === STEP 2: Load base and adapter models ===
base_model = "meta-llama/Llama-2-7b-chat-hf"
adapter_model = "olacode55/zimble-llama2"

tokenizer = AutoTokenizer.from_pretrained(base_model)

base = AutoModelForCausalLM.from_pretrained(
    base_model,
    # Half precision only when a GPU is present; fall back to float32 on CPU.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    # Pass the very same token used for login(); do not prepend "hf_" because
    # the environment variable already holds the complete token string.
    token=hf_token,
)

# Attach the fine-tuned adapter weights on top of the gated base model.
model = PeftModel.from_pretrained(base, adapter_model)
28
 
29
# === STEP 3: Define generation function ===
def generate(prompt):
    """Return the model's decoded output for ``prompt``.

    The prompt is tokenized into PyTorch tensors, moved to ``model.device``,
    and passed to ``model.generate`` with up to 200 new tokens at temperature
    0.7. The first returned sequence is decoded with special tokens skipped.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    encoded = encoded.to(model.device)
    generated = model.generate(**encoded, max_new_tokens=200, temperature=0.7)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
34
 
35
# === STEP 4: Launch Gradio app ===
# One text input and one text output, both wired to generate().
demo = gr.Interface(
    fn=generate,
    inputs="text",
    outputs="text",
    title="Zimble LLaMA 2 Fine-Tuned",
)
demo.launch()