Remostart committed on
Commit b1ee67b · verified · 1 Parent(s): 18231b8

Update app.py

Files changed (1)
  app.py  +39 -49
app.py CHANGED
@@ -1,56 +1,46 @@
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch

- # Load the fine-tuned Llama-3-8B model and tokenizer for ubiodee/plutus_llm
- model_name = "ubiodee/plutus_llm"
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)  # Safeguard against fast tokenizer issues
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     torch_dtype=torch.float16,
-     device_map="auto",
-     load_in_8bit=True  # Enable 8-bit quantization as per model specs
- )
-
- # Set padding token if not already set
- if tokenizer.pad_token is None:
-     tokenizer.pad_token = tokenizer.eos_token
-
- def generate_text(prompt, max_length=200, temperature=0.7, top_p=0.9):
-     # Tokenize the input prompt
-     inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to("cuda" if torch.cuda.is_available() else "cpu")
-
-     # Generate text
-     outputs = model.generate(
-         inputs["input_ids"],
-         attention_mask=inputs["attention_mask"],
-         max_length=max_length,
-         temperature=temperature,
-         top_p=top_p,
-         do_sample=True,
-         num_return_sequences=1,
-         pad_token_id=tokenizer.eos_token_id
-     )
-
-     # Decode the generated text
-     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     # Remove the input prompt from the output for cleaner response
-     generated_text = generated_text[len(prompt):].strip()
-     return generated_text
-
- # Create Gradio interface
  demo = gr.Interface(
-     fn=generate_text,
-     inputs=[
-         gr.Textbox(label="Input Prompt", placeholder="Enter your prompt here...", lines=3),
-         gr.Slider(label="Max Length", minimum=50, maximum=500, value=200, step=10),
-         gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, value=0.7, step=0.1),
-         gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.9, step=0.05)
-     ],
-     outputs=gr.Textbox(label="Generated Text", lines=10),
-     title="Plutus LLM Demo (ubiodee/plutus_llm)",
-     description="Interact with the fine-tuned Llama-3-8B model using LoRA and 8-bit quantization. This is based on ubiodee/plutus_llm."
  )

- if __name__ == "__main__":
-     demo.launch()
  import gradio as gr
  import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer

+ # Load model & tokenizer
+ MODEL_NAME = "ubiodee/plutus_llm"
+
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+ model.eval()
+
+ if torch.cuda.is_available():
+     model.to("cuda")
+
+ # Response function
+ def generate_response(prompt):
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=200,
+             temperature=0.7,
+             top_p=0.9,
+             do_sample=True,
+             eos_token_id=tokenizer.eos_token_id,
+             pad_token_id=tokenizer.pad_token_id,
+         )
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     # Remove the prompt from the output to return only the answer
+     if response.startswith(prompt):
+         response = response[len(prompt):].strip()
+
+     return response

+ # Gradio UI
  demo = gr.Interface(
+     fn=generate_response,
+     inputs=gr.Textbox(label="Enter your prompt", lines=4, placeholder="Ask about Plutus..."),
+     outputs=gr.Textbox(label="Model Response"),
+     title="Cardano Plutus AI Assistant",
+     description="Ask questions about Plutus smart contracts or Cardano blockchain."
  )

+ demo.launch()
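Note: the new loader drops the torch_dtype=torch.float16, device_map="auto", and load_in_8bit settings of the previous version, so the checkpoint is loaded in full precision and only moved to GPU if one is available. It also passes tokenizer.pad_token_id to generate(), which is None for many Llama-style tokenizers. If memory or padding warnings become an issue, one option (a sketch only, not part of this commit) is to restore the 8-bit quantized load via transformers' BitsAndBytesConfig and the old pad-token fallback:

# Optional alternative loader (sketch, not part of this commit)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "ubiodee/plutus_llm"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    # Llama tokenizers often ship without a pad token; reuse EOS as the old code did
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # requires bitsandbytes and a GPU
    device_map="auto",
    torch_dtype=torch.float16,  # dtype for the non-quantized modules
)
model.eval()

The generate_response function and the Gradio interface would stay as committed.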