Remostart committed on
Commit
39ff65e
·
verified ·
1 Parent(s): 4828408

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -92
app.py CHANGED
@@ -1,106 +1,56 @@
1
  import gradio as gr
2
  import torch
3
- import torch.multiprocessing as mp
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
- import spaces
6
- import logging
7
 
8
- # Set multiprocessing to 'spawn' for ZeroGPU compatibility
9
- try:
10
- mp.set_start_method('spawn', force=True)
11
- except RuntimeError:
12
- pass
13
-
14
- # Set up logging
15
- logging.basicConfig(level=logging.INFO)
16
- logger = logging.getLogger(__name__)
17
-
18
- # Global variables
19
- model = None
20
- tokenizer = None
21
  MODEL_NAME = "ubiodee/plutus_llm"
 
 
 
 
 
 
 
22
 
23
- # Load tokenizer at startup
24
- try:
25
- logger.info("Loading tokenizer at startup for %s...", MODEL_NAME)
26
- tokenizer = AutoTokenizer.from_pretrained(
27
- MODEL_NAME,
28
- use_fast=True,
29
- trust_remote_code=True,
30
- )
31
- logger.info("Primary tokenizer loaded successfully.")
32
- except Exception as e:
33
- logger.error(f"Tokenizer loading failed: {str(e)}")
34
- raise
35
-
36
- # Set pad token
37
- if tokenizer.pad_token_id is None:
38
- tokenizer.pad_token_id = tokenizer.eos_token_id
39
- logger.info("Set pad_token_id to eos_token_id: %s", tokenizer.eos_token_id)
40
-
41
- # Load model at startup
42
- try:
43
- logger.info("Loading model %s with torch.float16...", MODEL_NAME)
44
- model = AutoModelForCausalLM.from_pretrained(
45
- MODEL_NAME,
46
- torch_dtype=torch.float16,
47
- trust_remote_code=True,
48
- )
49
- model.eval()
50
- logger.info("Model loaded successfully.")
51
- except Exception as e:
52
- logger.error(f"Model loading failed: {str(e)}")
53
- raise
54
-
55
- # Response function
56
- @spaces.GPU(duration=120)
57
- def generate_response(prompt, progress=gr.Progress()):
58
- global model
59
- progress(0.1, desc="Moving model to GPU...")
60
- try:
61
- if torch.cuda.is_available():
62
- model = model.to("cuda")
63
- logger.info("Model moved to GPU.")
64
- else:
65
- logger.warning("GPU not available; using CPU.")
66
-
67
- progress(0.3, desc="Tokenizing input...")
68
- inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
69
-
70
- progress(0.6, desc="Generating response...")
71
- with torch.no_grad():
72
- outputs = model.generate(
73
- **inputs,
74
- max_new_tokens=200,
75
- temperature=0.7,
76
- top_p=0.9,
77
- do_sample=True,
78
- eos_token_id=tokenizer.eos_token_id,
79
- pad_token_id=tokenizer.pad_token_id,
80
- )
81
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
82
-
83
- if response.startswith(prompt):
84
- response = response[len(prompt):].strip()
85
-
86
- progress(1.0, desc="Done!")
87
- return response
88
- except Exception as e:
89
- logger.error(f"Inference failed: {str(e)}")
90
- return f"Error during generation: {str(e)}"
91
- finally:
92
- if torch.cuda.is_available():
93
- torch.cuda.empty_cache()
94
- logger.info("GPU memory cleared.")
95
 
96
  # Gradio UI
97
  demo = gr.Interface(
98
  fn=generate_response,
99
- inputs=gr.Textbox(label="Enter your prompt", lines=4, placeholder="Ask about Plutus smart contracts..."),
 
 
 
 
 
100
  outputs=gr.Textbox(label="Model Response"),
101
  title="Cardano Plutus AI Assistant",
102
- description="Write Plutus smart contracts on Cardano blockchain."
103
  )
104
 
105
- # Launch
106
- demo.launch()
 
1
  import gradio as gr
2
  import torch
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ from spaces import GPU # Import ZeroGPU decorator
 
5
 
6
# Load the tokenizer and model once, at import time, so every request
# reuses the same objects.
MODEL_NAME = "ubiodee/plutus_llm"

# use_fast=False asks for the non-"fast" tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)

# Fall back to the EOS token for padding when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): device_map="auto" delegates weight placement to the
# library -- confirm where the weights actually land at startup.
# NOTE(review): newer transformers releases appear to prefer passing
# quantization settings via `quantization_config` -- verify against the
# pinned version before changing `load_in_8bit`.
# eval() switches the module to inference mode and returns the module itself.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_8bit=True,
).eval()
21
+
22
# Response function with ZeroGPU decorator
@GPU
def generate_response(prompt, max_new_tokens=200, temperature=0.7, top_p=0.9):
    """Generate a sampled completion for ``prompt`` with the module-level model.

    Args:
        prompt: User text to complete.
        max_new_tokens: Upper bound on the number of newly generated tokens.
            Coerced to ``int`` because UI sliders may deliver a float.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The generated continuation with surrounding whitespace stripped, or
        an empty string when ``prompt`` is ``None`` or blank.
    """
    # Nothing to complete: skip tokenization and generation entirely.
    if not prompt or not prompt.strip():
        return ""

    # Send the inputs to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(model.device)
    prompt_token_count = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    # The generated sequence starts with the prompt tokens. Dropping them by
    # count is more reliable than matching the decoded text against the
    # prompt, which breaks when decoding does not reproduce it exactly.
    completion_ids = outputs[0][prompt_token_count:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  # Gradio UI
# Input widgets, listed in the same order as generate_response's parameters:
# prompt, max_new_tokens, temperature, top_p.
_prompt_box = gr.Textbox(
    label="Enter your prompt",
    lines=4,
    placeholder="Ask about Plutus...",
)
_max_tokens_slider = gr.Slider(
    label="Max New Tokens", minimum=50, maximum=500, value=200, step=10
)
_temperature_slider = gr.Slider(
    label="Temperature", minimum=0.1, maximum=2.0, value=0.7, step=0.1
)
_top_p_slider = gr.Slider(
    label="Top P", minimum=0.1, maximum=1.0, value=0.9, step=0.05
)
_response_box = gr.Textbox(label="Model Response")

demo = gr.Interface(
    fn=generate_response,
    inputs=[_prompt_box, _max_tokens_slider, _temperature_slider, _top_p_slider],
    outputs=_response_box,
    title="Cardano Plutus AI Assistant",
    description="Ask questions about Plutus smart contracts or Cardano blockchain using ubiodee/plutus_llm.",
)

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()