Remostart committed · verified
Commit ad298b5
1 Parent(s): f2ab72d

Update app.py

Files changed (1)
  1. app.py +23 -18
app.py CHANGED
@@ -1,8 +1,9 @@
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-import spaces  # Explicit import for GPU decorator
+import spaces
 import logging
+import json
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -12,40 +13,44 @@ logger = logging.getLogger(__name__)
 model = None
 tokenizer = None
 MODEL_NAME = "ubiodee/Test_Plutus"
-FALLBACK_TOKENIZER = "gpt2"
+FALLBACK_TOKENIZER = "NousResearch/Meta-Llama-3-8B"
 
 # Load tokenizer at startup (lightweight, no model yet)
 try:
-    logger.info("Loading tokenizer at startup with legacy versions...")
+    logger.info("Loading tokenizer at startup for %s...", MODEL_NAME)
     tokenizer = AutoTokenizer.from_pretrained(
         MODEL_NAME,
-        use_fast=False,
+        use_fast=True,  # Llama-3 uses fast tokenizer
         trust_remote_code=True,
     )
     logger.info("Primary tokenizer loaded successfully.")
 except Exception as e:
-    logger.warning(f"Primary tokenizer failed: {str(e)}. Using fallback.")
-    tokenizer = AutoTokenizer.from_pretrained(
-        FALLBACK_TOKENIZER,
-        use_fast=False,
-        trust_remote_code=True,
-    )
-    logger.info("Fallback tokenizer loaded.")
+    logger.warning(f"Primary tokenizer failed: {str(e)}. Using fallback: {FALLBACK_TOKENIZER}")
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(
+            FALLBACK_TOKENIZER,
+            use_fast=True,
+            trust_remote_code=True,
+        )
+        logger.info("Fallback tokenizer loaded successfully.")
+    except Exception as fallback_e:
+        logger.error(f"Fallback tokenizer failed: {str(fallback_e)}")
+        raise
 
 # Set pad token
 if tokenizer.pad_token_id is None:
     tokenizer.pad_token_id = tokenizer.eos_token_id
-    logger.info("Set pad_token_id to eos_token_id.")
+    logger.info("Set pad_token_id to eos_token_id: %s", tokenizer.eos_token_id)
 
 def load_model():
     """Load model inside GPU context."""
     global model
     if model is None:
         try:
-            logger.info("Loading model with CPU fallback (full precision)...")
+            logger.info("Loading model %s with torch.float16...", MODEL_NAME)
             model = AutoModelForCausalLM.from_pretrained(
                 MODEL_NAME,
-                torch_dtype=torch.float16,  # Use fp16 for memory efficiency
+                torch_dtype=torch.float16,  # Use fp16 for ZeroGPU
                 low_cpu_mem_usage=True,
                 trust_remote_code=True,
             )
@@ -69,7 +74,7 @@ def generate_response(prompt, progress=gr.Progress()):
 
     progress(0.3, desc="Tokenizing input...")
     try:
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
 
         progress(0.6, desc="Generating response...")
         with torch.no_grad():
@@ -97,11 +102,11 @@ def generate_response(prompt, progress=gr.Progress()):
 # Gradio UI
 demo = gr.Interface(
     fn=generate_response,
-    inputs=gr.Textbox(label="Enter your prompt", lines=4, placeholder="Ask about Plutus..."),
+    inputs=gr.Textbox(label="Enter your prompt", lines=4, placeholder="Ask about Plutus smart contracts..."),
     outputs=gr.Textbox(label="Model Response"),
     title="Cardano Plutus AI Assistant",
     description="Write Plutus smart contracts on Cardano blockchain."
 )
 
-# Launch with queueing
-demo.queue(max_size=5).launch(enable_queue=True, max_threads=1)
+# Launch with simplified queueing
+demo.launch(queue=True, max_queue_size=5)
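
On the new launch line, `queue=True` and `max_queue_size` are passed to `launch()` itself, but on recent Gradio releases queueing is configured on the app object rather than through `launch()` kwargs. A version-dependent sketch of the conventional pattern (treat the exact kwargs as an assumption for this Space's pinned Gradio version):

demo.queue(max_size=5).launch()  # queue is set up on the Blocks/Interface object, then launched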
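
The dropped comment on the old `import spaces` line ("Explicit import for GPU decorator") and the `load_model()` docstring ("Load model inside GPU context") point at Hugging Face's ZeroGPU pattern, where the generation function runs under `@spaces.GPU`. The decorator itself sits outside the hunks shown above, so its placement here is an assumption, a minimal sketch of how the pieces usually fit together:

import gradio as gr
import spaces
import torch

@spaces.GPU  # on ZeroGPU Spaces, a GPU is attached only while this call runs
def generate_response(prompt, progress=gr.Progress()):
    load_model()  # populates the global `model` on first call, inside the GPU context
    inputs = tokenizer(prompt, return_tensors="pt", padding=True,
                       truncation=True, max_length=512).to(model.device)
    with torch.no_grad():
        # max_new_tokens is a hypothetical value; the real generate() kwargs
        # fall outside the hunks in this commit
        output_ids = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)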