Remostart committed on
Commit
f2ab72d
Β·
verified Β·
1 Parent(s): 2984b8e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
- from spaces import GPU
5
  import logging
6
 
7
  # Set up logging
@@ -38,14 +38,14 @@ if tokenizer.pad_token_id is None:
38
  logger.info("Set pad_token_id to eos_token_id.")
39
 
40
  def load_model():
41
- """Load model inside GPU context to enable quantization."""
42
  global model
43
  if model is None:
44
  try:
45
  logger.info("Loading model with CPU fallback (full precision)...")
46
  model = AutoModelForCausalLM.from_pretrained(
47
  MODEL_NAME,
48
- torch_dtype=torch.float16, # Use fp16 for memory efficiency without bitsandbytes
49
  low_cpu_mem_usage=True,
50
  trust_remote_code=True,
51
  )
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import spaces # Explicit import for GPU decorator
5
  import logging
6
 
7
  # Set up logging
 
38
  logger.info("Set pad_token_id to eos_token_id.")
39
 
40
  def load_model():
41
+ """Load model inside GPU context."""
42
  global model
43
  if model is None:
44
  try:
45
  logger.info("Loading model with CPU fallback (full precision)...")
46
  model = AutoModelForCausalLM.from_pretrained(
47
  MODEL_NAME,
48
+ torch_dtype=torch.float16, # Use fp16 for memory efficiency
49
  low_cpu_mem_usage=True,
50
  trust_remote_code=True,
51
  )