Remostart committed on
Commit
feafa8c
·
verified ·
1 Parent(s): 39ff65e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -1,16 +1,21 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
- from spaces import GPU # Import ZeroGPU decorator
5
 
6
- # Load model & tokenizer (runs on CPU at startup)
7
  MODEL_NAME = "ubiodee/plutus_llm"
 
 
 
 
 
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  MODEL_NAME,
11
- torch_dtype=torch.float16,
12
  device_map="auto",
13
- load_in_8bit=True
14
  )
15
 
16
  # Set padding token
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
+ from spaces import GPU
5
 
6
+ # Load model & tokenizer
7
  MODEL_NAME = "ubiodee/plutus_llm"
8
+ quantization_config = BitsAndBytesConfig(
9
+ load_in_8bit=True,
10
+ bnb_8bit_compute_dtype=torch.float16,
11
+ bnb_8bit_use_double_quant=True
12
+ )
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
14
  model = AutoModelForCausalLM.from_pretrained(
15
  MODEL_NAME,
16
+ quantization_config=quantization_config,
17
  device_map="auto",
18
+ torch_dtype=torch.float16
19
  )
20
 
21
  # Set padding token