abdull4h committed on
Commit
48a1d04
·
verified ·
1 Parent(s): 60c7332

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -3
app.py CHANGED
@@ -1,8 +1,25 @@
1
  import os
2
  import re
3
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
4
  import torch
5
- import spaces # Import spaces module
 
 
 
 
 
 
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
 
8
  # Global variables for model and tokenizer
@@ -202,8 +219,9 @@ def load_models():
202
  token=hf_token,
203
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
204
  device_map=device_map,
205
- # Ensure we use the model's full capabilities
206
- use_cache=True
 
207
  )
208
 
209
  # Cache the loaded model and tokenizer
 
1
  import os
2
  import re
3
  import gradio as gr
4
+ import spaces
5
+
6
+ # CRITICAL: Disable PyTorch compiler BEFORE importing torch
7
+ os.environ["PYTORCH_NO_CUDA_MEMORY_CACHING"] = "1"
8
+ os.environ["TORCH_COMPILE_DISABLE"] = "1"
9
+ os.environ["TORCH_INDUCTOR_DISABLE"] = "1"
10
+ os.environ["TORCHINDUCTOR_DISABLE_CUDAGRAPHS"] = "1"
11
+ os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
12
+ os.environ["TORCH_USE_CUDA_DSA"] = "0"
13
+
14
+ # Now import torch and disable its compiler features
15
  import torch
16
+ if hasattr(torch, "_dynamo"):
17
+ if hasattr(torch._dynamo, "config"):
18
+ torch._dynamo.config.suppress_errors = True
19
+ if hasattr(torch._dynamo, "disable"):
20
+ torch._dynamo.disable()
21
+ print("Disabled torch._dynamo")
22
+
23
  from transformers import AutoTokenizer, AutoModelForCausalLM
24
 
25
  # Global variables for model and tokenizer
 
219
  token=hf_token,
220
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
221
  device_map=device_map,
222
+ use_cache=True,
223
+ use_flash_attention_2=False,
224
+ _attn_implementation="eager" # Use eager mode to avoid compiler issues
225
  )
226
 
227
  # Cache the loaded model and tokenizer