peihsin0715 committed · Commit 97a31eb · 1 parent: b1dab38

Fix model loading

Files changed (2)
  1. Dockerfile +1 -0
  2. backend/utils/utils.py +63 -42
Dockerfile CHANGED
@@ -46,6 +46,7 @@ ENV HOME=/tmp \
     HF_DATASETS_CACHE=/tmp/.cache/huggingface/datasets \
     TRANSFORMERS_CACHE=/tmp/.cache/huggingface/transformers \
     MPLCONFIGDIR=/tmp
+RUN mkdir -p /tmp/huggingface /tmp/transformers /tmp/hub /tmp/datasets
 WORKDIR /app
 
 # Dependencies
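The new RUN line pre-creates cache directories at image build time, presumably so the runtime user (whose HOME is /tmp per the ENV block) never hits a permission error when huggingface_hub tries to create them lazily. A minimal startup check in the same spirit is sketched below; the list of paths is an assumption assembled from the ENV block above, the new RUN line, and the /tmp/hf_models cache_dir used in backend/utils/utils.py:

    import os

    # Assumed cache locations: ENV block + RUN mkdir line + cache_dir in
    # backend/utils/utils.py. Adjust to match the actual image.
    CACHE_PATHS = [
        "/tmp/.cache/huggingface/datasets",      # HF_DATASETS_CACHE
        "/tmp/.cache/huggingface/transformers",  # TRANSFORMERS_CACHE
        "/tmp/huggingface",
        "/tmp/transformers",
        "/tmp/hub",
        "/tmp/datasets",
        "/tmp/hf_models",                        # cache_dir in load_model_and_tokenizer
    ]

    for path in CACHE_PATHS:
        os.makedirs(path, exist_ok=True)  # no-op when the image already created it
        status = "writable" if os.access(path, os.W_OK) else "NOT writable"
        print(f"{path}: {status}")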
backend/utils/utils.py CHANGED
@@ -17,63 +17,84 @@ from transformers import (
 )
 
 def load_model_and_tokenizer(model_name: str):
-    # Check available memory
-    import psutil
-    available_memory = psutil.virtual_memory().available / 1024**3
-    print(f"Available memory: {available_memory:.2f} GB")
+    import os
+    import torch
 
+    # Set the cache directory
+    cache_dir = "/tmp/hf_models"
+    os.makedirs(cache_dir, exist_ok=True)
+
+    # Point the environment variables at it
+    os.environ['HF_HOME'] = cache_dir
+    os.environ['TRANSFORMERS_CACHE'] = cache_dir
+
+    # Determine the device
     if torch.cuda.is_available():
         device = torch.device("cuda")
-    elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-        device = torch.device("mps")
+        dtype = torch.float16
     else:
         device = torch.device("cpu")
+        dtype = torch.float32
 
-    gpt2_aliases = {"gpt2", "openai-community/gpt2"}
+    print(f"Loading model: {model_name}")
+    print(f"Device: {device}, precision: {dtype}")
+    print(f"Cache directory: {cache_dir}")
 
     try:
-        if model_name in gpt2_aliases:
-            tokenizer = GPT2Tokenizer.from_pretrained(
-                model_name,
-                cache_dir="/tmp/hf_cache"  # use a temporary directory
-            )
-            if tokenizer.pad_token is None and tokenizer.eos_token is not None:
-                tokenizer.pad_token = tokenizer.eos_token
-
-            model = GPT2LMHeadModel.from_pretrained(
-                model_name,
-                torch_dtype=torch.float16 if device.type != "cpu" else torch.float32,
-                low_cpu_mem_usage=True,  # key: reduces memory usage
-                cache_dir="/tmp/hf_cache"
-            )
-
-            if getattr(model.config, "pad_token_id", None) is None and getattr(model.config, "eos_token_id", None) is not None:
-                model.config.pad_token_id = model.config.eos_token_id
+        # Download options: online, resumable, cached under cache_dir
+        download_kwargs = {
+            'cache_dir': cache_dir,
+            'force_download': False,  # set to True to force a re-download
+            'resume_download': True,
+            'local_files_only': False
+        }
+
+        model_kwargs = {
+            **download_kwargs,
+            'torch_dtype': dtype,
+            'low_cpu_mem_usage': True,
+        }
+
+        if device.type == "cuda":
+            model_kwargs['device_map'] = "auto"
+
+        # Load according to the model type
+        if model_name in {"gpt2", "openai-community/gpt2"}:
+            print("Using the dedicated GPT-2 loader")
+            tokenizer = GPT2Tokenizer.from_pretrained(model_name, **download_kwargs)
+            model = GPT2LMHeadModel.from_pretrained(model_name, **model_kwargs)
         else:
-            tokenizer = AutoTokenizer.from_pretrained(
-                model_name,
-                cache_dir="/tmp/hf_cache"
-            )
-            if tokenizer.pad_token is None and tokenizer.eos_token is not None:
-                tokenizer.pad_token = tokenizer.eos_token
-
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                torch_dtype=torch.float16 if device.type != "cpu" else torch.float32,
-                low_cpu_mem_usage=True,  # key optimization
-                device_map="auto" if torch.cuda.is_available() else None,
-                cache_dir="/tmp/hf_cache"
-            )
+            print("Using the Auto loader")
+            tokenizer = AutoTokenizer.from_pretrained(model_name, **download_kwargs)
+            model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
+
+        # Set the pad token
+        if tokenizer.pad_token is None and tokenizer.eos_token is not None:
+            tokenizer.pad_token = tokenizer.eos_token
 
-            if getattr(model.config, "pad_token_id", None) is None and getattr(model.config, "eos_token_id", None) is not None:
+        if hasattr(model.config, 'pad_token_id') and model.config.pad_token_id is None:
+            if hasattr(model.config, 'eos_token_id') and model.config.eos_token_id is not None:
                 model.config.pad_token_id = model.config.eos_token_id
 
-        # Move the model only on non-CPU devices
-        if device.type != "cpu":
-            model.to(device)
+        # Without an auto device_map, move the model to the device manually
+        if device.type != "cuda":
+            model = model.to(device)
 
+        print(f"✓ Successfully loaded model {model_name}")
         return tokenizer, model, device
 
+    except Exception as e:
+        import traceback
+        print(f"Loading failed: {str(e)}")
+        print(f"Full traceback: {traceback.format_exc()}")
+
+        # Try a fallback
+        if model_name == "openai-community/gpt2":
+            print("Retrying with 'gpt2'...")
+            return load_model_and_tokenizer("gpt2")
+
+        raise RuntimeError(f"Failed to load model '{model_name}': {e}")
+
     except Exception as e:
         # Provide more detailed error information
         import traceback
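For reference, a minimal smoke test of the rewritten loader could look like the sketch below. The import path backend.utils.utils follows the file list above; the prompt is illustrative, and passing "openai-community/gpt2" instead would exercise the new fallback branch:

    from backend.utils.utils import load_model_and_tokenizer

    # Device and dtype are chosen inside the loader.
    tokenizer, model, device = load_model_and_tokenizer("gpt2")

    # Encode a short prompt and move the input tensors to the chosen device.
    inputs = tokenizer("Hello, world", return_tensors="pt").to(device)

    # Generate a short continuation; pad_token_id is passed explicitly because
    # the loader maps the pad token onto EOS when none is defined.
    output_ids = model.generate(
        **inputs,
        max_new_tokens=20,
        pad_token_id=tokenizer.pad_token_id,
    )
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))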