Update minigpt4/models/mini_gpt4_llama_v2.py
minigpt4/models/mini_gpt4_llama_v2.py
CHANGED
@@ -111,94 +111,28 @@ class MiniGPT4_Video(Blip2Base, PreTrainedModel):
 
         print('Loading LLAMA')
 
-        # 🔧 Force-clear the GPU cache before loading Llama
-        import torch
-        import gc
-        if torch.cuda.is_available():
-            print("🔧 Clearing GPU cache...")
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-            gc.collect()
-            available_mem = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)
-            print(f"🔧 Free VRAM after cleanup: {available_mem / 1024**3:.1f} GB")
-
         self.B_SYS, self.E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
         token=os.environ.get("HF_TKN")
-
-
-        print(f"🔧 Loading tokenizer for model {self.llama_model}...")
-
-        # Check whether this is a Qwen model
-        if "qwen" in self.llama_model.lower() or "Qwen" in self.llama_model:
-            print("🔧 Qwen model detected, using AutoTokenizer")
-            from transformers import AutoTokenizer
-            self.llama_tokenizer = AutoTokenizer.from_pretrained(
-                self.llama_model,
-                use_fast=False,
-                token=token,
-                trust_remote_code=True
-            )
-            # Special-token setup for Qwen models
-            if self.llama_tokenizer.pad_token is None:
-                self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
-        else:
-            print("🔧 Using LlamaTokenizer")
-            self.llama_tokenizer = LlamaTokenizer.from_pretrained(
-                self.llama_model,
-                use_fast=False,
-                token=token
-            )
-            self.llama_tokenizer.pad_token = "$$"
-
-        print(f"✅ Tokenizer loaded successfully: {type(self.llama_tokenizer)}")
+        self.llama_tokenizer = LlamaTokenizer.from_pretrained(self.llama_model,use_fast=False,token=token) #
+        self.llama_tokenizer.pad_token = "$$"
         print("self.low_resource",self.low_resource)
-
-        # 🔧 Clear memory again to make room for loading the model
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            gc.collect()
-
         if self.low_resource:
-            # Check whether this is a Qwen model and pick a loading strategy
-            if "qwen" in self.llama_model.lower() or "Qwen" in self.llama_model:
-                print("🔧 Qwen model detected, using AutoModelForCausalLM")
-                self.llama_model = AutoModelForCausalLM.from_pretrained(
-                    self.llama_model,
-                    torch_dtype=torch.float16,
-                    load_in_8bit=True,
-                    device_map={'':f"cuda:{self.minigpt4_gpu_id}"},
-                    token=token,
-                    trust_remote_code=True
-                )
-            else:
-                print("🔧 Using the Llama-specific loading strategy")
-                self.llama_model = llm_model.from_pretrained(
-                    self.llama_model,
-                    torch_dtype=torch.float16,
-                    load_in_8bit=True,
-                    device_map={'':f"cuda:{self.minigpt4_gpu_id}"},
-                    token=token
-                )
+            self.llama_model = llm_model.from_pretrained(
+                self.llama_model,
+                torch_dtype=torch.float16,
+                # torch_dtype = torch.bfloat16,
+                load_in_8bit=True,
+                # device_map = "balanced"
+                # device_map="auto",
+                # device_map={'':torch.cuda.current_device()},token=token
+                device_map={'':f"cuda:{self.minigpt4_gpu_id}"},token=token
+
+            )
         else:
-
-            # Check whether this is a Qwen model
-            if "qwen" in self.llama_model.lower() or "Qwen" in self.llama_model:
-                print("🔧 Qwen model detected, using AutoModelForCausalLM")
-                self.llama_model = AutoModelForCausalLM.from_pretrained(
-                    self.llama_model,
-                    torch_dtype=torch.float16,
-                    token=token,
-                    trust_remote_code=True
-                )
-            else:
-                print("🔧 Using the Llama high-resource loading strategy")
-                self.llama_model = llm_model.from_pretrained(
-                    self.llama_model,
-                    torch_dtype=torch.float16,
-                    token=token
-                )
+            self.llama_model = llm_model.from_pretrained(
+                self.llama_model,
+                torch_dtype=torch.float16,token=token
+            )
 
         # self.llama_model.resize_token_embeddings(len(self.llama_tokenizer))
         self.llama_model = prepare_model_for_int8_training(self.llama_model)
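For context, the hunk above drops the Qwen/AutoTokenizer branching and the GPU-cache housekeeping, returning to a single Llama code path: one LlamaTokenizer with a "$$" sentinel pad token, then either an 8-bit single-GPU load (low_resource) or a plain fp16 load. Below is a minimal, self-contained sketch of that restored pattern, assuming a Transformers/bitsandbytes version that still accepts load_in_8bit directly (newer releases route it through BitsAndBytesConfig); model_id and gpu_id are illustrative stand-ins, not values from this repo:

    import os
    import torch
    from transformers import LlamaTokenizer, LlamaForCausalLM

    model_id = "meta-llama/Llama-2-7b-chat-hf"  # hypothetical checkpoint
    gpu_id = 0                                  # hypothetical; the repo uses self.minigpt4_gpu_id
    low_resource = True
    token = os.environ.get("HF_TKN")

    # Slow (SentencePiece) tokenizer with the same "$$" pad token the diff sets.
    tokenizer = LlamaTokenizer.from_pretrained(model_id, use_fast=False, token=token)
    tokenizer.pad_token = "$$"

    if low_resource:
        # 8-bit weights via bitsandbytes, pinned to one GPU; device_map={'': ...}
        # places every module on that single device instead of sharding.
        model = LlamaForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            load_in_8bit=True,
            device_map={"": f"cuda:{gpu_id}"},
            token=token,
        )
    else:
        # Full fp16 weights; the caller moves the model to a device afterwards.
        model = LlamaForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            token=token,
        )

The later call to prepare_model_for_int8_training comes from peft (renamed prepare_model_for_kbit_training in newer releases); it freezes the base weights, casts the norm layers to fp32, and enables input gradients so the quantized model can be fine-tuned with adapters.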
@@ -874,4 +808,4 @@ def assign_imgs(batched_instruct_list, batched_img_embeds):
             n_assigned.append(None)
         batched_assigned.append(assigned_img)
 
-    return batched_assigned
+    return batched_assigned
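The second hunk deletes and re-inserts return batched_assigned with identical visible text, which in a rendering that drops leading whitespace usually indicates an indentation change. If (an assumption, since the whitespace is not shown) the return moved from inside the per-batch loop to function level, this is the classic early-return bug, sketched below with a simplified, hypothetical body:

    def assign_imgs_early_return(batched_instruct_list, batched_img_embeds):
        # Buggy shape: returning inside the loop exits after the first batch item.
        batched_assigned = []
        for instruct, img_embeds in zip(batched_instruct_list, batched_img_embeds):
            batched_assigned.append(img_embeds)
            return batched_assigned  # only one element ever accumulated

    def assign_imgs_full(batched_instruct_list, batched_img_embeds):
        # Fixed shape: the return sits at function level, after the loop completes.
        batched_assigned = []
        for instruct, img_embeds in zip(batched_instruct_list, batched_img_embeds):
            batched_assigned.append(img_embeds)
        return batched_assigned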