codemo committed on
Commit
18c93c9
·
verified ·
1 Parent(s): 107f99d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -8
app.py CHANGED
@@ -37,13 +37,28 @@ torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
37
  print(f"使用设备: {device}, 数据类型: {torch_dtype}")
38
 
39
  tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
40
- model = AutoModel.from_pretrained(
41
- MODEL_PATH,
42
- trust_remote_code=True,
43
- use_safetensors=True,
44
- torch_dtype=torch_dtype
45
- )
46
- model = model.eval().to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # 创建设备兼容的推理包装器
49
  original_infer = model.infer
@@ -55,6 +70,7 @@ def device_compatible_infer(*args, **kwargs):
55
  # 临时修补 torch.cuda.is_available 和相关方法
56
  old_is_available = torch.cuda.is_available
57
  old_cuda_method = None
 
58
 
59
  try:
60
  # 如果是 CPU 模式,劫持 CUDA 调用
@@ -63,7 +79,11 @@ def device_compatible_infer(*args, **kwargs):
63
 
64
  # 修补 tensor.cuda() 方法
65
  def cpu_wrapper(self, *args, **kwargs):
66
- return self.cpu()
 
 
 
 
67
 
68
  # 保存原始方法
69
  if hasattr(torch.Tensor, '_original_cuda'):
@@ -73,6 +93,10 @@ def device_compatible_infer(*args, **kwargs):
73
  torch.Tensor._original_cuda = old_cuda_method
74
 
75
  torch.Tensor.cuda = cpu_wrapper
 
 
 
 
76
 
77
  # 调用原始 infer 方法
78
  return original_infer(*args, **kwargs)
@@ -82,6 +106,8 @@ def device_compatible_infer(*args, **kwargs):
82
  torch.cuda.is_available = old_is_available
83
  if old_cuda_method is not None:
84
  torch.Tensor.cuda = old_cuda_method
 
 
85
 
86
  # 替换模型的 infer 方法
87
  model.infer = device_compatible_infer
 
37
  print(f"使用设备: {device}, 数据类型: {torch_dtype}")
38
 
39
  tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
40
+
41
+ # 加载模型
42
+ if device == "cpu":
43
+ # CPU 模式:使用 float32 避免类型不匹配
44
+ print("⚠️ CPU 模式:强制使用 float32(bfloat16 在 CPU 上不完全支持)")
45
+ model = AutoModel.from_pretrained(
46
+ MODEL_PATH,
47
+ trust_remote_code=True,
48
+ use_safetensors=True,
49
+ torch_dtype=torch.float32, # CPU 必须使用 float32
50
+ low_cpu_mem_usage=True
51
+ )
52
+ model = model.eval().float() # 确保所有参数都是 float32
53
+ else:
54
+ # GPU 模式:可以使用 bfloat16
55
+ model = AutoModel.from_pretrained(
56
+ MODEL_PATH,
57
+ trust_remote_code=True,
58
+ use_safetensors=True,
59
+ torch_dtype=torch.bfloat16
60
+ )
61
+ model = model.eval().to(device)
62
 
63
  # 创建设备兼容的推理包装器
64
  original_infer = model.infer
 
70
  # 临时修补 torch.cuda.is_available 和相关方法
71
  old_is_available = torch.cuda.is_available
72
  old_cuda_method = None
73
+ old_float_tensor = None
74
 
75
  try:
76
  # 如果是 CPU 模式,劫持 CUDA 调用
 
79
 
80
  # 修补 tensor.cuda() 方法
81
  def cpu_wrapper(self, *args, **kwargs):
82
+ # 确保返回 float32 类型
83
+ result = self.cpu()
84
+ if result.dtype == torch.bfloat16:
85
+ result = result.float()
86
+ return result
87
 
88
  # 保存原始方法
89
  if hasattr(torch.Tensor, '_original_cuda'):
 
93
  torch.Tensor._original_cuda = old_cuda_method
94
 
95
  torch.Tensor.cuda = cpu_wrapper
96
+
97
+ # 修补 torch.cuda.FloatTensor
98
+ old_float_tensor = torch.cuda.FloatTensor
99
+ torch.cuda.FloatTensor = torch.FloatTensor
100
 
101
  # 调用原始 infer 方法
102
  return original_infer(*args, **kwargs)
 
106
  torch.cuda.is_available = old_is_available
107
  if old_cuda_method is not None:
108
  torch.Tensor.cuda = old_cuda_method
109
+ if old_float_tensor is not None:
110
+ torch.cuda.FloatTensor = old_float_tensor
111
 
112
  # 替换模型的 infer 方法
113
  model.infer = device_compatible_infer