Spaces:
Paused
Paused
use 4-bit
Browse files
model.py
CHANGED
|
@@ -10,10 +10,11 @@ model_id = 'baichuan-inc/Baichuan2-13B-Chat'
|
|
| 10 |
if torch.cuda.is_available():
|
| 11 |
model = AutoModelForCausalLM.from_pretrained(
|
| 12 |
model_id,
|
| 13 |
-
device_map='auto',
|
| 14 |
-
torch_dtype=torch.bfloat16,
|
| 15 |
trust_remote_code=True
|
| 16 |
)
|
|
|
|
| 17 |
model.generation_config = GenerationConfig.from_pretrained(model_id)
|
| 18 |
else:
|
| 19 |
model = None
|
|
|
|
| 10 |
if torch.cuda.is_available():
|
| 11 |
model = AutoModelForCausalLM.from_pretrained(
|
| 12 |
model_id,
|
| 13 |
+
# device_map='auto',
|
| 14 |
+
torch_dtype=torch.float16,
|
| 15 |
trust_remote_code=True
|
| 16 |
)
|
| 17 |
+
model = model.quantize(4).cuda()
|
| 18 |
model.generation_config = GenerationConfig.from_pretrained(model_id)
|
| 19 |
else:
|
| 20 |
model = None
|