Update README.md
README.md CHANGED
@@ -175,7 +175,7 @@ def load_model_tokenizer(model_path):
     model = AutoGPTQForCausalLM.from_quantized("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
                                                inject_fused_attention=False,
                                                inject_fused_mlp=False,
-                                               use_safetensors=
+                                               use_safetensors=True,
                                                use_cuda_fp16=True,
                                                disable_exllama=False,
                                                device_map='auto' # Support multi-gpus
@@ -386,7 +386,7 @@ def load_model_tokenizer(model_path):
     model = AutoGPTQForCausalLM.from_quantized("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
                                                inject_fused_attention=False,
                                                inject_fused_mlp=False,
-                                               use_safetensors=
+                                               use_safetensors=True,
                                                use_cuda_fp16=True,
                                                disable_exllama=False,
                                                device_map='auto' # Support multi-gpus
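The change completes the previously truncated `use_safetensors=` argument, which was a syntax error as written; setting it to `True` tells auto-gptq to load the `.safetensors` checkpoint rather than a pickle-based `.bin` file. For context, below is a minimal sketch of how the corrected call might sit inside the `load_model_tokenizer` function named in the hunk headers. The tokenizer setup and the return value are assumptions for illustration, not part of this diff; only the `from_quantized` arguments come from the README.

# Sketch only: tokenizer setup and return shape are assumed, not shown
# in this diff; the from_quantized arguments mirror the corrected README.
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer

def load_model_tokenizer(model_path):
    # Assumed tokenizer line; the diff only covers the model call.
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = AutoGPTQForCausalLM.from_quantized(model_path,
                                               inject_fused_attention=False,
                                               inject_fused_mlp=False,
                                               use_safetensors=True,  # the fix: load .safetensors weights
                                               use_cuda_fp16=True,
                                               disable_exllama=False,
                                               device_map='auto')  # Support multi-gpus
    return model, tokenizer

model, tokenizer = load_model_tokenizer("codefuse-ai/CodeFuse-DeepSeek-33B-4bits")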