Update README.md

README.md CHANGED
@@ -1,28 +1,22 @@
 ---
 frameworks:
 - Pytorch
-license:
+license: other
 tasks:
 - text-generation
 ---
-# Model Card for CodeFuse-DeepSeek-33B
-
-<img src="https://modelscope.cn/api/v1/models/codefuse-ai/CodeFuse-DeepSeek-33B/repo?Revision=master&FilePath=LOGO.jpg&View=true" width="800"/>
-<p>
+# Model Card for CodeFuse-DeepSeek-33B-4bits
+
 
 [[中文]](#chinese) [[English]](#english)
 
-#### Clone with HTTP
-```bash
-git clone https://www.modelscope.cn/codefuse-ai/CodeFuse-DeepSeek-33B-4bits.git
-```
-
 <a id="english"></a>
 
 ## Model Description
 
 CodeFuse-DeepSeek-33B-4bits is the 4-bit quantized version of [CodeFuse-DeepSeek-33B](https://modelscope.cn/models/codefuse-ai/CodeFuse-DeepSeek-33B/summary), a 33B Code-LLM fine-tuned with QLoRA on multiple code-related tasks on top of the base model DeepSeek-Coder-33B.
-After undergoing 4-bit quantization, the CodeFuse-DeepSeek-33B-4bits model can b…
+
+After undergoing 4-bit quantization, the CodeFuse-DeepSeek-33B-4bits model can be loaded on either a single A10 (24GB VRAM) or an RTX 4090 (24GB VRAM). Moreover, the quantized model still achieves an impressive accuracy of 78.05% on the HumanEval pass@1 metric.
 
 <br>
 
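The A10/RTX 4090 claim above makes 24GB of VRAM the effective hardware floor for this checkpoint, so a quick pre-flight check before loading can save a long failed load. A minimal sketch using only standard PyTorch calls (the 24GB figure is taken from the card text above):

```python
import torch

# Pre-flight check: the model card states the 4-bit checkpoint needs a
# single 24GB GPU (A10 or RTX 4090), so verify a suitable device is visible.
assert torch.cuda.is_available(), "a CUDA GPU is required for the GPTQ checkpoint"
props = torch.cuda.get_device_properties(0)
print(f"{torch.cuda.get_device_name(0)}: {props.total_memory / 2**30:.1f} GiB VRAM")
```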
@@ -30,7 +24,7 @@ After undergoing 4-bit quantization, the CodeFuse-DeepSeek-33B-4bits model can b
 
 🔥🔥🔥 2024-01-12 CodeFuse-DeepSeek-33B-4bits has been released. Despite the quantization process, the model still achieves a remarkable 78.05% accuracy (greedy decoding) on the HumanEval pass@1 metric.
 
-🔥🔥🔥 2024-01-12 CodeFuse-DeepSeek-33B has been released,
+🔥🔥🔥 2024-01-12 CodeFuse-DeepSeek-33B has been released, achieving a pass@1 (greedy decoding) score of 78.65% on HumanEval.
 
 🔥🔥 2023-11-10 CodeFuse-CodeGeeX2-6B has been released, achieving a pass@1 (greedy decoding) score of 45.12% on HumanEval, a 9.22 percentage-point increase over CodeGeeX2's 35.9%.
 
@@ -42,7 +36,7 @@ After undergoing 4-bit quantization, the CodeFuse-DeepSeek-33B-4bits model can b
 
 🔥🔥🔥 2023-09-26 We are pleased to announce the release of the [4-bit quantized version](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B-4bits/summary) of [CodeFuse-CodeLlama-34B](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B/summary). Despite the quantization process, the model still achieves a remarkable 73.8% accuracy (greedy decoding) on the HumanEval pass@1 metric.
 
-🔥🔥🔥 2023-09-11 [CodeFuse-CodeLlama34B](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B/summary) has
+🔥🔥🔥 2023-09-11 [CodeFuse-CodeLlama-34B](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B/summary) has achieved a pass@1 (greedy decoding) score of 74.4% on HumanEval, the SOTA result among open-sourced LLMs at present.
 
 <br>
 
@@ -140,7 +134,7 @@ In this format, the system section is optional and the conversation can be eithe
 import os
 import torch
 import time
-from
+from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -149,7 +143,7 @@ def load_model_tokenizer(model_path):
     """
     Load model and tokenizer based on the given model name or local path of downloaded model.
     """
-    tokenizer = AutoTokenizer.from_pretrained(
+    tokenizer = AutoTokenizer.from_pretrained("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
         trust_remote_code=True,
         use_fast=False,
         legacy=False)
@@ -157,7 +151,7 @@ def load_model_tokenizer(model_path):
     tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
     tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
 
-    model = AutoGPTQForCausalLM.from_quantized(
+    model = AutoGPTQForCausalLM.from_quantized("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
         inject_fused_attention=False,
         inject_fused_mlp=False,
         use_safetensors=False,
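Taken together, the two hunks above pin the tokenizer and model to the hub ID rather than using the function's `model_path` argument. For readability, a sketch of the loader as it stands after this change, assembled from the added and unchanged lines; the trailing `from_quantized` arguments and the `return` fall outside the diff context and are assumptions here:

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

def load_model_tokenizer(model_path):
    """
    Load model and tokenizer based on the given model name or local path of downloaded model.
    """
    # The hub ID is now hard-coded, so the model_path argument goes unused.
    tokenizer = AutoTokenizer.from_pretrained("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
        trust_remote_code=True,
        use_fast=False,
        legacy=False)
    # DeepSeek-Coder uses one dedicated token for both padding and end-of-sequence.
    tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
    tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
    model = AutoGPTQForCausalLM.from_quantized("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
        inject_fused_attention=False,
        inject_fused_mlp=False,
        use_safetensors=False)  # further kwargs are elided in the diff
    return model, tokenizer  # assumed; the return statement is outside the visible hunks
```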
@@ -170,7 +164,7 @@ def load_model_tokenizer(model_path):
 
 def inference(model, tokenizer, prompt):
     """
-    Uset the given model and tokenizer to generate an answer for the
+    Use the given model and tokenizer to generate an answer for the specified prompt.
     """
     st = time.time()
     prompt = prompt if prompt.endswith('\n') else f'{prompt}\n'
@@ -198,8 +192,6 @@ def inference(model, tokenizer, prompt):
 
 
 if __name__ == "__main__":
-    model_dir = snapshot_download('codefuse-ai/CodeFuse-DeepSeek-33B-4bits', revision='v1.0.0')
-
     prompt = 'Please write a QuickSort program in Python'
 
     model, tokenizer = load_model_tokenizer(model_dir)
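One caveat in this hunk: it deletes the `snapshot_download` call, but the unchanged line `model, tokenizer = load_model_tokenizer(model_dir)` still references `model_dir`, which is now undefined in the visible snippet. A minimal fix sketch, using the hub ID already hard-coded in the loader (either line restores a usable `model_dir`):

```python
# Option 1: restore the local download this hunk removes.
from modelscope import snapshot_download
model_dir = snapshot_download('codefuse-ai/CodeFuse-DeepSeek-33B-4bits', revision='v1.0.0')

# Option 2: pass the hub ID directly; the loader hard-codes it anyway.
model_dir = "codefuse-ai/CodeFuse-DeepSeek-33B-4bits"
```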
@@ -332,16 +324,16 @@ User prompt...
 import os
 import torch
 import time
-from
+from transformers import AutoTokenizer
 from auto_gptq import AutoGPTQForCausalLM
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 def load_model_tokenizer(model_path):
     """
-    Load model and tokenizer based on the given model name or local path of downloaded model.
+    Load model and tokenizer based on the given model name or local path of the downloaded model.
     """
-    tokenizer = AutoTokenizer.from_pretrained(
+    tokenizer = AutoTokenizer.from_pretrained("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
         trust_remote_code=True,
         use_fast=False,
         legacy=False)
@@ -349,7 +341,7 @@ def load_model_tokenizer(model_path):
     tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
     tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("<|end▁of▁sentence|>")
 
-    model = AutoGPTQForCausalLM.from_quantized(
+    model = AutoGPTQForCausalLM.from_quantized("codefuse-ai/CodeFuse-DeepSeek-33B-4bits",
         inject_fused_attention=False,
         inject_fused_mlp=False,
         use_safetensors=False,
@@ -390,7 +382,6 @@ def inference(model, tokenizer, prompt):
 
 
 if __name__ == "__main__":
-    model_dir = snapshot_download('codefuse-ai/CodeFuse-DeepSeek-33B-4bits', revision='v1.0.0')
 
     prompt = 'Please write a QuickSort program in Python'
 
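The same `model_dir` caveat applies to this second copy of the example (the README repeats the snippet for the Chinese section): `snapshot_download` is deleted here as well, so the call site presumably needs the same one-line fix sketched above.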