GuilhermeNunes committed on
Commit
7436d0e
·
verified ·
1 Parent(s): 71c2299

Upload processor

Browse files
chat_template.jinja CHANGED
@@ -1,5 +1,4 @@
1
- {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '
2
- ' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>
3
- ' }}{% endfor %}{# Render all text next #}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] }}{% endfor %}{% endif %}{{ '<|im_end|>' + '
4
- ' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
5
- ' }}{% endif %}
 
1
+ {{ bos_token }}{% for message in messages %}{% if message['role'] == 'assistant' %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}<start_of_turn>{{ role }}
2
+ {{ message['content'] | trim }}<end_of_turn>
3
+ {% endfor %}{% if add_generation_prompt %}<start_of_turn>model
4
+ {% endif %}
 
special_tokens_map.json CHANGED
@@ -3,8 +3,20 @@
3
  "<start_of_turn>",
4
  "<end_of_turn>"
5
  ],
6
- "bos_token": "<|im_start|>",
7
- "eos_token": "<|im_end|>",
 
 
 
 
 
 
 
 
 
 
 
 
8
  "pad_token": {
9
  "content": "<unk>",
10
  "lstrip": false,
 
3
  "<start_of_turn>",
4
  "<end_of_turn>"
5
  ],
6
+ "bos_token": {
7
+ "content": "<bos>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<end_of_turn>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
  "pad_token": {
21
  "content": "<unk>",
22
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8d0799ced231dd489d7946291b0ffc36aede4ec352cfb98c7ae1555f55874ab
3
- size 34362989
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b297c2611e679fa3ad2e8e77a7894870338ee92c2bbfbeff2d5b65496c47fd
3
+ size 34363057
tokenizer_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "add_bos_token": false,
3
  "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
@@ -2001,31 +2001,15 @@
2001
  "rstrip": false,
2002
  "single_word": false,
2003
  "special": true
2004
- },
2005
- "256001": {
2006
- "content": "<|im_start|>",
2007
- "lstrip": false,
2008
- "normalized": false,
2009
- "rstrip": false,
2010
- "single_word": false,
2011
- "special": true
2012
- },
2013
- "256002": {
2014
- "content": "<|im_end|>",
2015
- "lstrip": false,
2016
- "normalized": false,
2017
- "rstrip": false,
2018
- "single_word": false,
2019
- "special": true
2020
  }
2021
  },
2022
  "additional_special_tokens": [
2023
  "<start_of_turn>",
2024
  "<end_of_turn>"
2025
  ],
2026
- "bos_token": "<|im_start|>",
2027
  "clean_up_tokenization_spaces": false,
2028
- "eos_token": "<|im_end|>",
2029
  "extra_special_tokens": {},
2030
  "model_max_length": 8192,
2031
  "pad_token": "<unk>",
@@ -2033,7 +2017,7 @@
2033
  "processor_class": "LlavaNextProcessor",
2034
  "sp_model_kwargs": {},
2035
  "spaces_between_special_tokens": false,
2036
- "tokenizer_class": "GemmaTokenizer",
2037
  "unk_token": "<unk>",
2038
  "use_default_system_prompt": false
2039
  }
 
1
  {
2
+ "add_bos_token": true,
3
  "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
 
2001
  "rstrip": false,
2002
  "single_word": false,
2003
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2004
  }
2005
  },
2006
  "additional_special_tokens": [
2007
  "<start_of_turn>",
2008
  "<end_of_turn>"
2009
  ],
2010
+ "bos_token": "<bos>",
2011
  "clean_up_tokenization_spaces": false,
2012
+ "eos_token": "<end_of_turn>",
2013
  "extra_special_tokens": {},
2014
  "model_max_length": 8192,
2015
  "pad_token": "<unk>",
 
2017
  "processor_class": "LlavaNextProcessor",
2018
  "sp_model_kwargs": {},
2019
  "spaces_between_special_tokens": false,
2020
+ "tokenizer_class": "GemmaTokenizerFast",
2021
  "unk_token": "<unk>",
2022
  "use_default_system_prompt": false
2023
  }