Upload tokenizer_config.json with huggingface_hub
Browse files
- tokenizer_config.json +3 -2
    	
        tokenizer_config.json
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 | 
             
              "architectures": [
         | 
| 3 | 
             
                "GPT2LMHeadModel"
         | 
| 4 | 
             
              ],
         | 
| 5 | 
            -
              "model_max_length":  | 
| 6 | 
             
              "tokenizer_class": "HYTokenizer",
         | 
| 7 | 
             
              "auto_map": {
         | 
| 8 | 
             
                "AutoTokenizer": [
         | 
| @@ -11,8 +11,9 @@ | |
| 11 | 
             
                  ]
         | 
| 12 | 
             
              },
         | 
| 13 | 
             
              "eos_token": "<|eos|>",
         | 
|  | |
| 14 | 
             
              "model_type": "gpt2",
         | 
| 15 | 
             
              "additional_special_tokens": ["<|startoftext|>", "<|extra_0|>", "<|extra_4|>", "<|extra_5|>", "<|eos|>"],
         | 
| 16 | 
             
              "pad_token": "<|pad|>",
         | 
| 17 | 
            -
              "chat_template": "{% set  | 
| 18 | 
             
            }
         | 
|  | |
| 2 | 
             
              "architectures": [
         | 
| 3 | 
             
                "GPT2LMHeadModel"
         | 
| 4 | 
             
              ],
         | 
| 5 | 
            +
              "model_max_length": 262144,
         | 
| 6 | 
             
              "tokenizer_class": "HYTokenizer",
         | 
| 7 | 
             
              "auto_map": {
         | 
| 8 | 
             
                "AutoTokenizer": [
         | 
|  | |
| 11 | 
             
                  ]
         | 
| 12 | 
             
              },
         | 
| 13 | 
             
              "eos_token": "<|eos|>",
         | 
| 14 | 
            +
              "bos_token": "<|startoftext|>",
         | 
| 15 | 
             
              "model_type": "gpt2",
         | 
| 16 | 
             
              "additional_special_tokens": ["<|startoftext|>", "<|extra_0|>", "<|extra_4|>", "<|extra_5|>", "<|eos|>"],
         | 
| 17 | 
             
              "pad_token": "<|pad|>",
         | 
| 18 | 
            +
              "chat_template": "{%- if not add_generation_prompt is defined %}\n    {%- set add_generation_prompt = false %}\n{%- endif %}\n{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_first_user=true, is_last_user=false) %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n        {%- if ns.is_first_sp %}\n            {%- set ns.system_prompt = ns.system_prompt + message['content'] %}\n            {%- set ns.is_first_sp = false %}\n        {%- else %}\n            {%- set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{{- bos_token }}\n{{- ns.system_prompt }}\n{%- if tools %}\n    {%- if ns.system_prompt != '' %}\n        {{- '\n\n# Tools\n\nYou may call one or more functions to assist with the user query.' }}\n    {%- else %}\n        {{- '# Tools\n\nYou may call one or more functions to assist with the user query.' }}\n    {%- endif %}\n    {{- '\n\nYou are provided with function signatures within <tools></tools> XML tags:' }}\n    {{- '\n<tools>\n' }}\n    {%- for tool in tools %}\n        {%- if loop.index0 > 0 %}\n            {{- '\n' }}\n        {%- endif %}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- '\n</tools>\n\n' }}\n    {{- 'For function call returns, you should first print <tool_calls>' }}\n    {{- 'For each function call, you should return object like:\n' }}\n    {{- '<tool_call>function_name\n```json\nfunction_arguments_in_json_format\n```</tool_call>' }}\n    {{- 'At the end of function call returns, you should print </tool_calls>' }}\n{%- endif %}\n{%- if ns.system_prompt != '' or tools %}\n    {{- '<|extra_4|>' }}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'user' %}\n        {%- set ns.is_tool = false %}\n        {%- set ns.is_first = false %}\n        {%- set ns.is_last_user = true %}\n        {%- if ns.is_first_user %}\n            {{- message['content'] + '<|extra_0|>' }}\n            {%- set ns.is_first_user = false %}\n        {%- else %}\n            {{- bos_token + message['content'] + '<|extra_0|>' }}\n        {%- endif %}\n    {%- endif %}\n    {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}\n        {%- set ns.is_last_user = false %}\n        {%- if ns.is_tool %}\n            {{- '</tool_responses>' + '<|extra_0|>' }}\n        {%- endif %}\n        {%- set ns.is_first = false %}\n        {%- set ns.is_tool = false %}\n        {%- set ns.is_output_first = true %}\n        {%- for tool in message['tool_calls'] %}\n            {%- set arguments = tool['function']['arguments'] %}\n            {%- if arguments is not string %}\n                {%- set arguments = arguments | tojson %}\n            {%- endif %}\n            {%- if not ns.is_first %}\n                {%- if message['content'] is none %}\n                    {{- '<tool_calls><tool_call>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '</tool_call>' }}\n                {%- else %}\n                    {{- message['content'] + '<tool_calls><tool_call>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '</tool_call>' }}\n                {%- endif %}\n            {%- set ns.is_first = true %}\n            {%- else %}\n                {{- '\n' + '<tool_call>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '</tool_call>' }}\n            {%- endif %}\n        {%- endfor %}\n        {{- '</tool_calls>' + eos_token }}\n    {%- endif %}\n    {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}\n        {%- set content = message['content'] %}\n        {%- if '<answer>' in content and not loop.last %}\n            {%- set content = content.split('<answer>')[-1].strip('</answer>').strip() %}\n        {%- endif %}\n        {%- set ns.is_last_user = false %}\n        {%- if ns.is_tool %}\n            {{- '</tool_responses>' + '<|extra_0|>' + content + eos_token }}\n            {%- set ns.is_tool = false %}\n        {%- else %}\n            {{- content + eos_token }}\n        {%- endif %}\n    {%- endif %}\n    {%- if message['role'] == 'tool' %}\n        {%- set ns.is_last_user = false %}\n        {%- set ns.is_tool = true %}\n        {%- if ns.is_output_first %}\n            {{- bos_token + '<tool_responses><tool_response>' + message['content'] + '</tool_response>' }}\n            {%- set ns.is_output_first = false %}\n        {%- else %}\n            {{- '\n<tool_response>' + message['content'] + '</tool_response>' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if ns.is_tool %}\n    {{- '</tool_responses>' + '<|extra_0|>' }}\n{%- endif %}\n{%- if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}\n    {{- '<|extra_0|>' }}\n{%- endif %}\n{%- if enable_thinking is defined and not enable_thinking %}\n    {{- '<think>\n\n</think>\n' }}\n{%- endif %}"
         | 
| 19 | 
             
            }
         | 

