Revert PR 56
#65
by xwjabc - opened
- chat_template.json +0 -3
- processor_config.json +6 -0
- tokenizer.json +2 -2
- tokenizer_config.json +10 -8
- vocab.json +0 -0
    	
chat_template.json
DELETED
@@ -1,3 +0,0 @@
-{
-  "chat_template": "{% for message in messages %}{{ '<|' + message['role'] + '|>' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'audio' %}{{ '<|audio|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% endif %}{{ '<|end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}"
-}
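For reference, the deleted file carried the multimodal chat template, which maps image and audio content parts onto the <|image|> and <|audio|> placeholder tokens. A minimal sketch of how it renders, using jinja2 directly; the template string is copied verbatim from the deleted file, while the message shape below is an assumption based on the template's own branches:

    from jinja2 import Template

    # Template string copied verbatim from the deleted chat_template.json.
    chat_template = (
        "{% for message in messages %}{{ '<|' + message['role'] + '|>' }}"
        "{% if message['content'] is string %}{{ message['content'] }}"
        "{% else %}{% for content in message['content'] %}"
        "{% if content['type'] == 'image' %}{{ '<|image|>' }}"
        "{% elif content['type'] == 'audio' %}{{ '<|audio|>' }}"
        "{% elif content['type'] == 'text' %}{{ content['text'] }}"
        "{% endif %}{% endfor %}{% endif %}"
        "{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}"
        "{{ '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% endif %}"
        "{{ '<|end|>' }}{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}"
    )

    # A hypothetical multimodal message: one image part followed by a text part.
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": "Describe this picture."},
        ]},
    ]

    print(Template(chat_template).render(
        messages=messages, add_generation_prompt=True, eos_token="<|endoftext|>"
    ))
    # -> <|user|><|image|>Describe this picture.<|end|><|assistant|>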
    	
processor_config.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "auto_map": {
+    "AutoProcessor": "processing_phi4mm.Phi4MMProcessor"
+  },
+  "processor_class": "Phi4MMProcessor"
+}
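The auto_map entry is what lets AutoProcessor resolve the repo-local Phi4MMProcessor class from processing_phi4mm.py. A hedged loading sketch; the repo id below is an assumption, and trust_remote_code=True is required because the processor code ships in the repo rather than in transformers itself:

    from transformers import AutoProcessor

    # Repo id is assumed for illustration; substitute the actual model repo.
    # trust_remote_code=True lets transformers import processing_phi4mm.py
    # from the repo and instantiate the class named in auto_map.
    processor = AutoProcessor.from_pretrained(
        "microsoft/Phi-4-multimodal-instruct",
        trust_remote_code=True,
    )
    print(type(processor).__name__)  # expected: Phi4MMProcessor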
    	
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:4c1b9f641d4f8b7247b8d5007dd3b6a9f6a87cb5123134fe0d326f14d10c0585
+size 15524479
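The new tokenizer.json content above is a Git LFS pointer: the oid is the SHA-256 of the actual file and size is its byte count. A small sketch for verifying a downloaded copy against the pointer (the local path is an assumption):

    import hashlib

    # Values taken from the LFS pointer in this diff.
    EXPECTED_OID = "4c1b9f641d4f8b7247b8d5007dd3b6a9f6a87cb5123134fe0d326f14d10c0585"
    EXPECTED_SIZE = 15524479

    # "tokenizer.json" is assumed to be the resolved file, not the pointer itself.
    with open("tokenizer.json", "rb") as f:
        data = f.read()

    assert len(data) == EXPECTED_SIZE, f"size mismatch: {len(data)}"
    assert hashlib.sha256(data).hexdigest() == EXPECTED_OID, "sha256 mismatch"
    print("tokenizer.json matches the LFS pointer")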
    	
tokenizer_config.json
CHANGED
@@ -2,7 +2,7 @@
   "add_prefix_space": false,
   "added_tokens_decoder": {
     "200010": {
-      "content": "<|
+      "content": "<|endoftext10|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -10,7 +10,15 @@
       "special": true
     },
     "200011": {
-      "content": "<|
+      "content": "<|endoftext11|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "199999": {
+      "content": "<|endoftext|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -106,16 +114,10 @@
       "special": true
     }
   },
-  "audio_token": "<|audio|>",
   "bos_token": "<|endoftext|>",
   "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
-  "extra_special_tokens": {
-    "audio_token": "<|audio|>",
-    "image_token": "<|image|>"
-  },
-  "image_token": "<|image|>",
   "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2TokenizerFast",
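Net effect of this file's revert: ids 200010 and 200011 go back to the generic <|endoftext10|>/<|endoftext11|> placeholder contents, 199999 is registered explicitly as <|endoftext|>, and the audio_token/image_token/extra_special_tokens keys are dropped. A quick check of the reverted mapping, with the same assumed repo id as above:

    from transformers import AutoTokenizer

    # Repo id is assumed; the expected output reflects the reverted
    # added_tokens_decoder entries shown in this diff.
    tok = AutoTokenizer.from_pretrained("microsoft/Phi-4-multimodal-instruct")
    print(tok.convert_ids_to_tokens([200010, 200011, 199999]))
    # expected: ['<|endoftext10|>', '<|endoftext11|>', '<|endoftext|>']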
    	
vocab.json
CHANGED

The diff for this file is too large to render. See raw diff.
