| from .mm_constants import IMAGE_TOKEN_INDEX, IMAGE_PAD_TOKEN_INDEX | |
def tokenizer_image_token_qwen(prompt, tokenizer, image_token_index, image_token_num=576):
    """Expand every image placeholder in ``prompt`` into a fixed-length image span.

    Each element of ``prompt`` equal to ``image_token_index`` is replaced by
    one ``IMAGE_TOKEN_INDEX`` followed by ``image_token_num - 1`` copies of
    ``IMAGE_PAD_TOKEN_INDEX``. All other elements are copied through unchanged
    and in order.

    Args:
        prompt: Iterable of token ids that may contain placeholders.
        tokenizer: Not used by this function; kept so existing callers that
            pass it positionally keep working.
        image_token_index: Value in ``prompt`` that marks an image placeholder.
        image_token_num: Total number of ids emitted per placeholder. Values
            below 1 still emit the single leading ``IMAGE_TOKEN_INDEX``,
            because repeating a list a non-positive number of times yields an
            empty list.

    Returns:
        A new list of token ids with every placeholder expanded.
    """
    # Build the replacement span once; it is identical for every placeholder.
    image_span = [IMAGE_TOKEN_INDEX] + [IMAGE_PAD_TOKEN_INDEX] * (image_token_num - 1)

    input_ids = []
    for token in prompt:
        if token == image_token_index:
            # Expand in place so a placeholder at the very end of the prompt
            # (or several consecutive trailing ones) is not lost.
            input_ids.extend(image_span)
        else:
            input_ids.append(token)
    return input_ids