davanstrien HF Staff committed on
Commit
40f1c08
·
1 Parent(s): 1411932

Fix TOKENIZER initialization for vLLM compatibility

Browse files

- Initialize tokenizer at module import time in config.py
- Remove redundant tokenizer initialization from main.py
- Fixes AttributeError during vLLM AsyncEngine profile run

Files changed (2) hide show
  1. config.py +3 -2
  2. main.py +1 -4
config.py CHANGED
@@ -24,8 +24,9 @@ OUTPUT_PATH = ''
24
  # Default prompt
25
  PROMPT = '<image>\n<|grounding|>Convert the document to markdown.'
26
 
27
- # Tokenizer (will be initialized in main.py)
28
- TOKENIZER = None
 
29
 
30
 
31
  def set_resolution_mode(mode: str):
 
24
  # Default prompt
25
  PROMPT = '<image>\n<|grounding|>Convert the document to markdown.'
26
 
27
+ # Tokenizer - initialized at import time for vLLM compatibility
28
+ from transformers import AutoTokenizer
29
+ TOKENIZER = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
30
 
31
 
32
  def set_resolution_mode(mode: str):
main.py CHANGED
@@ -20,7 +20,6 @@ from datasets import load_dataset
20
  from huggingface_hub import DatasetCard, login
21
  from PIL import Image, ImageOps
22
  from tqdm.auto import tqdm
23
- from transformers import AutoTokenizer
24
  from vllm import AsyncLLMEngine, SamplingParams
25
  from vllm.engine.arg_utils import AsyncEngineArgs
26
  from vllm.model_executor.models.registry import ModelRegistry
@@ -256,9 +255,7 @@ async def main_async(
256
  # Set up config for resolution mode
257
  setup_config(resolution_mode)
258
 
259
- # Initialize tokenizer
260
- logger.info(f"Loading tokenizer from {model}...")
261
- config.TOKENIZER = AutoTokenizer.from_pretrained(model, trust_remote_code=True)
262
  config.MODEL_PATH = model
263
  config.PROMPT = prompt
264
 
 
20
  from huggingface_hub import DatasetCard, login
21
  from PIL import Image, ImageOps
22
  from tqdm.auto import tqdm
 
23
  from vllm import AsyncLLMEngine, SamplingParams
24
  from vllm.engine.arg_utils import AsyncEngineArgs
25
  from vllm.model_executor.models.registry import ModelRegistry
 
255
  # Set up config for resolution mode
256
  setup_config(resolution_mode)
257
 
258
+ # Set model and prompt (tokenizer already initialized in config.py)
 
 
259
  config.MODEL_PATH = model
260
  config.PROMPT = prompt
261