Spaces:

prithivMLmods
/

core-OCR

Running on Zero

App Files Community

prithivMLmods committed on May 21

Commit

a5e1c7c

verified ·

1 Parent(s): ddae8ac

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -6

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import numpy as np
 from PIL import Image
 from transformers import (
     Qwen2VLForConditionalGeneration,
     AutoProcessor,
     TextIteratorStreamer,
 )
@@ -58,7 +59,7 @@ def downsample_video(video_path):
     return frames
 # Model and Processor Setup
-QV_MODEL_ID = "Qwen/Qwen2-VL-7B-Instruct"
 qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
 qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
     QV_MODEL_ID,
@@ -66,9 +67,9 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to("cuda").eval()
-COREOCR_MODEL_ID = "prithivMLmods/coreOCR-7B-050325-preview"
 coreocr_processor = AutoProcessor.from_pretrained(COREOCR_MODEL_ID, trust_remote_code=True)
-coreocr_model = Qwen2VLForConditionalGeneration.from_pretrained(
     COREOCR_MODEL_ID,
     trust_remote_code=True,
     torch_dtype=torch.bfloat16
@@ -117,11 +118,11 @@ def model_inference(message, history, use_coreocr):
     if use_coreocr:
         processor = coreocr_processor
         model = coreocr_model
-        model_name = "CoreOCR"
     else:
         processor = qwen_processor
         model = qwen_model
-        model_name = "Qwen2VL OCR"
     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     all_images = [item["image"] for item in content if item["type"] == "image"]
@@ -165,7 +166,7 @@ demo = gr.ChatInterface(
     multimodal=True,
     cache_examples=False,
     theme="bethecloud/storj_theme",
-    additional_inputs=[gr.Checkbox(label="Use CoreOCR", value=True, info="Check to use CoreOCR, uncheck to use Qwen2VL OCR")],
 )
 demo.launch(debug=True, ssr_mode=False)

 from PIL import Image
 from transformers import (
     Qwen2VLForConditionalGeneration,
+    Qwen2_5_VLForConditionalGeneration,
     AutoProcessor,
     TextIteratorStreamer,
 )
     return frames
 # Model and Processor Setup
+QV_MODEL_ID = "prithivMLmods/coreOCR-7B-050325-preview"
 qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
 qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
     QV_MODEL_ID,
     torch_dtype=torch.float16
 ).to("cuda").eval()
+COREOCR_MODEL_ID = "prithivMLmods/docscopeOCR-7B-050425-exp"
 coreocr_processor = AutoProcessor.from_pretrained(COREOCR_MODEL_ID, trust_remote_code=True)
+coreocr_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     COREOCR_MODEL_ID,
     trust_remote_code=True,
     torch_dtype=torch.bfloat16
     if use_coreocr:
         processor = coreocr_processor
         model = coreocr_model
+        model_name = "DocScopeOCR"
     else:
         processor = qwen_processor
         model = qwen_model
+        model_name = "CoreOCR"
     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     all_images = [item["image"] for item in content if item["type"] == "image"]
     multimodal=True,
     cache_examples=False,
     theme="bethecloud/storj_theme",
+    additional_inputs=[gr.Checkbox(label="Use CoreOCR", value=True, info="Check to use CoreOCR, uncheck to use DocScopeOCR")],
 )
 demo.launch(debug=True, ssr_mode=False)