Spaces: Running on Zero
Commit: Update app.py
Browse files
Changed file: app.py
|
@@ -9,6 +9,7 @@ import numpy as np
|
|
| 9 |
from PIL import Image
|
| 10 |
from transformers import (
|
| 11 |
Qwen2VLForConditionalGeneration,
|
|
|
|
| 12 |
AutoProcessor,
|
| 13 |
TextIteratorStreamer,
|
| 14 |
)
|
|
@@ -58,7 +59,7 @@ def downsample_video(video_path):
|
|
| 58 |
return frames
|
| 59 |
|
| 60 |
# Model and Processor Setup
|
| 61 |
-
QV_MODEL_ID = "
|
| 62 |
qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
|
| 63 |
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 64 |
QV_MODEL_ID,
|
|
@@ -66,9 +67,9 @@ qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
|
|
| 66 |
torch_dtype=torch.float16
|
| 67 |
).to("cuda").eval()
|
| 68 |
|
| 69 |
-
COREOCR_MODEL_ID = "prithivMLmods/
|
| 70 |
coreocr_processor = AutoProcessor.from_pretrained(COREOCR_MODEL_ID, trust_remote_code=True)
|
| 71 |
-
coreocr_model =
|
| 72 |
COREOCR_MODEL_ID,
|
| 73 |
trust_remote_code=True,
|
| 74 |
torch_dtype=torch.bfloat16
|
|
@@ -117,11 +118,11 @@ def model_inference(message, history, use_coreocr):
|
|
| 117 |
if use_coreocr:
|
| 118 |
processor = coreocr_processor
|
| 119 |
model = coreocr_model
|
| 120 |
-
model_name = "
|
| 121 |
else:
|
| 122 |
processor = qwen_processor
|
| 123 |
model = qwen_model
|
| 124 |
-
model_name = "
|
| 125 |
|
| 126 |
prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 127 |
all_images = [item["image"] for item in content if item["type"] == "image"]
|
|
@@ -165,7 +166,7 @@ demo = gr.ChatInterface(
|
|
| 165 |
multimodal=True,
|
| 166 |
cache_examples=False,
|
| 167 |
theme="bethecloud/storj_theme",
|
| 168 |
-
additional_inputs=[gr.Checkbox(label="Use CoreOCR", value=True, info="Check to use CoreOCR, uncheck to use
|
| 169 |
)
|
| 170 |
|
| 171 |
demo.launch(debug=True, ssr_mode=False)
|
|
|
|
| 9 |
from PIL import Image
|
| 10 |
from transformers import (
|
| 11 |
Qwen2VLForConditionalGeneration,
|
| 12 |
+
Qwen2_5_VLForConditionalGeneration,
|
| 13 |
AutoProcessor,
|
| 14 |
TextIteratorStreamer,
|
| 15 |
)
|
|
|
|
| 59 |
return frames
|
| 60 |
|
| 61 |
# Model and Processor Setup
|
| 62 |
+
QV_MODEL_ID = "prithivMLmods/coreOCR-7B-050325-preview"
|
| 63 |
qwen_processor = AutoProcessor.from_pretrained(QV_MODEL_ID, trust_remote_code=True)
|
| 64 |
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 65 |
QV_MODEL_ID,
|
|
|
|
| 67 |
torch_dtype=torch.float16
|
| 68 |
).to("cuda").eval()
|
| 69 |
|
| 70 |
+
COREOCR_MODEL_ID = "prithivMLmods/docscopeOCR-7B-050425-exp"
|
| 71 |
coreocr_processor = AutoProcessor.from_pretrained(COREOCR_MODEL_ID, trust_remote_code=True)
|
| 72 |
+
coreocr_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 73 |
COREOCR_MODEL_ID,
|
| 74 |
trust_remote_code=True,
|
| 75 |
torch_dtype=torch.bfloat16
|
|
|
|
| 118 |
if use_coreocr:
|
| 119 |
processor = coreocr_processor
|
| 120 |
model = coreocr_model
|
| 121 |
+
model_name = "DocScopeOCR"
|
| 122 |
else:
|
| 123 |
processor = qwen_processor
|
| 124 |
model = qwen_model
|
| 125 |
+
model_name = "CoreOCR"
|
| 126 |
|
| 127 |
prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 128 |
all_images = [item["image"] for item in content if item["type"] == "image"]
|
|
|
|
| 166 |
multimodal=True,
|
| 167 |
cache_examples=False,
|
| 168 |
theme="bethecloud/storj_theme",
|
| 169 |
+
additional_inputs=[gr.Checkbox(label="Use CoreOCR", value=True, info="Check to use CoreOCR, uncheck to use DocScopeOCR")],
|
| 170 |
)
|
| 171 |
|
| 172 |
demo.launch(debug=True, ssr_mode=False)
|