Spaces:
Runtime error
Runtime error
Auto-adjust bbox width
Browse files
- app.py +14 -29
- imgs/saved_image_demo.png +0 -0
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from typing import Optional
|
| 2 |
-
import spaces
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import numpy as np
|
|
@@ -25,31 +25,6 @@ caption_model_processor = {'processor': processor, 'model': model}
|
|
| 25 |
print('finish loading model!!!')
|
| 26 |
|
| 27 |
|
| 28 |
-
platform = 'pc'
|
| 29 |
-
if platform == 'pc':
|
| 30 |
-
draw_bbox_config = {
|
| 31 |
-
'text_scale': 0.8,
|
| 32 |
-
'text_thickness': 2,
|
| 33 |
-
'text_padding': 2,
|
| 34 |
-
'thickness': 2,
|
| 35 |
-
}
|
| 36 |
-
elif platform == 'web':
|
| 37 |
-
draw_bbox_config = {
|
| 38 |
-
'text_scale': 0.8,
|
| 39 |
-
'text_thickness': 2,
|
| 40 |
-
'text_padding': 3,
|
| 41 |
-
'thickness': 3,
|
| 42 |
-
}
|
| 43 |
-
elif platform == 'mobile':
|
| 44 |
-
draw_bbox_config = {
|
| 45 |
-
'text_scale': 0.8,
|
| 46 |
-
'text_thickness': 2,
|
| 47 |
-
'text_padding': 3,
|
| 48 |
-
'thickness': 3,
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
MARKDOWN = """
|
| 54 |
# OmniParser for Pure Vision Based General GUI Agent 🔥
|
| 55 |
<div>
|
|
@@ -59,6 +34,8 @@ MARKDOWN = """
|
|
| 59 |
</div>
|
| 60 |
|
| 61 |
OmniParser is a screen parsing tool to convert general GUI screen to structured elements.
|
|
|
|
|
|
|
| 62 |
"""
|
| 63 |
|
| 64 |
# DEVICE = torch.device('cuda')
|
|
@@ -66,7 +43,7 @@ OmniParser is a screen parsing tool to convert general GUI screen to structured
|
|
| 66 |
# @spaces.GPU
|
| 67 |
@torch.inference_mode()
|
| 68 |
# @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
|
| 69 |
-
@spaces.GPU(duration=65)
|
| 70 |
def process(
|
| 71 |
image_input,
|
| 72 |
box_threshold,
|
|
@@ -76,6 +53,14 @@ def process(
|
|
| 76 |
image_save_path = 'imgs/saved_image_demo.png'
|
| 77 |
image_input.save(image_save_path)
|
| 78 |
# import pdb; pdb.set_trace()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=True)
|
| 81 |
text, ocr_bbox = ocr_bbox_rslt
|
|
@@ -117,5 +102,5 @@ with gr.Blocks() as demo:
|
|
| 117 |
)
|
| 118 |
|
| 119 |
# demo.launch(debug=False, show_error=True, share=True)
|
| 120 |
-
|
| 121 |
-
demo.queue().launch(share=False)
|
|
|
|
| 1 |
from typing import Optional
|
| 2 |
+
# import spaces
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import numpy as np
|
|
|
|
| 25 |
print('finish loading model!!!')
|
| 26 |
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
MARKDOWN = """
|
| 29 |
# OmniParser for Pure Vision Based General GUI Agent 🔥
|
| 30 |
<div>
|
|
|
|
| 34 |
</div>
|
| 35 |
|
| 36 |
OmniParser is a screen parsing tool to convert general GUI screen to structured elements.
|
| 37 |
+
|
| 38 |
+
📢 [[Project Page](https://microsoft.github.io/OmniParser/)] [[Blog Post](https://www.microsoft.com/en-us/research/articles/omniparser-for-pure-vision-based-gui-agent/)] [[Models](https://huggingface.co/microsoft/OmniParser)]
|
| 39 |
"""
|
| 40 |
|
| 41 |
# DEVICE = torch.device('cuda')
|
|
|
|
| 43 |
# @spaces.GPU
|
| 44 |
@torch.inference_mode()
|
| 45 |
# @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
|
| 46 |
+
# @spaces.GPU(duration=65)
|
| 47 |
def process(
|
| 48 |
image_input,
|
| 49 |
box_threshold,
|
|
|
|
| 53 |
image_save_path = 'imgs/saved_image_demo.png'
|
| 54 |
image_input.save(image_save_path)
|
| 55 |
# import pdb; pdb.set_trace()
|
| 56 |
+
image = Image.open(image_save_path)
|
| 57 |
+
box_overlay_ratio = image.size[0] / 3200
|
| 58 |
+
draw_bbox_config = {
|
| 59 |
+
'text_scale': 0.8 * box_overlay_ratio,
|
| 60 |
+
'text_thickness': max(int(2 * box_overlay_ratio), 1),
|
| 61 |
+
'text_padding': max(int(3 * box_overlay_ratio), 1),
|
| 62 |
+
'thickness': max(int(3 * box_overlay_ratio), 1),
|
| 63 |
+
}
|
| 64 |
|
| 65 |
ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=True)
|
| 66 |
text, ocr_bbox = ocr_bbox_rslt
|
|
|
|
| 102 |
)
|
| 103 |
|
| 104 |
# demo.launch(debug=False, show_error=True, share=True)
|
| 105 |
+
demo.launch(share=True, server_port=7861, server_name='0.0.0.0')
|
| 106 |
+
# demo.queue().launch(share=False)
|
imgs/saved_image_demo.png
CHANGED
|
|