Spaces:
Runtime error
Runtime error
update space
Browse files- .gitignore +2 -0
- app.py +37 -15
- requirements.txt +1 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/.env
|
| 2 |
+
/dummy.py
|
app.py
CHANGED
|
@@ -1,14 +1,26 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
-
os
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
# work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
|
| 5 |
-
os.system("pip uninstall -y gradio")
|
| 6 |
-
os.system("pip install gradio==3.4.1")
|
| 7 |
-
os.system("pip install packaging==21.3")
|
| 8 |
-
os.system(os.environ["DD_ADDONS"])
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
import deepdoctection as dd
|
| 14 |
from deepdoctection.dataflow.serialize import DataFromList
|
|
@@ -21,7 +33,10 @@ from dd_addons.extern.openai import OpenAiLmmTokenClassifier, is_api_key_valid
|
|
| 21 |
|
| 22 |
import gradio as gr
|
| 23 |
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
demo = gr.Blocks(css="scrollbar.css")
|
| 27 |
|
|
@@ -30,6 +45,9 @@ def process_analyzer(openai_api_key, categories_str, instruction_str, img, pdf,
|
|
| 30 |
if not is_api_key_valid(openai_api_key):
|
| 31 |
return [], {}, "You have entered no or an invalid api key. Please enter a valid api key"
|
| 32 |
categories_list = categories_str.split(",")
|
|
|
|
|
|
|
|
|
|
| 33 |
register_string_categories_from_list(categories_list, "custom_token_classes")
|
| 34 |
custom_token_class = dd.object_types_registry.get("custom_token_classes")
|
| 35 |
print([token_class for token_class in custom_token_class])
|
|
@@ -61,13 +79,15 @@ def process_analyzer(openai_api_key, categories_str, instruction_str, img, pdf,
|
|
| 61 |
|
| 62 |
json_out = {}
|
| 63 |
dpts = []
|
|
|
|
| 64 |
|
| 65 |
for idx, dp in enumerate(df):
|
| 66 |
dpts.append(dp)
|
| 67 |
json_out[f"page_{idx}"] = dp.get_token()
|
|
|
|
| 68 |
|
| 69 |
return [dp.viz(show_cells=False, show_layouts=False, show_tables=False, show_words=True, show_token_class=True, ignore_default_token_class=True)
|
| 70 |
-
for dp in dpts], json_out, "No error"
|
| 71 |
|
| 72 |
|
| 73 |
with demo:
|
|
@@ -125,18 +145,20 @@ with demo:
|
|
| 125 |
with gr.Box():
|
| 126 |
gr.Markdown("<center><strong>JSON</strong></center>")
|
| 127 |
json = gr.JSON()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
with gr.Column():
|
| 129 |
with gr.Box():
|
| 130 |
gr.Markdown("<center><strong>Layout detection</strong></center>")
|
| 131 |
gallery = gr.Gallery(
|
| 132 |
label="Output images", show_label=False, elem_id="gallery"
|
| 133 |
).style(grid=2)
|
| 134 |
-
with gr.Row():
|
| 135 |
-
with gr.Box():
|
| 136 |
-
gr.Markdown("<center><strong>Table</strong></center>")
|
| 137 |
-
html = gr.HTML()
|
| 138 |
|
| 139 |
btn.click(fn=process_analyzer, inputs=[user_token, categories, instruction, inputs, inputs_pdf, max_imgs],
|
| 140 |
-
outputs=[gallery, json, msg])
|
| 141 |
|
| 142 |
demo.launch()
|
|
|
|
| 1 |
+
import time
|
| 2 |
import os
|
| 3 |
+
from os import getcwd, path
|
| 4 |
+
import importlib.metadata
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
def check_additional_requirements():
    """Install the extra packages this app needs at start-up.

    Steps, in order:

    1. Install ``detectron2`` from its GitHub repository if no import spec
       for it can be found.
    2. Make sure ``gradio`` is present in exactly version ``3.4.1``: a
       different installed version is uninstalled first, then the pinned
       version is installed.
    3. Run the shell command stored in the ``DD_ADDONS`` environment
       variable (presumably the install command for ``dd_addons`` --
       confirm against the Space's secrets).

    The module calls this before importing ``deepdoctection`` and
    ``gradio``, so the import caches are invalidated at the end to make
    freshly installed packages discoverable by those imports.

    Raises:
        KeyError: if the ``DD_ADDONS`` environment variable is not set.
    """
    # Function-scope import: the file only does `import importlib.metadata`,
    # which does not guarantee that the `importlib.util` submodule is loaded.
    import importlib.util

    if importlib.util.find_spec("detectron2") is None:
        os.system('pip install detectron2@git+https://github.com/facebookresearch/detectron2.git')

    gradio_ready = importlib.util.find_spec("gradio") is not None
    if gradio_ready and importlib.metadata.version("gradio") != "3.4.1":
        # Wrong version present: remove it so the pinned install below
        # starts from a clean state.
        os.system("pip uninstall -y gradio")
        gradio_ready = False
    if not gradio_ready:
        os.system("pip install gradio==3.4.1")

    os.system(os.environ["DD_ADDONS"])

    # Packages installed after interpreter start may be missed by cached
    # path finders; reset them before the caller's subsequent imports.
    importlib.invalidate_caches()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
load_dotenv()
|
| 22 |
+
check_additional_requirements()
|
| 23 |
+
|
| 24 |
|
| 25 |
import deepdoctection as dd
|
| 26 |
from deepdoctection.dataflow.serialize import DataFromList
|
|
|
|
| 33 |
|
| 34 |
import gradio as gr
|
| 35 |
|
| 36 |
+
dd.Page.add_attribute_name("raw_json_output")
|
| 37 |
+
analyzer = get_loader(reset_config_file=True, config_overwrite=["OCR.USE_TESSERACT=False",
|
| 38 |
+
"OCR.USE_TEXTRACT=True",
|
| 39 |
+
"WORD_MATCHING.MAX_PARENT_ONLY=True"])
|
| 40 |
|
| 41 |
demo = gr.Blocks(css="scrollbar.css")
|
| 42 |
|
|
|
|
| 45 |
if not is_api_key_valid(openai_api_key):
|
| 46 |
return [], {}, "You have entered no or an invalid api key. Please enter a valid api key"
|
| 47 |
categories_list = categories_str.split(",")
|
| 48 |
+
if not categories_str:
|
| 49 |
+
return [], {}, "You did not enter any entities. Please enter at least one category."
|
| 50 |
+
|
| 51 |
register_string_categories_from_list(categories_list, "custom_token_classes")
|
| 52 |
custom_token_class = dd.object_types_registry.get("custom_token_classes")
|
| 53 |
print([token_class for token_class in custom_token_class])
|
|
|
|
| 79 |
|
| 80 |
json_out = {}
|
| 81 |
dpts = []
|
| 82 |
+
json_out_raw = {}
|
| 83 |
|
| 84 |
for idx, dp in enumerate(df):
|
| 85 |
dpts.append(dp)
|
| 86 |
json_out[f"page_{idx}"] = dp.get_token()
|
| 87 |
+
json_out_raw[f"page_{idx}"] = dp.raw_json_output
|
| 88 |
|
| 89 |
return [dp.viz(show_cells=False, show_layouts=False, show_tables=False, show_words=True, show_token_class=True, ignore_default_token_class=True)
|
| 90 |
+
for dp in dpts], json_out, json_out_raw, "No error"
|
| 91 |
|
| 92 |
|
| 93 |
with demo:
|
|
|
|
| 145 |
with gr.Box():
|
| 146 |
gr.Markdown("<center><strong>JSON</strong></center>")
|
| 147 |
json = gr.JSON()
|
| 148 |
+
with gr.Box():
|
| 149 |
+
gr.Markdown("<center><strong>ChatGPT output. </strong> <br />"
|
| 150 |
+
"It is possible that ChatGPT answers in an unexpected way, "
|
| 151 |
+
"such that the answer cannot be properly processed. In this case you might get "
|
| 152 |
+
"an empty JSON but you can still see the raw output.</center>")
|
| 153 |
+
json_raw = gr.JSON()
|
| 154 |
with gr.Column():
|
| 155 |
with gr.Box():
|
| 156 |
gr.Markdown("<center><strong>Layout detection</strong></center>")
|
| 157 |
gallery = gr.Gallery(
|
| 158 |
label="Output images", show_label=False, elem_id="gallery"
|
| 159 |
).style(grid=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
btn.click(fn=process_analyzer, inputs=[user_token, categories, instruction, inputs, inputs_pdf, max_imgs],
|
| 162 |
+
outputs=[gallery, json, json_raw, msg])
|
| 163 |
|
| 164 |
demo.launch()
|
requirements.txt
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
Pillow==9.5.0
|
| 2 |
torch==1.12.0
|
| 3 |
torchvision==0.13.0
|
|
|
|
| 1 |
+
python-dotenv
|
| 2 |
Pillow==9.5.0
|
| 3 |
torch==1.12.0
|
| 4 |
torchvision==0.13.0
|