Spaces:
Runtime error
Runtime error
Ankur Goyal
committed on
Commit
·
1af0b6d
1
Parent(s):
2919076
Draw a box over the answer
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import os
|
|
| 2 |
|
| 3 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 4 |
|
|
|
|
| 5 |
import streamlit as st
|
| 6 |
|
| 7 |
import torch
|
|
@@ -24,8 +25,28 @@ def construct_pipeline():
|
|
| 24 |
|
| 25 |
|
| 26 |
@st.cache
def run_pipeline(question, document):
    """Ask ``question`` of ``document`` and return the pipeline's output.

    The pipeline comes from ``construct_pipeline()``; the entries of
    ``document.context`` are unpacked into the call as keyword arguments.
    The function is wrapped in ``st.cache``.
    """
    pipeline = construct_pipeline()
    return pipeline(question=question, **document.context)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
st.markdown("# DocQuery: Query Documents w/ NLP")
|
|
@@ -75,16 +96,30 @@ question = st.text_input("QUESTION", "")
|
|
| 75 |
document = st.session_state.document
|
| 76 |
loading_placeholder = st.empty()
|
| 77 |
if document is not None:
|
| 78 |
-
col1, col2 = st.columns(
|
| 79 |
-
|
| 80 |
|
| 81 |
-
if document is not None and question is not None and len(question) > 0:
|
| 82 |
-
predictions = run_pipeline(question=question, document=document)
|
| 83 |
|
|
|
|
|
|
|
| 84 |
col2.header("Answers")
|
| 85 |
-
for p in ensure_list(predictions):
|
| 86 |
-
col2.subheader(f"{ p['answer'] }: ({round(p['score'] * 100, 1)}%)")
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
"DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
|
| 90 |
|
|
|
|
| 2 |
|
| 3 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 4 |
|
| 5 |
+
from PIL import ImageDraw
|
| 6 |
import streamlit as st
|
| 7 |
|
| 8 |
import torch
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
@st.cache
def run_pipeline(question, document, top_k):
    """Ask ``question`` of ``document`` and return the pipeline's output.

    The pipeline comes from ``construct_pipeline()``; the entries of
    ``document.context`` are unpacked into the call as keyword arguments and
    ``top_k`` is forwarded unchanged. The function is wrapped in ``st.cache``.
    """
    pipeline = construct_pipeline()
    return pipeline(question=question, **document.context, top_k=top_k)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# TODO: Move into docquery
|
| 33 |
+
# TODO: Support words past the first page (or window?)
|
| 34 |
+
def lift_word_boxes(document):
    """Return the word boxes stored with the first image of ``document``.

    Reads ``document.context["image"]``, takes its first entry and returns
    that entry's second element.
    NOTE(review): presumably each entry is an ``(image, word_boxes)`` pair and
    entry 0 is the first page -- confirm against docquery.
    """
    image_entries = document.context["image"]
    first_entry = image_entries[0]
    return first_entry[1]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def expand_bbox(word_boxes):
    """Return the smallest box enclosing every box in ``word_boxes``.

    Each item is indexed at position 1 to obtain its four coordinates; the
    result is ``[min of 1st, min of 2nd, max of 3rd, max of 4th]``.
    Returns ``None`` when ``word_boxes`` is empty.
    """
    if len(word_boxes) < 1:
        return None

    boxes = [entry[1] for entry in word_boxes]
    # Transpose so each tuple holds one coordinate across all boxes.
    lefts, tops, rights, bottoms = zip(*boxes)
    return [min(lefts), min(tops), max(rights), max(bottoms)]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# LayoutLM boxes are normalized to 0, 1000
|
| 47 |
+
def normalize_bbox(box, width, height):
    """Scale a box given in 0-1000 units to a ``width`` x ``height`` area.

    Every coordinate is divided by 1000; the first and third results are then
    multiplied by ``width`` and the second and fourth by ``height``. Returns
    the four scaled values as a list.
    """
    fractions = [coord / 1000 for coord in box]
    left, top, right, bottom = (
        fractions[0],
        fractions[1],
        fractions[2],
        fractions[3],
    )
    return [left * width, top * height, right * width, bottom * height]
|
| 50 |
|
| 51 |
|
| 52 |
st.markdown("# DocQuery: Query Documents w/ NLP")
|
|
|
|
| 96 |
document = st.session_state.document
loading_placeholder = st.empty()
if document is not None:
    # Wide column for the document preview, narrow column for the answers.
    col1, col2 = st.columns([3, 1])
    image = document.preview

# Outline colours for answer boxes, indexed by prediction rank.
colors = ["blue", "red", "green"]
if document is not None and question is not None and len(question) > 0:
    col2.header("Answers")

    predictions = run_pipeline(question=question, document=document, top_k=1)

    word_boxes = lift_word_boxes(document)
    # Draw on a copy so the boxes are not painted onto document.preview itself.
    image = image.copy()
    draw = ImageDraw.Draw(image)
    for i, p in enumerate(ensure_list(predictions)):
        col2.markdown(f"#### { p['answer'] }: ({round(p['score'] * 100, 1)}%)")
        # "end" is used as an inclusive index, hence the + 1 on the slice.
        answer_box = expand_bbox(word_boxes[p["start"] : p["end"] + 1])
        if answer_box is None:
            # expand_bbox returns None for an empty span; there is nothing to
            # draw, and normalize_bbox would fail on None.
            continue
        x1, y1, x2, y2 = normalize_bbox(answer_box, image.width, image.height)
        # Wrap around the palette so extra predictions cannot raise IndexError.
        draw.rectangle(((x1, y1), (x2, y2)), outline=colors[i % len(colors)])

if document is not None:
    col1.image(image, use_column_width=True)
|
| 123 |
|
| 124 |
"DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
|
| 125 |
|