Vageesh1 committed on
Commit
2ea1a93
·
1 Parent(s): f2e369d

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +54 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import skimage.io as io
4
+ from PIL import Image
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ import cv2
9
+ from skimage.filters import threshold_local
10
+ import pytesseract
11
+ import re
12
+
13
+ from pytesseract import Output
14
+
15
def plot_gray(image):
    """Show ``image`` on a new 16x10 matplotlib figure with the reversed grey colormap.

    Returns whatever ``plt.imshow`` returns for the drawn image.
    """
    plt.figure(figsize=(16, 10))
    drawn = plt.imshow(image, cmap='Greys_r')
    return drawn
18
+
19
def plot_rgb(image):
    """Show a BGR-ordered ``image`` on a new 16x10 matplotlib figure.

    The channels are reordered from BGR to RGB with OpenCV before display.
    Returns whatever ``plt.imshow`` returns for the drawn image.
    """
    plt.figure(figsize=(16, 10))
    rgb_view = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return plt.imshow(rgb_view)
22
def bw_scanner(image):
    """Binarise a BGR image into a black-and-white, scanner-like picture.

    The image is converted to grayscale, a local Gaussian threshold is
    computed over 21-pixel neighbourhoods (offset 5), and every pixel brighter
    than its local threshold becomes 255 while the rest become 0.

    Returns a ``uint8`` array holding only the values 0 and 255.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    local_threshold = threshold_local(grayscale, 21, offset=5, method="gaussian")
    above_threshold = grayscale > local_threshold
    binary = above_threshold.astype("uint8")
    return binary * 255
26
+
27
def text_box_detection(image):
    """Draw a green rectangle around every region reported by Tesseract.

    Args:
        image: Image as a NumPy array. It may be single-channel (for example
            the 2-D output of ``bw_scanner``) or a multi-channel image in BGR
            order.

    Returns:
        A new RGB image with a 2-pixel green box drawn for each entry that
        ``pytesseract.image_to_data`` returns.
    """
    d = pytesseract.image_to_data(image, output_type=Output.DICT)

    # COLOR_BGR2RGB rejects single-channel input, so a thresholded/grayscale
    # image has to be expanded with COLOR_GRAY2RGB instead.
    is_single_channel = image.ndim == 2 or image.shape[2] == 1
    conversion = cv2.COLOR_GRAY2RGB if is_single_channel else cv2.COLOR_BGR2RGB
    # cvtColor returns a fresh array, so the caller's image is never drawn on.
    boxes = cv2.cvtColor(image, conversion)

    for x, y, w, h in zip(d['left'], d['top'], d['width'], d['height']):
        boxes = cv2.rectangle(boxes, (x, y), (x + w, y + h), (0, 255, 0), 2)

    return boxes
36
+
37
def ui():
    """Render the Streamlit page for OCR text extraction.

    Shows a title and an image uploader. Once an image is uploaded, it is
    thresholded, the detected text regions are drawn and displayed, and the
    text recognised by Tesseract is written below the picture.
    """
    st.markdown("# Text Extraction")
    uploaded_file = st.file_uploader("Upload an Image", type=['png', 'jpeg', 'jpg'])
    if uploaded_file is None:
        return

    # Uploaded PNGs may be grayscale, palette or RGBA; normalise to 3-channel
    # RGB so the OpenCV colour conversions below always get a valid input.
    image = Image.open(uploaded_file).convert("RGB")
    img_array = np.array(image)

    # PIL yields RGB, while bw_scanner converts with COLOR_BGR2GRAY, so swap
    # the channel order first to keep the grayscale weights correct.
    gray_image = bw_scanner(cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))

    # bw_scanner returns a 2-D image; text_box_detection applies a BGR->RGB
    # conversion, which requires a 3-channel input.
    boxes = text_box_detection(cv2.cvtColor(gray_image, cv2.COLOR_GRAY2BGR))
    st.image(boxes, width=500, channels='RGB')

    extracted_text = pytesseract.image_to_string(img_array)
    st.markdown(f"Predicted Text {extracted_text}")
48
+
49
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    ui()
51
+
52
+
53
+
54
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ streamlit
3
+ scikit-image
4
+ pillow
5
+ numpy
6
+ matplotlib
7
+ seaborn
8
+ opencv-python
9
+ pytesseract
10
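+ # tesseract-ocr is a system package, not a pip requirement: install it with `apt install tesseract-ocr` (e.g. list it in a packages.txt file)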