File size: 1,628 Bytes
2ea1a93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pandas as pd
import streamlit as st
import skimage.io as io
from PIL import Image
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import cv2 
from skimage.filters import threshold_local
import pytesseract
import re

from pytesseract import Output

def plot_gray(image):
    """Display ``image`` on a new (16, 10) figure with the 'Greys_r' colormap.

    Returns whatever ``plt.imshow`` returns for the drawn image.
    """
    plt.figure(figsize=(16, 10))
    drawn = plt.imshow(image, cmap='Greys_r')
    return drawn

def plot_rgb(image):
    """Display a BGR-ordered ``image`` on a new (16, 10) figure.

    The channels are swapped from BGR to RGB before the image is handed to
    ``plt.imshow``, whose return value is passed back to the caller.
    """
    plt.figure(figsize=(16, 10))
    rgb_view = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return plt.imshow(rgb_view)
def bw_scanner(image):
    """Binarise a BGR ``image`` with a local (adaptive) threshold.

    The image is converted to grayscale, a per-pixel threshold is computed
    with ``threshold_local`` (block size 21, offset 5, gaussian method), and
    pixels brighter than their threshold become 255 while the rest become 0.

    Returns:
        A ``uint8`` array holding only the values 0 and 255.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    local_threshold = threshold_local(grayscale, 21, offset=5, method="gaussian")
    above_threshold = grayscale > local_threshold
    return above_threshold.astype("uint8") * 255

def text_box_detection(image):
    """Outline every region Tesseract reports for ``image`` with a green box.

    Args:
        image: NumPy image array. Either single-channel (for example the
            0/255 mask produced by ``bw_scanner``) or a 3/4-channel image
            whose channels are in BGR order.

    Returns:
        A new 3-channel RGB array with one 2-pixel-wide green rectangle per
        entry returned by ``pytesseract.image_to_data``. The input array is
        not modified.
    """
    data = pytesseract.image_to_data(image, output_type=Output.DICT)

    # COLOR_BGR2RGB only accepts 3- or 4-channel input, so a single-channel
    # image has to be expanded with COLOR_GRAY2RGB instead of raising.
    single_channel = image.ndim == 2 or image.shape[2] == 1
    conversion = cv2.COLOR_GRAY2RGB if single_channel else cv2.COLOR_BGR2RGB
    # cvtColor allocates a fresh output array, so drawing never touches `image`.
    boxes = cv2.cvtColor(image, conversion)

    for x, y, w, h in zip(data['left'], data['top'], data['width'], data['height']):
        boxes = cv2.rectangle(boxes, (x, y), (x + w, y + h), (0, 255, 0), 2)

    return boxes

def ui():
    """Render the Streamlit page for uploading an image and extracting text.

    Once a PNG/JPEG file is uploaded, the page shows the binarised image with
    the detected regions outlined, followed by the text Tesseract extracted
    from the uploaded image. Nothing beyond the title and the uploader is
    rendered until a file has been provided.
    """
    st.markdown("# Text Extraction")
    uploaded_file = st.file_uploader("Upload an Image", type=['png', 'jpeg', 'jpg'])
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    img_array = np.array(image)

    # Grayscale and palette uploads decode to 2-D arrays, which the
    # BGR-to-gray conversion in bw_scanner rejects; normalise to 3 channels.
    rgb_array = np.array(image.convert("RGB"))
    # PIL yields RGB, while bw_scanner converts from BGR.
    bgr_array = cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
    gray_image = bw_scanner(bgr_array)

    # The binarised mask is single-channel; expand it to 3 channels because
    # text_box_detection's colour conversion expects a BGR image.
    boxes = text_box_detection(cv2.cvtColor(gray_image, cv2.COLOR_GRAY2BGR))
    st.image(boxes, width=500, channels='RGB')

    # OCR runs on the upload exactly as it was decoded.
    extracted_text = pytesseract.image_to_string(img_array)
    st.markdown(f"Predicted Text {extracted_text}")

# Build the page only when this file is executed as a script, not on import.
if __name__ == '__main__':
    ui()