Spaces:
Build error
Build error
| import pandas as pd | |
| import streamlit as st | |
| import skimage.io as io | |
| from PIL import Image | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import cv2 | |
| from skimage.filters import threshold_local | |
| import pytesseract | |
| import re | |
| import os | |
| from pytesseract import Output | |
# Make sure the Tesseract binary used by pytesseract is available.
# Streamlit re-executes this script on every interaction, so the install is
# skipped once the binary is already on PATH.
# NOTE(review): hosted platforms usually expect system packages to be declared
# in their build configuration rather than installed at runtime - confirm.
import shutil
import subprocess

if shutil.which("tesseract") is None:
    # "-y" is required: without it apt-get asks for confirmation and aborts
    # when there is no interactive stdin. The exit status is deliberately not
    # enforced, matching the original best-effort os.system call.
    subprocess.run(["apt-get", "install", "-y", "tesseract-ocr"], check=False)
def plot_gray(image):
    """Display ``image`` in a new 16x10 figure using the 'Greys_r' colormap.

    Returns whatever ``plt.imshow`` returns for the drawn image.
    """
    plt.figure(figsize=(16, 10))
    rendered = plt.imshow(image, cmap='Greys_r')
    return rendered
def plot_rgb(image):
    """Display a BGR ``image`` in a new 16x10 figure.

    The channels are reordered from BGR to RGB before plotting. Returns
    whatever ``plt.imshow`` returns for the drawn image.
    """
    plt.figure(figsize=(16, 10))
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return plt.imshow(rgb_image)
def bw_scanner(image):
    """Binarise a BGR ``image`` with a local (adaptive) threshold.

    The image is converted to greyscale, a per-pixel threshold is computed
    with ``threshold_local`` (block size 21, offset 5, gaussian weighting),
    and pixels above their threshold become 255 while the rest become 0.

    Returns:
        A ``uint8`` array holding only the values 0 and 255.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    local_threshold = threshold_local(grayscale, 21, offset=5, method="gaussian")
    above_threshold = grayscale > local_threshold
    return above_threshold.astype("uint8") * 255
def text_box_detection(image):
    """Outline every region Tesseract reports for ``image`` with a green box.

    Args:
        image: A NumPy image array, either single-channel (2-D, e.g. the
            output of ``bw_scanner``) or multi-channel in BGR order.

    Returns:
        An RGB array based on ``image`` with one 2-pixel-wide green rectangle
        drawn per entry returned by ``pytesseract.image_to_data``.
    """
    d = pytesseract.image_to_data(image, output_type=Output.DICT)

    # A 2-D array has no colour channels, so COLOR_BGR2RGB would raise on it.
    # Expand it to three channels instead so coloured boxes can be drawn.
    # cvtColor returns a new array, so the input is never modified.
    if image.ndim == 2:
        boxes = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    else:
        boxes = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    for x, y, w, h in zip(d['left'], d['top'], d['width'], d['height']):
        boxes = cv2.rectangle(boxes, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return boxes
def ui():
    """Render the Streamlit page.

    Shows a title and an image uploader. Once a file is uploaded, the image
    is binarised, the regions detected by Tesseract are drawn on it and
    displayed, and the text recognised in the original image is printed.
    Nothing beyond the title and uploader is rendered until a file is given.
    """
    st.markdown("# Text Extraction")
    uploaded_file = st.file_uploader("Upload an Image", type=['png', 'jpeg', 'jpg'])
    if uploaded_file is None:
        return

    # PIL may decode to palette, greyscale or RGBA depending on the file;
    # normalise to 3-channel RGB so the OpenCV conversions below are valid.
    image = Image.open(uploaded_file).convert("RGB")
    img_array = np.array(image)

    # bw_scanner converts with COLOR_BGR2GRAY, so hand it BGR rather than
    # PIL's RGB order (otherwise red and blue are weighted the wrong way).
    gray_image = bw_scanner(cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR))

    # The binarised image is single-channel; give text_box_detection the
    # 3-channel BGR layout its BGR->RGB conversion is written for.
    boxes = text_box_detection(cv2.cvtColor(gray_image, cv2.COLOR_GRAY2BGR))
    st.image(boxes, width=500, channels='RGB')

    extracted_text = pytesseract.image_to_string(img_array)
    st.markdown(f"Predicted Text {extracted_text}")
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    ui()