Spaces:
Build error
Build error
| from typing import Tuple, List, Sequence, Optional, Union | |
| from torchvision import transforms | |
| from torch import nn, Tensor | |
| from PIL import Image | |
| from pathlib import Path | |
| from bs4 import BeautifulSoup as bs | |
| import numpy as np | |
| import numpy.typing as npt | |
| from numpy import uint8 | |
| ImageType = npt.NDArray[uint8] | |
| from transformers import AutoModelForObjectDetection | |
| import torch | |
| import matplotlib.pyplot as plt | |
| import matplotlib.patches as patches | |
| from matplotlib.patches import Patch | |
| from utils import draw_only_box | |
| from unitable import UnitablePredictor | |
| from ultralyticsplus import YOLO, render_result | |
| from doctrfiles import DoctrWordDetector,DoctrTextRecognizer | |
| from utils import crop_an_Image,cropImageExtraMargin | |
| from utils import denoisingAndSharpening | |
| """ | |
| USES YOLO FOR DETECTION INSTEAD OF TABLE TRANSFORMER | |
| Table TransFORMER | |
| """ | |
def html_table_template(table: str) -> str:
    """Wrap table markup in a minimal standalone HTML document.

    Args:
        table: Markup placed verbatim between ``<table>`` and ``</table>``
            (no escaping is applied).

    Returns:
        The complete HTML document as a string.
    """
    # Inside an f-string only braces need doubling; "%" is an ordinary
    # character, so the width attribute is written as a plain "100%".
    return f"""<html>
<head> <meta charset="UTF-8">
<style>
table, th, td {{
border: 1px solid black;
font-size: 10px;
}}
</style> </head>
<body>
<table frame="hsides" rules="groups" width="100%">
{table}
</table> </body> </html>"""
class DetectionAndOcrTable3():
    """Scan a page image for tables and return each table as an HTML document.

    Steps performed by ``predict``:
      1. A YOLO detector proposes table bounding boxes on the page image.
      2. ``UnitablePredictor`` predicts, for every cropped table, the HTML
         structure tokens and the cell bounding boxes.
      3. docTR word detection / text recognition reads the text of each cell.
      4. Structure tokens and cell texts are merged into an HTML document.

    Uses the full unitable model - different to DetectionAndOcrTable1.
    """

    # TODO: find the right confidence and padding values
    # Detections with a confidence below this value are discarded.
    MIN_TABLE_CONFIDENCE = 0.65
    # Pixels added on every side of a detected table box before cropping.
    TABLE_PADDING = 10
    # A header cell taller than the shortest header cell by more than this
    # many pixels is treated as a multi-line cell.
    MULTILINE_TOLERANCE = 5
    # Margin passed to cropImageExtraMargin for multi-line header cells.
    HEADER_CROP_MARGIN = 1.4
    # Structure tokens that stand for a cell whose text still has to be filled in.
    _CELL_TOKENS = ("<td>[]</td>", ">[]</td>")

    def __init__(self, englishFlag=True):
        """Load the table detector, the structure predictor and the OCR models.

        Args:
            englishFlag: When true the "master" recognizer weights are loaded,
                otherwise the multilingual "parseq" weights.
        """
        self.unitablePredictor = UnitablePredictor()
        self.detector = YOLO('foduucom/table-detection-and-extraction')
        # set model parameters
        self.detector.overrides['conf'] = 0.25  # NMS confidence threshold
        self.detector.overrides['iou'] = 0.45  # NMS IoU threshold
        self.detector.overrides['agnostic_nms'] = False  # NMS class-agnostic
        self.detector.overrides['max_det'] = 1000  # maximum number of detections per image

        self.wordDetector = DoctrWordDetector(architecture="db_resnet50",
                                              path_weights="doctrfiles/models/db_resnet50-79bd7d70.pt",
                                              path_config_json="doctrfiles/models/db_resnet50_config.json")

        if englishFlag:
            self.textRecognizer = DoctrTextRecognizer(architecture="master", path_weights="./doctrfiles/models/master-fde31e4a.pt",
                                                      path_config_json="./doctrfiles/models/master.json")
        else:
            self.textRecognizer = DoctrTextRecognizer(architecture="parseq", path_weights="./doctrfiles/models/doctr-multilingual-parseq.bin",
                                                      path_config_json="./doctrfiles/models/multilingual-parseq-config.json")

    @staticmethod
    def save_detection(detected_lines_images: "List[ImageType]", prefix='./res/test1/res_'):
        """Save every image array as ``<prefix><index>.png`` (debug helper).

        Declared static because it takes no ``self``; it can be called on the
        class or on an instance.
        """
        for index, img in enumerate(detected_lines_images):
            Image.fromarray(img).save(prefix + str(index) + '.png')

    @staticmethod
    def build_table_from_html_and_cell(
        structure: List[str], content: Optional[List[str]] = None
    ) -> List[str]:
        """Build table from html and cell token list.

        Each cell token (``"<td>[]</td>"`` or ``">[]</td>"``) in ``structure``
        has its ``[]`` replaced by the next entry of ``content``; every other
        token is copied unchanged.

        Args:
            structure: HTML structure tokens.
            content: Cell texts in cell order. ``None`` fills every cell with
                the text ``"placeholder"``. The list is not modified.

        Returns:
            The list of HTML tokens with the cell texts filled in. Cell tokens
            for which no text is left are omitted.
        """
        assert structure is not None
        # deal with empty table
        if content is None:
            content = ["placeholder"] * len(structure)

        html_code = []
        next_cell = 0  # index of the next unused entry of `content`
        for tag in structure:
            if tag in DetectionAndOcrTable3._CELL_TOKENS:
                if next_cell >= len(content):
                    # Ran out of cell texts: the cell token is skipped.
                    continue
                html_code.append(tag.replace("[]", content[next_cell]))
                next_cell += 1
            else:
                html_code.append(tag)
        return html_code

    @staticmethod
    def _count_header_cells(pred_html) -> int:
        """Count the cell tokens that precede ``</thead>``; 0 when there is no ``</thead>``."""
        count = 0
        for token in pred_html:
            if token in DetectionAndOcrTable3._CELL_TOKENS:
                count += 1
            if token == '</thead>':
                return count
        return 0

    def _crop_tables(self, image, bbxs, conf, debug_prefix):
        """Crop every sufficiently confident detection (with padding) out of the page."""
        cropped_tables = []
        rgb_page = image.convert("RGB")
        pad = self.TABLE_PADDING
        for i, (bbox, score) in enumerate(zip(bbxs, conf)):
            if score < self.MIN_TABLE_CONFIDENCE:
                continue
            padded = [bbox[0] - pad, bbox[1] - pad, bbox[2] + pad, bbox[3] + pad]
            cropped_table = rgb_page.crop(padded)
            if debug_prefix is not None:
                cropped_table.save(debug_prefix + "yolo_cropped_table_" + str(i) + ".png")
            cropped_tables.append(cropped_table)
        return cropped_tables

    def _recognize_cells(self, table_img, pred_bbox, table_header_cells):
        """Run OCR on every predicted cell box of one table and return the cell texts.

        The first ``table_header_cells`` boxes are treated as the header cells
        (assumes boxes come in the same order as the cell tokens - TODO confirm).
        """
        # Find what one line should be if there is a cell with a single line:
        # the smallest height among the header boxes. Slicing (rather than
        # indexing) tolerates fewer boxes than header cells.
        one_line_height = 100000
        for box in pred_bbox[:table_header_cells]:
            xmin, ymin, xmax, ymax = box
            one_line_height = min(one_line_height, abs(ymax - ymin))

        pred_cell = []
        for box in pred_bbox:
            xmin, ymin, xmax, ymax = box
            if ymax - ymin == 0:
                # NOTE(review): zero-height boxes produce no entry at all, so
                # later texts shift to earlier cells - confirm this is intended.
                continue
            fourbytwo = np.array([
                [xmin, ymin],
                [xmax, ymin],
                [xmax, ymax],
                [xmin, ymax]
            ], dtype=np.float32)
            current_box_height = abs(ymax - ymin)

            # Reset for every cell so an unusable crop can never reuse the
            # images collected for the previous cell.
            input_to_recog = []

            # Those are for header cells with more than one line: the words are
            # detected first and then recognised one by one.
            if table_header_cells > 0 and current_box_height > one_line_height + self.MULTILINE_TOLERANCE:
                cell_img = cropImageExtraMargin([fourbytwo], table_img, margin=self.HEADER_CROP_MARGIN)[0]
                table_header_cells -= 1

                # List of 4 x 2
                detection_results = self.wordDetector.predict(cell_img, sort_vertical=True)
                if not detection_results:
                    input_to_recog.append(cell_img)
                else:
                    for wordbox in detection_results:
                        cropped_image = crop_an_Image(wordbox.box, cell_img)
                        if cropped_image.shape[0] > 0 and cropped_image.shape[1] > 0:
                            input_to_recog.append(cropped_image)
                        else:
                            print("Empty image")
            else:
                cell_img = crop_an_Image(fourbytwo, table_img)
                if table_header_cells > 0:
                    table_header_cells -= 1
                if cell_img.shape[0] > 0 and cell_img.shape[1] > 0:
                    input_to_recog = [cell_img]

            if input_to_recog:
                words = self.textRecognizer.predict_for_tables(input_to_recog)
                pred_cell.append(" ".join(words))
            else:
                # Don't lose empty cell
                pred_cell.append("")
        return pred_cell

    # Valid 'Boxes' object attributes and properties are:
    # Attributes:
    #   boxes (torch.Tensor) or (numpy.ndarray): A tensor or numpy array containing the detection boxes,
    #       with shape (num_boxes, 6).
    #   orig_shape (torch.Tensor) or (numpy.ndarray): Original image size, in the format (height, width).
    # Properties:
    #   xyxy (torch.Tensor) or (numpy.ndarray): The boxes in xyxy format.
    #   conf (torch.Tensor) or (numpy.ndarray): The confidence values of the boxes.
    #   cls (torch.Tensor) or (numpy.ndarray): The class values of the boxes.
    #   xywh (torch.Tensor) or (numpy.ndarray): The boxes in xywh format.
    #   xyxyn (torch.Tensor) or (numpy.ndarray): The boxes in xyxy format normalized by original image size.
    #   xywhn (torch.Tensor) or (numpy.ndarray): The boxes in xywh format normalized by original image size.

    # Image is page image
    def predict(self, image: "Image.Image", debugfolder_filename_page_name=None, denoise=False) -> List[str]:
        """Detect the tables on a page image and return one HTML document per table.

        Args:
            image: The page image.
            debugfolder_filename_page_name: String prefix to which fixed
                suffixes are appended to build debug image paths. When ``None``
                this method writes no debug images; the value is still
                forwarded unchanged to ``UnitablePredictor.predict``.
            denoise: When true the cropped tables are passed through
                ``denoisingAndSharpening`` before structure prediction.

        Returns:
            A list of prettified HTML strings, one per table kept; an empty
            list when no detection reaches ``MIN_TABLE_CONFIDENCE``.
        """
        results = self.detector.predict(image)
        boxes = results[0].boxes
        # Array of bboxes
        bbxs = boxes.xyxy.int().tolist()
        # Array of confidences
        conf = boxes.conf.float().tolist()
        print(bbxs)
        print(conf)

        if debugfolder_filename_page_name is not None:
            img_to_save = draw_only_box(image, bbxs)
            img_to_save.save(debugfolder_filename_page_name + "detectionBoxRes.png", quality=95)

        cropped_tables = self._crop_tables(image, bbxs, conf, debugfolder_filename_page_name)
        print("number of cropped tables found: " + str(len(cropped_tables)))
        if not cropped_tables:
            return []

        # Step 1: Unitable
        # This take PIL Images as input
        if denoise:
            cropped_tables = denoisingAndSharpening(cropped_tables)
        pred_htmls, pred_bboxs = self.unitablePredictor.predict(cropped_tables, debugfolder_filename_page_name)

        table_codes = []
        for k, table_img in enumerate(cropped_tables):
            pred_html = pred_htmls[k]
            pred_bbox = pred_bboxs[k]

            # Step 2: OCR. Header cells get special handling because some
            # tables have a lot of words in their header.
            table_header_cells = self._count_header_cells(pred_html)
            pred_cell = self._recognize_cells(table_img, pred_bbox, table_header_cells)
            print(pred_cell)

            # Step 3: merge structure and text into an HTML document
            pred_code = self.build_table_from_html_and_cell(pred_html, pred_cell)
            pred_code = "".join(pred_code)
            pred_code = html_table_template(pred_code)

            # NOTE(review): no parser is named, so BeautifulSoup picks whichever
            # one is installed - consider passing one explicitly.
            soup = bs(pred_code)
            # formatted (and indented) string representation of the HTML document
            table_code = soup.prettify()
            print(table_code)
            table_codes.append(table_code)

        return table_codes