Update app.py
app.py CHANGED
@@ -2,6 +2,7 @@ import torch
 import gradio as gr
 import json
 from torchvision import transforms
+from torchvision.ops import nms
 from PIL import Image, ImageDraw, ImageFont

 TORCHSCRIPT_PATH = "res/screenrecognition-web350k-vins.torchscript"
@@ -14,71 +15,17 @@ with open(LABELS_PATH, "r") as f:

 img_transforms = transforms.ToTensor()

-# inter_class_nms
+# inter_class_nms implemented by GPT
 def inter_class_nms(boxes, scores, iou_threshold=0.5):
-    #
-
-    scores, class_indices = scores.max(dim=1)
+    # Perform non-maximum suppression
+    keep = nms(boxes, scores, iou_threshold)

-    #
-
-
-    final_class_indices = []
+    # Filter boxes and scores
+    new_boxes = boxes[keep]
+    new_scores = scores[keep]

-
-
-    class_scores = scores[:, class_index]
-    class_boxes = boxes
-
-    # Indices of boxes sorted by score (highest first)
-    sorted_indices = torch.argsort(class_scores, descending=True)
-
-    while len(sorted_indices) > 0:
-        # Take the box with the highest score
-        highest_index = sorted_indices[0]
-        highest_box = class_boxes[highest_index]
-
-        # Add the highest box and score to the final list
-        final_boxes.append(highest_box)
-        final_scores.append(class_scores[highest_index])
-        final_class_indices.append(class_index)
-
-        # Remove the highest box from the list
-        sorted_indices = sorted_indices[1:]
-
-        # Compute IoU of the highest box with the rest
-        ious = iou(class_boxes[sorted_indices], highest_box)
-
-        # Keep only boxes with IoU less than the threshold
-        sorted_indices = sorted_indices[ious < iou_threshold]
-
-    return {'boxes': final_boxes, 'scores': final_scores}
-
-
-def iou(boxes1, boxes2):
-    """
-    Compute the Intersection over Union (IoU) of two sets of boxes.
-
-    Args:
-    - boxes1 (Tensor[N, 4]): ground truth boxes
-    - boxes2 (Tensor[M, 4]): predicted boxes
-
-    Returns:
-    - iou (Tensor[N, M]): the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2
-    """
-
-    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
-    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
-
-    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
-    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]
-
-    wh = (rb - lt).clamp(min=0)  # [N,M,2]
-    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
-
-    iou = inter / (area1[:, None] + area2 - inter)
-
-    return iou
+    # Return the result in a dictionary
+    return {'boxes': new_boxes, 'scores': new_scores}

 def predict(img, conf_thresh=0.4):
     img_input = [img_transforms(img)]