muk42 committed
Commit 2be323a · 1 Parent(s): 74d134c

models moved to huggingface repo
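As the commit message says, the fine-tuned model weights are no longer bundled with this repo; they are pulled from the Hugging Face Hub at runtime. A minimal sketch of the loading pattern this commit introduces in `inference_tab/inference_logic.py` (repo IDs and filenames taken from the diff below; the variable names `detector` and `recognizer` are just for illustration):

```python
from huggingface_hub import hf_hub_download
from ultralytics import YOLO
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Fine-tuned YOLOv9 text detector: download (and cache) the weight file from the Hub
yolo_weights = hf_hub_download(
    repo_id="muk42/yolov9_streets",
    filename="yolov9c_finetuned.pt",
)
detector = YOLO(yolo_weights)

# Fine-tuned TrOCR recognizer: from_pretrained resolves the Hub repo directly
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-str")
recognizer = VisionEncoderDecoderModel.from_pretrained("muk42/trocr_streets")
```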
annotation_tab/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .annotation_setup import get_annotation_widgets
+
+ __all__ = ["get_annotation_widgets"]
annotation_tab/annotation_logic.py ADDED
@@ -0,0 +1,118 @@
+ import os
+ import pandas as pd
+ import threading
+ import gradio as gr
+
+
+ # ==== CONFIG ====
+ IMAGE_FOLDER = "output/blobs"
+ CSV_FILE = "output/manual_annotations.csv"
+
+ # ==== STATE ====
+ if os.path.exists(CSV_FILE):
+     df_annotations = pd.read_csv(CSV_FILE)
+     annotated_ids = set(df_annotations["blob_id"].astype(str).tolist())
+ else:
+     df_annotations = pd.DataFrame(columns=["blob_id", "human_ocr"])
+     df_annotations.to_csv(CSV_FILE, index=False)
+     annotated_ids = set()
+
+ all_images = [f for f in os.listdir(IMAGE_FOLDER) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
+ all_images_paths = [os.path.join(IMAGE_FOLDER, f) for f in all_images]
+ current_index = 0
+
+ def get_current_image_path():
+     if 0 <= current_index < len(all_images_paths):
+         return all_images_paths[current_index]
+     return None
+
+ def is_annotated(image_path):
+     return os.path.basename(image_path) in annotated_ids
+
+ def get_annotation_for_image(image_path):
+     filename = os.path.basename(image_path)
+     row = df_annotations[df_annotations["blob_id"] == filename]
+     if not row.empty:
+         return row["human_ocr"].values[0]
+     return ""
+
+ def find_next_unannotated_index(start):
+     n = len(all_images_paths)
+     idx = start
+     for _ in range(n):
+         idx = (idx + 1) % n
+         if not is_annotated(all_images_paths[idx]):
+             return idx
+     return None
+
+ def save_annotation(user_text):
+     global df_annotations, annotated_ids
+     img_path = get_current_image_path()
+     if img_path:
+         filename = os.path.basename(img_path)
+         text_value = user_text.strip() if user_text and user_text.strip() else ""
+
+         if filename in annotated_ids:
+             df_annotations.loc[df_annotations["blob_id"] == filename, "human_ocr"] = text_value
+         else:
+             new_row = pd.DataFrame([{"blob_id": filename, "human_ocr": text_value}])
+             df_annotations = pd.concat([df_annotations, new_row], ignore_index=True)
+             annotated_ids.add(filename)
+
+         df_annotations.to_csv(CSV_FILE, index=False)
+
+ def save_and_next(user_text):
+     global current_index
+     if get_current_image_path() is None:
+         return None, "", gr.update(visible=True, value="No images available."), "No image loaded"
+
+     save_annotation(user_text)
+     next_idx = find_next_unannotated_index(current_index)
+     if next_idx is None:
+         return None, "", gr.update(visible=True, value="All images annotated."), ""
+
+     current_index = next_idx
+     img_path = get_current_image_path()
+     annotation = get_annotation_for_image(img_path)
+     return img_path, annotation, gr.update(visible=False), img_path
+
+ def previous_image():
+     global current_index
+     if len(all_images_paths) == 0:
+         return None, "", gr.update(visible=True, value="No images available."), "No image loaded"
+
+     current_index = (current_index - 1) % len(all_images_paths)
+     img_path = get_current_image_path()
+     annotation = get_annotation_for_image(img_path)
+     return img_path, annotation, gr.update(visible=False), img_path
+
+ def delete_and_next():
+     global current_index, all_images_paths, annotated_ids, df_annotations
+     img_path = get_current_image_path()
+     if img_path and os.path.exists(img_path):
+         os.remove(img_path)
+
+         filename = os.path.basename(img_path)
+         if filename in annotated_ids:
+             annotated_ids.remove(filename)
+             df_annotations = df_annotations[df_annotations["blob_id"] != filename]
+             df_annotations.to_csv(CSV_FILE, index=False)
+
+         del all_images_paths[current_index]
+
+     if len(all_images_paths) == 0:
+         return None, "", gr.update(visible=True, value="No images left."), "No image loaded"
+
+     current_index = min(current_index, len(all_images_paths) - 1)
+     img_path = get_current_image_path()
+     annotation = get_annotation_for_image(img_path)
+     return img_path, annotation, gr.update(visible=False), img_path
+
+ def shutdown():
+     os._exit(0)
+
+ def save_and_exit(user_text):
+     if get_current_image_path() is not None:
+         save_annotation(user_text)
+     threading.Timer(1, shutdown).start()
+     return None, "", gr.update(visible=True, value="Session closed."), ""
annotation_tab/annotation_setup.py ADDED
@@ -0,0 +1,25 @@
+ import gradio as gr
+ from .annotation_logic import (
+     save_and_next, previous_image, delete_and_next, save_and_exit,
+     get_current_image_path, get_annotation_for_image
+ )
+
+ def get_annotation_widgets():
+     message = gr.Markdown("", visible=False)
+     image_path_display = gr.Markdown(value=get_current_image_path() or "No image loaded", elem_id="image_path")
+     img = gr.Image(type="filepath", value=get_current_image_path(), label="Blob")
+     txt = gr.Textbox(label="Transcription")
+     hint = gr.Markdown("*If there are multiple street names in the image, please separate them with commas.*")
+
+     with gr.Row():
+         prev_btn = gr.Button("Previous")
+         next_btn = gr.Button("Save & Next")
+         del_btn = gr.Button("Delete & Next", variant="stop")
+         exit_btn = gr.Button("Save & Exit", variant="secondary")
+
+     next_btn.click(save_and_next, inputs=txt, outputs=[img, txt, message, image_path_display])
+     prev_btn.click(previous_image, outputs=[img, txt, message, image_path_display])
+     del_btn.click(delete_and_next, outputs=[img, txt, message, image_path_display])
+     exit_btn.click(save_and_exit, inputs=txt, outputs=[img, txt, message, image_path_display])
+
+     return [message, image_path_display, img, txt, hint, prev_btn, next_btn, del_btn, exit_btn]
app.py CHANGED
@@ -1,23 +1,20 @@
  import gradio as gr
  import logging
+ from inference_tab import get_inference_widgets, run_inference
+ from annotation_tab import get_annotation_widgets
 
  # setup logging
  logging.basicConfig(level=logging.DEBUG)
 
- import cv2
- import numpy as np
-
- def process_image_file(img_file):
-     # img_file.name is the path
-     img = cv2.imread(img_file.name)
-     return f"Shape: {img.shape}"
-
- demo = gr.Interface(
-     fn=process_image_file,
-     inputs=gr.File(label="Select Image File"),
-     outputs="text"
- )
+ with gr.Blocks() as demo:
+     with gr.Tab("Inference"):
+         get_inference_widgets(run_inference)
+     with gr.Tab("Annotation"):
+         get_annotation_widgets()
 
 
  demo.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False)
inference_tab/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .inference_setup import get_inference_widgets
+ from .inference_logic import run_inference
+
+ __all__ = ["get_inference_widgets", "run_inference"]
inference_tab/inference_logic.py ADDED
@@ -0,0 +1,748 @@
+ import numpy as np
+ from ultralytics import YOLO
+ import os
+ import json
+ from PIL import Image
+ from ultralytics import SAM
+ import cv2
+ import torch
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+ import rasterio
+ import rasterio.features
+ from shapely.geometry import shape
+ import pandas as pd
+ import osmnx as ox
+ from osgeo import gdal
+ import geopandas as gpd
+ from rapidfuzz import process, fuzz
+ from huggingface_hub import hf_hub_download
+
+
+ yolo_weights = hf_hub_download(
+     repo_id="muk42/yolov9_streets",
+     filename="yolov9c_finetuned.pt"
+ )
+
+ def run_inference(image_path, gcp_path, city_name, score_th):
+     # ==== TEXT DETECTION ====
+     yield from getBBoxes(image_path)
+     yield from getSegments(image_path)
+     yield from extractSegments(image_path)
+
+     # === TEXT RECOGNITION ===
+     yield from blobsOCR(image_path)
+
+     # === ADD GEO DATA ===
+     yield from georefImg("output/mask.tif", gcp_path)
+     yield from extractCentroids(image_path)
+     yield from extractStreetNet(city_name)
+
+     # === POST OCR ===
+     for msg in fuzzyMatch():
+         if msg.endswith(".csv"):
+             yield f"Finished! CSV saved at {msg}", msg
+         else:
+             yield msg, None
+
+     return f"Street labels are ready for manual input.\nImage: {image_path}", None
+
+
+
+ def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25):
+     yield f"DEBUG: Received image_path: {image_path}"
+     image = cv2.imread(image_path)
+     H, W, _ = image.shape
+     model = YOLO(yolo_weights)
+
+     step = int(tile_size * (1 - overlap))
+     all_detections = []
+
+     total_tiles = 0
+     # Calculate total tiles for progress reporting
+     for y in range(0, H, step):
+         for x in range(0, W, step):
+             # Skip small tiles at the edges
+             if y + tile_size > H or x + tile_size > W:
+                 continue
+             total_tiles += 1
+
+     processed_tiles = 0
+
+     # Tile the image and run prediction
+     for y in range(0, H, step):
+         for x in range(0, W, step):
+             tile = image[y:y+tile_size, x:x+tile_size]
+
+             if tile.shape[0] < tile_size or tile.shape[1] < tile_size:
+                 continue
+
+             results = model.predict(source=tile, imgsz=tile_size, conf=confidence_threshold, verbose=False)
+
+             for result in results:
+                 boxes = result.boxes.xyxy.cpu().numpy()
+                 scores = result.boxes.conf.cpu().numpy()
+                 classes = result.boxes.cls.cpu().numpy()
+
+                 for box, score, cls in zip(boxes, scores, classes):
+                     x1, y1, x2, y2 = box
+                     # Shift box coordinates relative to full image
+                     x1 += x
+                     x2 += x
+                     y1 += y
+                     y2 += y
+                     all_detections.append([x1, y1, x2, y2, score, int(cls)])
+
+             processed_tiles += 1
+             yield f"Processed tile {processed_tiles} of {total_tiles}"
+
+     # After all tiles are processed, save detections to JSON
+     boxes_to_save = [
+         {
+             "bbox": [float(x1), float(y1), float(x2), float(y2)],
+             "score": float(conf),
+             "class": int(cls)
+         }
+         for x1, y1, x2, y2, conf, cls in all_detections
+     ]
+
+     output_path = "output/boxes.json"
+     os.makedirs("output", exist_ok=True)
+     with open(output_path, "w") as f:
+         json.dump(boxes_to_save, f, indent=4)
+
+     yield f"Inference complete. Results saved to {output_path}"
+
+
+ def box_inside_global(box, global_box):
+     x1, y1, x2, y2 = box
+     gx1, gy1, gx2, gy2 = global_box
+     return (x1 >= gx1 and y1 >= gy1 and x2 <= gx2 and y2 <= gy2)
+
+ def nms_iou(box1, box2):
+     x1 = max(box1[0], box2[0])
+     y1 = max(box1[1], box2[1])
+     x2 = min(box1[2], box2[2])
+     y2 = min(box1[3], box2[3])
+
+     inter_area = max(0, x2 - x1) * max(0, y2 - y1)
+     box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
+     box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
+     union_area = box1_area + box2_area - inter_area
+
+     return inter_area / union_area if union_area > 0 else 0
+
+ def non_max_suppression(boxes, scores, iou_threshold=0.5):
+     idxs = np.argsort(scores)[::-1]
+     keep = []
+
+     while len(idxs) > 0:
+         current = idxs[0]
+         keep.append(current)
+         idxs = idxs[1:]
+         idxs = np.array([i for i in idxs if nms_iou(boxes[current], boxes[i]) < iou_threshold])
+
+     return keep
+
+
+
+ def tile_image_with_overlap(image_path, tile_size=1024, overlap=256):
+     """Tile PDF image into overlapping RGB tiles."""
+     image = cv2.imread(image_path)
+     height, width, _ = image.shape
+
+     step = tile_size - overlap
+     tile_list = []
+
+     for y in range(0, height, step):
+         for x in range(0, width, step):
+             x_end = min(x + tile_size, width)
+             y_end = min(y + tile_size, height)
+             x_start = max(0, x_end - tile_size)
+             y_start = max(0, y_end - tile_size)
+
+             tile = image[y_start:y_end, x_start:x_end, :]
+             tile_list.append((tile, (x_start, y_start)))
+
+     return tile_list, image.shape
+
+
+ def compute_iou(box1, box2):
+     """Compute Intersection over Union for two boxes."""
+     x1 = max(box1[0], box2[0])
+     y1 = max(box1[1], box2[1])
+     x2 = min(box1[2], box2[2])
+     y2 = min(box1[3], box2[3])
+
+     inter_area = max(0, x2 - x1) * max(0, y2 - y1)
+     area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
+     area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
+     union_area = area1 + area2 - inter_area
+
+     return inter_area / union_area if union_area > 0 else 0
+
+
+ def merge_boxes(boxes, iou_threshold=0.8):
+     """Merge overlapping boxes based on IoU."""
+     merged = []
+     used = [False] * len(boxes)
+
+     for i, box in enumerate(boxes):
+         if used[i]:
+             continue
+         group = [box]
+         used[i] = True
+         for j in range(i + 1, len(boxes)):
+             if used[j]:
+                 continue
+             if compute_iou(box, boxes[j]) > iou_threshold:
+                 group.append(boxes[j])
+                 used[j] = True
+
+         # Merge group into one bounding box
+         x1 = min(b[0] for b in group)
+         y1 = min(b[1] for b in group)
+         x2 = max(b[2] for b in group)
+         y2 = max(b[3] for b in group)
+         merged.append([x1, y1, x2, y2])
+
+     return merged
+
+
+ def box_area(box):
+     return max(0, box[2] - box[0]) * max(0, box[3] - box[1])
+
+ def is_contained(box1, box2, containment_threshold=0.9):
+     # Check if box1 is mostly inside box2
+     x1 = max(box1[0], box2[0])
+     y1 = max(box1[1], box2[1])
+     x2 = min(box1[2], box2[2])
+     y2 = min(box1[3], box2[3])
+
+     inter_area = max(0, x2 - x1) * max(0, y2 - y1)
+     area1 = box_area(box1)
+     area2 = box_area(box2)
+
+     # If intersection covers most of smaller box area, consider contained
+     smaller_area = min(area1, area2)
+     if smaller_area == 0:
+         return False
+     return (inter_area / smaller_area) >= containment_threshold
+
+ def merge_boxes_iterative(boxes, iou_threshold=0.25, containment_threshold=0.75):
+     boxes = boxes.copy()
+     changed = True
+
+     while changed:
+         changed = False
+         merged = []
+         used = [False] * len(boxes)
+
+         for i, box in enumerate(boxes):
+             if used[i]:
+                 continue
+             group = [box]
+             used[i] = True
+             for j in range(i + 1, len(boxes)):
+                 if used[j]:
+                     continue
+                 iou = compute_iou(box, boxes[j])
+                 contained = is_contained(box, boxes[j], containment_threshold)
+                 if iou > iou_threshold or contained:
+                     group.append(boxes[j])
+                     used[j] = True
+
+             # Merge group into one bounding box
+             x1 = min(b[0] for b in group)
+             y1 = min(b[1] for b in group)
+             x2 = max(b[2] for b in group)
+             y2 = max(b[3] for b in group)
+             merged.append([x1, y1, x2, y2])
+
+         if len(merged) < len(boxes):
+             changed = True
+         boxes = merged
+
+     return boxes
+
+
+ def get_corner_points(box):
+     x1, y1, x2, y2 = box
+     return [
+         [x1, y1],  # top-left
+         [x2, y1],  # top-right
+         [x1, y2],  # bottom-left
+         [x2, y2],  # bottom-right
+     ]
+
+
+ def sample_negative_points_outside_boxes(mask, num_points):
+     points = []
+     tries = 0
+     max_tries = num_points * 20  # fail-safe to avoid infinite loops
+     while len(points) < num_points and tries < max_tries:
+         x = np.random.randint(0, mask.shape[1])
+         y = np.random.randint(0, mask.shape[0])
+         if not mask[y, x]:
+             points.append([x, y])
+         tries += 1
+     return np.array(points)
+
+ def get_inset_corner_points(box, margin=5):
+     x1, y1, x2, y2 = box
+
+     # Ensure box is large enough for the margin
+     x1i = min(x1 + margin, x2)
+     y1i = min(y1 + margin, y2)
+     x2i = max(x2 - margin, x1)
+     y2i = max(y2 - margin, y1)
+
+     return [
+         [x1i, y1i],  # top-left (inset)
+         [x2i, y1i],  # top-right
+         [x1i, y2i],  # bottom-left
+         [x2i, y2i],  # bottom-right
+     ]
+
+
+ def getSegments(image_path, iou=0.5, c_th=0.75, edge_margin=10):
+     """
+     iou: IoU threshold for combining bounding boxes
+     c_th: share of the smaller box that must lie inside the larger box to merge
+     edge_margin: pixel margin for tiles
+
+     TBD as user input
+     # define global bounding box to filter out boxes outside of the main map
+     # [COL_MIN, ROW_MIN, COL_MAX, ROW_MAX]
+     #GLOBAL_BOX = [211,470,6198,4723]
+     """
+
+     yield f"Loading SAM model and data..."
+
+     # Load Ultralytics SAM2.1 model
+     model = SAM("sam2.1_l.pt")
+
+     # Load YOLO-predicted boxes
+     with open("output/boxes.json", "r") as f:
+         box_data = json.load(f)
+
+     # ==== PREPARE BOXES =====
+     yield f"Prepare bounding boxes..."
+     # Non-max suppression
+     boxes = np.array([item["bbox"] for item in box_data])
+     scores = np.array([item["score"] for item in box_data])
+     # Run NMS
+     keep_indices = non_max_suppression(boxes, scores, iou)
+     # Filter data
+     box_data = [box_data[i] for i in keep_indices]
+     # Filter boxes inside global bbox (TBD)
+     #box_data = [entry for entry in box_data if box_inside_global(entry["bbox"], GLOBAL_BOX)]
+     boxes_full = [b["bbox"] for b in box_data]  # Format: [x1, y1, x2, y2]
+
+     # Tile the image
+     yield f"Tile the image..."
+     tiles, (full_height, full_width, _) = tile_image_with_overlap(image_path, tile_size=1024, overlap=50)
+
+     # Prepare full-size mask
+     full_mask = np.zeros((full_height, full_width), dtype=np.uint16)
+     instance_id = 1
+
+     yield f"Running predictions..."
+     # enumerate() takes no tqdm-style desc argument, so plain enumerate is used here
+     for tile_idx, (tile_array, (x_offset, y_offset)) in enumerate(tiles):
+
+         tile_height, tile_width, _ = tile_array.shape
+
+         # Select boxes overlapping this tile
+         candidate_boxes = []
+         for x1, y1, x2, y2 in boxes_full:
+             if (x2 > x_offset) and (x1 < x_offset + tile_width) and (y2 > y_offset) and (y1 < y_offset + tile_height):
+                 candidate_boxes.append([x1, y1, x2, y2])
+
+         if not candidate_boxes:
+             continue
+
+         # Merge overlapping boxes
+         merged_boxes = merge_boxes_iterative(candidate_boxes, iou_threshold=iou, containment_threshold=c_th)
+
+         # Adjust boxes to tile-local coordinates
+         local_boxes = []
+         for x1, y1, x2, y2 in merged_boxes:
+             new_x1 = max(0, x1 - x_offset)
+             new_y1 = max(0, y1 - y_offset)
+             new_x2 = min(tile_width, x2 - x_offset)
+             new_y2 = min(tile_height, y2 - y_offset)
+             local_boxes.append([new_x1, new_y1, new_x2, new_y2])
+
+         tile_h, tile_w, _ = tile_array.shape
+         # Filter local_boxes to remove those too close to the tile edges
+         filtered_local_boxes = []
+         for box in local_boxes:
+             x1, y1, x2, y2 = box
+             if (x1 > edge_margin and y1 > edge_margin and (tile_w - x2) > edge_margin and (tile_h - y2) > edge_margin):
+                 filtered_local_boxes.append(box)
+
+         local_boxes = filtered_local_boxes
+
+         if not local_boxes:
+             continue
+
+         # centroids will be positive point prompts as they align well with the text
+         centroids = [((bx1 + bx2) / 2, (by1 + by2) / 2) for bx1, by1, bx2, by2 in local_boxes]
+
+         # [STRATEGY 2] Negative points are within box at the corners
+         #negative_points_per_box = [get_corner_points(box) for box in local_boxes]
+         # [STRATEGY 3] Negative points are within box at the corners with a bit of a margin
+         negative_points_per_box = [get_inset_corner_points(box, margin=2) for box in local_boxes]
+
+         point_coords = []
+         point_labels = []
+
+         for centroid, neg_points in zip(centroids, negative_points_per_box):
+             if not isinstance(neg_points, list):
+                 neg_points = neg_points.tolist()
+             all_points = [centroid] + neg_points
+             all_labels = [1] + [0] * len(neg_points)
+
+             assert len(all_points) == len(all_labels), f"Point-label mismatch: {len(all_points)} vs {len(all_labels)}"
+
+             point_coords.append(all_points)
+             point_labels.append(all_labels)
+
+         results = model(tile_array,
+                         bboxes=local_boxes,
+                         points=point_coords,
+                         labels=point_labels)
+
+         yield f"Merging segmentation masks..."
+         for result in results:
+             if result.masks is None or result.masks.data is None:
+                 continue
+
+             # Create a copy of the tile image to overlay masks on
+             tile_with_masks = tile_array.copy()
+
+             for mask in result.masks.data:  # each mask: (H, W)
+                 mask_np = mask.cpu().numpy().astype(bool)
+
+                 # Create a red overlay for the mask
+                 red_overlay = np.zeros_like(tile_with_masks, dtype=np.uint8)
+                 red_overlay[..., 0] = 255  # Red channel
+
+                 alpha = 0.5  # Transparency factor
+
+                 # Blend the overlay on the tile where mask is True
+                 tile_with_masks = np.where(
+                     mask_np[..., None],
+                     (1 - alpha) * tile_with_masks + alpha * red_overlay,
+                     tile_with_masks
+                 ).astype(np.uint8)
+
+                 # Paste into full-size canvas
+                 y1 = y_offset
+                 y2 = min(y_offset + tile_height, full_height)
+                 x1 = x_offset
+                 x2 = min(x_offset + tile_width, full_width)
+
+                 cropped_mask = mask_np[:y2 - y1, :x2 - x1]
+                 region = full_mask[y1:y2, x1:x2]
+
+                 region[(cropped_mask) & (region == 0)] = instance_id
+                 instance_id += 1
+
+
+     final_mask = Image.fromarray(full_mask)
+     final_mask.save("output/mask.tif")
+
+     yield f"Saved mask with {instance_id - 1} instances"
+
+
+
+ def extractSegments(image_path, min_size=500, margin=10):
+
+     image = cv2.imread(image_path)
+     mask = cv2.imread("output/mask.tif", cv2.IMREAD_UNCHANGED)
+
+     height, width = mask.shape[:2]
+
+     # Get unique labels (excluding background label 0)
+     blob_ids = np.unique(mask)
+     blob_ids = blob_ids[blob_ids != 0]
+
+     yield f"Found {len(blob_ids)} blobs"
+
+     for blob_id in blob_ids:
+         yield f"Processing blob {blob_id}..."
+         # Create a binary mask for the current blob
+         blob_mask = (mask == blob_id).astype(np.uint8)
+
+         # Skip small blobs (WxH)
+         if np.sum(blob_mask) < min_size:
+             continue
+
+         # Find bounding box of the blob
+         ys, xs = np.where(blob_mask)
+         y_min, y_max = ys.min(), ys.max() + 1
+         x_min, x_max = xs.min(), xs.max() + 1
+
+         # Add margin to bounding box while keeping inside image bounds
+         x_min = max(0, x_min - margin)
+         y_min = max(0, y_min - margin)
+         x_max = min(width, x_max + margin)
+         y_max = min(height, y_max + margin)
+
+         # Crop the region from original image
+         cropped_image = image[y_min:y_max, x_min:x_max]
+         cropped_mask = blob_mask[y_min:y_max, x_min:x_max]
+
+         # Apply mask to original image
+         if image.ndim == 3:
+             masked_image = cv2.bitwise_and(cropped_image, cropped_image, mask=cropped_mask)
+         else:
+             masked_image = cv2.bitwise_and(cropped_image, cropped_image, mask=cropped_mask)
+
+         # Save the masked image
+         output_path = os.path.join('output/blobs', f"{blob_id}.png")
+         os.makedirs(os.path.dirname(output_path), exist_ok=True)
+         cv2.imwrite(output_path, masked_image)
+
+     yield f"Done."
+
+
+ def blobsOCR(image_path):
+
+     # Load model + processor
+     processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-str")
+     model = VisionEncoderDecoderModel.from_pretrained("muk42/trocr_streets")
+     image_extensions = (".png")
+
+     # Device setup
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model.to(device)
+     yield f"Running on {device}..."
+
+     # Open output file for writing (read back later by fuzzyMatch as output/ocr.csv)
+     with open("output/ocr.csv", "w", encoding="utf-8") as f_out:
+         # Process each image
+         image_folder = "output/blobs"
+         for filename in os.listdir(image_folder):
+             if filename.lower().endswith(image_extensions):
+                 image_path = os.path.join(image_folder, filename)
+
+                 try:
+                     image = Image.open(image_path).convert("RGB")
+                     # Move inputs to the same device as the model
+                     pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
+
+                     generated_ids = model.generate(pixel_values)
+                     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+                     # Write to file
+                     name = os.path.splitext(os.path.basename(filename))[0]
+                     f_out.write(f'{name},"{generated_text}"\n')
+                     yield f"{filename} → {generated_text}"
+
+                 except Exception as e:
+                     yield f"Error processing {filename}: {e}"
+                     f_out.write(f"{filename}\tERROR: {e}\n")
+
+
+ def extractCentroids(image_path):
+
+     with rasterio.open("output/georeferenced.tif") as src:
+         mask = src.read(1)
+         transform = src.transform
+
+     labels = np.unique(mask)
+     labels = labels[labels != 0]
+
+     data = []
+
+     # Generate polygons and their values
+     shapes_gen = rasterio.features.shapes(mask, mask=(mask != 0), transform=transform)
+
+     # Create a dict to collect polygons by label
+     polygons_by_label = {}
+
+     for geom, val in shapes_gen:
+         if val == 0:
+             continue
+         polygons_by_label.setdefault(val, []).append(shape(geom))
+
+     # For each label, merge polygons and get centroid
+     for idx, label in enumerate(labels):
+         yield f"Processing {idx+1} out of {len(labels)}"
+         polygons = polygons_by_label.get(label)
+         if not polygons:
+             continue
+
+         # Merge polygons of the same label (if multiple parts)
+         multi_poly = polygons[0]
+         for poly in polygons[1:]:
+             multi_poly = multi_poly.union(poly)
+
+         centroid = multi_poly.centroid
+         data.append({"blob_id": label, "x": centroid.x, "y": centroid.y})
+
+     df = pd.DataFrame(data)
+     df.to_csv("output/centroids.csv", index=False)
+     yield f"Saved centroid coordinates of {len(labels)} blobs."
+
+
+
+ def collectBlobs(image_path):
+     filename = os.path.splitext(os.path.basename(image_path))[0]
+     box_dir = "output/blobs"
+     # Get all filenames in the folder (only files, not subfolders)
+     file_names = [f for f in os.listdir(box_dir) if os.path.isfile(os.path.join(box_dir, f))]
+
+     # Save to text file
+     with open(f"output/{filename}_blobs.txt", "w") as f:
+         for name in file_names:
+             yield f"Writing {name}..."
+             f.write(name + "\n")
+
+ def img_shape(image_path):
+     img = cv2.imread(image_path)
+     return img.shape
+
+ def georefImg(image_path, gcp_path):
+     yield "Reading GCP CSV..."
+     df = pd.read_csv(gcp_path)
+
+     H, W, _ = img_shape(image_path)
+
+     # Build GCPs
+     gcps = []
+     for _, r in df.iterrows():
+         gcps.append(
+             gdal.GCP(
+                 float(r['mapX']),
+                 float(r['mapY']),
+                 0,
+                 float(r['sourceX']),
+                 #H-float(r['sourceY'])
+                 abs(float(r['sourceY']))
+             )
+         )
+
+     tmp_file = "output/tmp.tif"
+
+     gdal.Translate(
+         tmp_file,
+         image_path,
+         format="GTiff",
+         GCPs=gcps,
+         outputSRS="EPSG:3857"
+     )
+
+     geo_file = "output/georeferenced.tif"
+     yield "Running gdalwarp..."
+
+     gdal.Warp(
+         geo_file,
+         tmp_file,
+         dstSRS="EPSG:3857",
+         resampleAlg="near",
+         polynomialOrder=1,
+         creationOptions=["COMPRESS=LZW"]
+     )
+
+     yield f"Done."
+
+
+ def extractStreetNet(city_name):
+     yield f"Extract OSM street network for {city_name}"
+     G = ox.graph_from_place(city_name, network_type='drive')
+     G_proj = ox.project_graph(G)
+     nodes, edges = ox.graph_to_gdfs(G_proj)
+     edges_3857 = edges.to_crs(epsg=3857)
+     edges_3857.to_file("output/osm_extract.geojson", driver="GeoJSON")
+     yield "Done."
+
+
+ def best_street_match(point, query_name, edges_gdf, max_distance=100):
+     buffer = point.buffer(max_distance)
+     nearby_edges = edges_gdf[edges_gdf.intersects(buffer)]
+
+     if nearby_edges.empty:
+         return None, 0
+
+     candidate_names = nearby_edges['name'].tolist()
+     best_match = process.extractOne(query_name, candidate_names, scorer=fuzz.ratio)
+     return best_match  # (name, score, index)
+
+ def fuzzyMatch():
+     coords_df = pd.read_csv("output/centroids.csv")
+     # OCR results are written by blobsOCR as headerless "blob_id,pred_text" rows
+     names_df = pd.read_csv("output/ocr.csv", header=None, names=["blob_id", "pred_text"])
+     merged_df = coords_df.merge(names_df, on="blob_id")
+
+     gdf = gpd.GeoDataFrame(
+         merged_df,
+         geometry=gpd.points_from_xy(merged_df.x, merged_df.y),
+         crs="EPSG:3857"
+     )
+
+     osm_gdf = gpd.read_file("output/osm_extract.geojson")
+     osm_gdf = osm_gdf[osm_gdf['name'].notnull()]
+
+     yield "Process OSM candidates..."
+     results = []
+     for _, row in gdf.iterrows():
+         # The OCR text column is pred_text (there is no 'name' column in gdf)
+         match = best_street_match(row.geometry, row['pred_text'], osm_gdf, max_distance=100)
+         if match:
+             results.append({
+                 "blob_id": row.blob_id,
+                 "x": row.x,
+                 "y": row.y,
+                 "blob_name": row.pred_text,
+                 "best_osm_match": match[0],
+                 "osm_match_score": match[1]
+             })
+         else:
+             results.append({
+                 "blob_id": row.blob_id,
+                 "x": row.x,
+                 "y": row.y,
+                 "blob_name": row.pred_text,
+                 "best_osm_match": None,
+                 "osm_match_score": 0
+             })
+
+     results_df = pd.DataFrame(results)
+     results_df.to_csv("output/street_matches.csv", index=False)
+     yield "output/street_matches.csv"
inference_tab/inference_setup.py ADDED
@@ -0,0 +1,19 @@
+ import gradio as gr
+
+ def get_inference_widgets(run_inference):
+     image_input = gr.File(label="Select Image File")
+     gcp_input = gr.File(label="Select GCP Points File", file_types=[".points"])
+     city_name = gr.Textbox(label="Enter city name")
+     score_th = gr.Textbox(label="Enter a score threshold")
+     run_button = gr.Button("Run Inference")
+     output = gr.Textbox(label="Progress", lines=10, interactive=False)
+     download_file = gr.File(label="Download CSV")
+
+     run_button.click(
+         run_inference,
+         inputs=[image_input, gcp_input, city_name, score_th],
+         outputs=[output, download_file]
+     )
+
+     return image_input, gcp_input, city_name, score_th, run_button, output, download_file
packages.txt ADDED
@@ -0,0 +1,2 @@
+ libgdal-dev
+ gdal-bin
requirements.txt CHANGED
@@ -1,3 +1,18 @@
- numpy
- gradio
- opencv-python
+ geopandas==1.0.1
+ gradio==5.42.0
+ numpy==2.3.2
+ opencv_contrib_python==4.10.0.84
+ opencv_python==4.10.0.84
+ opencv_python_headless==4.10.0.84
+ osgeo==0.0.1
+ osmnx==2.0.6
+ pandas==2.3.1
+ Pillow==10.0.0
+ Pillow==11.3.0
+ rapidfuzz==3.13.0
+ rasterio==1.4.3
+ Shapely==2.1.1
+ torch==2.7.1+cu128
+ transformers==4.53.2
+ ultralytics==8.3.94
+ GDAL==3.7.0