map tab and seq in annotations

Files changed:
- annotation_tab/annotation_logic.py  +71 -60
- app.py  +7 -6
- inference_tab/inference_logic.py  +9 -17
- map_tab/map_logic.py  +0 -0
- map_tab/map_setup.py  +128 -5
annotation_tab/annotation_logic.py  CHANGED

@@ -9,47 +9,48 @@ IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")
 os.makedirs(IMAGE_FOLDER, exist_ok=True)
 CSV_FILE = os.path.join(OUTPUT_DIR, "annotations.csv")
 
-
 # ==== HELPER COUNTER ====
 def get_progress_text():
     if not all_images_paths:
         return "No images loaded"
     return f"Image {current_index + 1} of {len(all_images_paths)}"
 
-
 # ==== STATE ====
 if os.path.exists(CSV_FILE):
-    df_annotations = pd.read_csv(CSV_FILE)
-    annotated_ids = set(df_annotations["blob_id"].astype(str).tolist())
+    df_annotations = pd.read_csv(CSV_FILE, dtype={"blob_id": str, "human_ocr": str})
 else:
     df_annotations = pd.DataFrame(columns=["blob_id", "human_ocr"])
     df_annotations.to_csv(CSV_FILE, index=False)
-    annotated_ids = set()
 
-all_images = [f for f in os.listdir(IMAGE_FOLDER) if f.lower().endswith(('.png', '.jpg', '.jpeg')) and '_margin' in f]
+all_images = [
+    f for f in os.listdir(IMAGE_FOLDER)
+    if f.lower().endswith(('.png', '.jpg', '.jpeg')) and '_margin' in f
+]
 all_images_paths = [os.path.join(IMAGE_FOLDER, f) for f in all_images]
 current_index = 0
 
-
+# ==== HELPERS ====
 def get_current_image_path():
     if 0 <= current_index < len(all_images_paths):
         return all_images_paths[current_index]
     return None
 
-
 def is_annotated(image_path):
+    """Return True if the image has any non-empty annotation, including 'DELETED'."""
     blob_id = os.path.basename(image_path).replace("_margin", "")
-    return blob_id in annotated_ids
-
+    row = df_annotations[df_annotations["blob_id"] == blob_id]
+    if not row.empty:
+        val = str(row["human_ocr"].values[-1]).strip()
+        return val != ""  # counts "DELETED" as annotated
+    return False
 
 def get_annotation_for_image(image_path):
     blob_id = os.path.basename(image_path).replace("_margin", "")
     row = df_annotations[df_annotations["blob_id"] == blob_id]
     if not row.empty:
-        return row["human_ocr"].values[0]
+        return str(row["human_ocr"].values[-1])
     return ""
 
-
 def find_next_unannotated_index(start):
     n = len(all_images_paths)
     idx = start

@@ -59,24 +60,28 @@ def find_next_unannotated_index(start):
         return idx
     return None
 
-
+# ==== CORE FUNCTIONS ====
 def save_annotation(user_text):
-    global df_annotations, annotated_ids
+    """Save the current annotation for the active image."""
+    global df_annotations
+
     img_path = get_current_image_path()
-    if img_path:
-        filename = os.path.basename(img_path)
-        blob_id = filename.replace("_margin", "")
-        text_value = user_text.strip() if user_text and user_text.strip() else ""
+    if not img_path:
+        return
 
-        if blob_id in df_annotations["blob_id"].values:
-            df_annotations.loc[df_annotations["blob_id"] == blob_id, "human_ocr"] = text_value
-        else:
-            new_row = pd.DataFrame([{"blob_id": blob_id, "human_ocr": text_value}])
-            df_annotations = pd.concat([df_annotations, new_row], ignore_index=True)
-        annotated_ids.add(blob_id)
+    blob_id = os.path.basename(img_path).replace("_margin", "")
+    text_value = user_text.strip() if user_text else ""
 
-        df_annotations.to_csv(CSV_FILE, index=False)
+    row_idx = df_annotations.index[df_annotations["blob_id"] == blob_id].tolist()
+    if row_idx:
+        df_annotations.at[row_idx[0], "human_ocr"] = text_value
+    else:
+        df_annotations = pd.concat(
+            [df_annotations, pd.DataFrame([{"blob_id": blob_id, "human_ocr": text_value}])],
+            ignore_index=True
+        )
 
+    df_annotations.to_csv(CSV_FILE, index=False)
 
 def save_and_next(user_text):
     global current_index

@@ -84,19 +89,23 @@ def save_and_next(user_text):
         return None, "", gr.update(visible=True, value="No images available."), "No image loaded", "No images loaded"
 
     save_annotation(user_text)
-    next_idx = find_next_unannotated_index(current_index)
-    if next_idx is None:
-        return None, "", gr.update(visible=True, value="All images annotated."), "", get_progress_text()
 
+    # Check if all images are annotated
+    if all(is_annotated(p) for p in all_images_paths):
+        current_index = 0
+        img_path = get_current_image_path()
+        annotation = get_annotation_for_image(img_path)
+        return img_path, annotation, gr.update(visible=True, value="All images annotated."), img_path, get_progress_text()
+
+    next_idx = find_next_unannotated_index(current_index)
     current_index = next_idx
     img_path = get_current_image_path()
    annotation = get_annotation_for_image(img_path)
     return img_path, annotation, gr.update(visible=False), img_path, get_progress_text()
 
-
 def previous_image():
     global current_index
-    if not all_images_paths:
+    if not all_images_paths:
         return None, "", gr.update(visible=True, value="No images available."), "No image loaded", "No images loaded"
 
     current_index = (current_index - 1) % len(all_images_paths)

@@ -104,63 +113,65 @@ def previous_image():
     annotation = get_annotation_for_image(img_path)
     return img_path, annotation, gr.update(visible=False), img_path, get_progress_text()
 
-
 def delete_and_next():
-    global current_index
+    """Mark current image as DELETED and move to next image."""
+    global current_index, df_annotations
 
     img_path = get_current_image_path()
     if not img_path:
         return None, "", gr.update(visible=True, value="No images available."), "No image loaded", "No images loaded"
 
     blob_id = os.path.basename(img_path).replace("_margin", "")
-
-    # mark as deleted
-    if blob_id in df_annotations["blob_id"].values:
-        df_annotations.loc[df_annotations["blob_id"] == blob_id, "human_ocr"] = "DELETED"
+    row_idx = df_annotations.index[df_annotations["blob_id"] == blob_id].tolist()
+    if row_idx:
+        df_annotations.at[row_idx[0], "human_ocr"] = "DELETED"
     else:
-        new_row = pd.DataFrame([{"blob_id": blob_id, "human_ocr": "DELETED"}])
-        df_annotations = pd.concat([df_annotations, new_row], ignore_index=True)
-    annotated_ids.add(blob_id)
+        df_annotations = pd.concat(
+            [df_annotations, pd.DataFrame([{"blob_id": blob_id, "human_ocr": "DELETED"}])],
+            ignore_index=True
+        )
 
     df_annotations.to_csv(CSV_FILE, index=False)
 
-    # move to the next image
-    current_index = (current_index + 1) % len(all_images_paths)
+    # Check if all images are annotated
+    if all(is_annotated(p) for p in all_images_paths):
+        current_index = 0
+        img_path = get_current_image_path()
+        annotation = get_annotation_for_image(img_path)
+        return img_path, annotation, gr.update(visible=True, value="All images annotated."), img_path, get_progress_text()
+
+    # Otherwise, move to next unannotated image
+    next_idx = find_next_unannotated_index(current_index)
+    if next_idx is not None:
+        current_index = next_idx
+    else:
+        current_index = 0
+
     img_path = get_current_image_path()
     annotation = get_annotation_for_image(img_path)
     return img_path, annotation, gr.update(visible=False), img_path, get_progress_text()
 
 
-
-def shutdown():
-    os._exit(0)
-
-
 def save_and_exit(user_text):
     if get_current_image_path() is not None:
         save_annotation(user_text)
-    threading.Timer(1, shutdown).start()
+    threading.Timer(1, lambda: os._exit(0)).start()
     return None, "", gr.update(visible=True, value="Session closed."), "", get_progress_text()
 
-
 def get_current_annotations_path():
-    return
+    return CSV_FILE
 
 def refresh_image_list():
-    """
-    Reload images from the blobs folder and reset state.
-
-    Returns the first image and its annotation placeholders.
-    """
-    global all_images_paths, current_index, df_annotations, annotated_ids
-
-    # clear annotations.csv
+    """Reload images and reset CSV and state."""
+    global all_images_paths, current_index, df_annotations
+
     df_annotations = pd.DataFrame(columns=["blob_id", "human_ocr"])
     df_annotations.to_csv(CSV_FILE, index=False)
-    annotated_ids = set()
 
-    all_images = [f for f in os.listdir(IMAGE_FOLDER) if f.lower().endswith(('.png', '.jpg', '.jpeg')) and '_margin' in f]
+    all_images = [
+        f for f in os.listdir(IMAGE_FOLDER)
+        if f.lower().endswith(('.png', '.jpg', '.jpeg')) and '_margin' in f
+    ]
 all_images_paths = [os.path.join(IMAGE_FOLDER, f) for f in all_images]
 current_index = 0
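A note on the `dtype={"blob_id": str, "human_ocr": str}` change above: without it, pandas infers an integer dtype for purely numeric blob ids, so the string comparisons used throughout (`df_annotations["blob_id"] == blob_id`) silently match nothing. A minimal illustration (the values are invented):

    import io
    import pandas as pd

    csv = io.StringIO("blob_id,human_ocr\n0012,Hauptstrasse\n")

    inferred = pd.read_csv(csv)                   # blob_id is parsed as int 12
    print((inferred["blob_id"] == "0012").any())  # False: int never equals str

    csv.seek(0)
    typed = pd.read_csv(csv, dtype={"blob_id": str, "human_ocr": str})
    print((typed["blob_id"] == "0012").any())     # True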
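Also, the body of `find_next_unannotated_index` falls between the hunks, so only its first and last lines appear in the diff. A sketch of the wrap-around scan those lines imply, parameterized here so it runs standalone (the module version presumably closes over `all_images_paths` and `is_annotated`):

    def find_next_unannotated_index(start, paths, is_annotated):
        """Scan forward from `start`, wrapping around once; return the index
        of the first unannotated image, or None if all are annotated."""
        n = len(paths)
        idx = start
        for _ in range(n):                 # visit each index at most once
            if not is_annotated(paths[idx]):
                return idx
            idx = (idx + 1) % n            # wrap past the last image
        return None

    # With "a" and "b" already annotated, the scan from index 0 lands on "c":
    print(find_next_unannotated_index(0, ["a", "b", "c"], lambda p: p in {"a", "b"}))  # 2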
app.py  CHANGED

@@ -1,5 +1,5 @@
 # [DEBUG]
-
+from osgeo import gdal
 
 import gradio as gr
 import logging

@@ -11,22 +11,23 @@ from map_tab import get_map_widgets
 logging.basicConfig(level=logging.DEBUG)
 
 
+
 with gr.Blocks() as demo:
     with gr.Tab("Inference"):
-        get_inference_widgets(run_inference,georefImg)
+        image_input, gcp_input, city_name, score_th, run_button, output, download_file = get_inference_widgets(run_inference,georefImg)
     with gr.Tab("Annotation"):
         get_annotation_widgets()
-
-
+    with gr.Tab("Map"):
+        get_map_widgets(city_name)
 
 
 
 
 # [DEBUG]
-#demo.launch(inbrowser=True)
+demo.launch(inbrowser=True)
 
 # [PROD]
-demo.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False)
+#demo.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False)
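The app.py rewiring relies on the fact that a Gradio component created in one `gr.Tab` can feed events in another: `city_name` is returned by the Inference tab's widget builder and handed to `get_map_widgets`. A self-contained sketch of the pattern (the component and function names here are illustrative, not from the repo):

    import gradio as gr

    def render_for_city(city):
        return f"<b>Map would be rendered for: {city}</b>"

    with gr.Blocks() as demo:
        with gr.Tab("Inference"):
            city_name = gr.Textbox(label="City")  # created in the first tab
        with gr.Tab("Map"):
            map_output = gr.HTML()
            # ...and used as an event input from the second tab
            city_name.change(fn=render_for_city, inputs=[city_name], outputs=[map_output])

    demo.launch()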
inference_tab/inference_logic.py  CHANGED

@@ -23,6 +23,7 @@ from PIL import Image
 from .helpers import box_inside_global,nms_iou,non_max_suppression,tile_image_with_overlap,compute_iou,merge_boxes,box_area,is_contained,merge_boxes_iterative,get_corner_points,sample_negative_points_outside_boxes,get_inset_corner_points,processYOLOBoxes,prepare_tiles,merge_tile_masks,chunkify,img_shape,best_street_match
 from pyproj import Transformer
 import shutil
+import re
 
 # Global cache
 _trocr_processor = None

@@ -89,9 +90,6 @@ def run_inference(tile_dict, gcp_path, city_name, score_th):
     for msg in georefTile(coords,gcp_path):
         log += msg + "\n"
         yield log, None
-    '''for msg in georefImg(MASK_PATH, gcp_path):
-        log += msg + "\n"
-        yield log, None'''
     for msg in extractCentroids(image_path):
         log += msg + "\n"
         yield log, None

@@ -100,7 +98,7 @@ def run_inference(tile_dict, gcp_path, city_name, score_th):
         yield log, None
 
     # === POST OCR ===
-    for msg in fuzzyMatch(score_th):
+    for msg in fuzzyMatch(score_th,tile_dict):
         if msg.endswith(".csv"):
             log+= f"Finished! CSV saved at {msg}. Street labels are ready for manual input."
             yield log, msg

@@ -127,7 +125,7 @@ def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25)
 
     yolo_weights = hf_hub_download(
         repo_id="muk42/yolov9_streets",
-        filename="
+        filename="yolov9c_finetuned_v2.pt")  # fine-tuned on selection of city maps
 
     model = YOLO(yolo_weights)

@@ -500,10 +498,6 @@ def georefImg(image_path, gcp_path):
         )
     )
 
-
-
-
-
     gdal.Translate(
         TMP_FILE,
         image_path,

@@ -512,11 +506,6 @@ def georefImg(image_path, gcp_path):
         outputSRS="EPSG:3857"
     )
 
-
-
-
-    yield "Running gdalwarp..."
-
     gdal.Warp(
         GEO_FILE,
         TMP_FILE,

@@ -528,7 +517,7 @@ def georefImg(image_path, gcp_path):
 
 
 
-    yield "
+    yield "Georeferencing is done."
 
 
 def extractStreetNet(city_name):

@@ -577,7 +566,7 @@ def extractStreetNet(city_name):
 
 
 
-def fuzzyMatch(score_th):
+def fuzzyMatch(score_th,tile_dict):
     COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
     OCR_PATH=os.path.join(OUTPUT_DIR,"ocr.csv")
     coords_df = pd.read_csv(COORD_PATH)

@@ -621,7 +610,10 @@ def fuzzyMatch(score_th):
         })
 
     results_df = pd.DataFrame(results)
-    RES_PATH=os.path.join(OUTPUT_DIR,"street_matches.csv")
+    tile = tile_dict["tile_path"]
+    match = re.search(r'\d+', tile)
+    tile_number=int(match.group())
+    RES_PATH=os.path.join(OUTPUT_DIR,f"street_matches_tile{tile_number}.csv")
     results_df.to_csv(RES_PATH, index=False)
 
     # remove street labels from blobs folder that are more than or equal to score threshold
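One caveat on the new tile-number extraction in `fuzzyMatch`: `re.search(r'\d+', tile)` returns the first digit run anywhere in the path, so a digit in a parent directory wins over the tile index, and `match.group()` raises `AttributeError` when the path has no digits at all. Searching only the basename is the safer variant (the paths below are invented):

    import os
    import re

    tile = "/data/run2/tiles/tile_17.png"  # invented example path

    # First digit run in the full path comes from "run2", not the tile index:
    print(re.search(r'\d+', tile).group())  # -> 2

    # Restricting the search to the basename pins it to the tile index:
    match = re.search(r'\d+', os.path.basename(tile))
    tile_number = int(match.group()) if match else None
    print(tile_number)  # -> 17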
map_tab/map_logic.py  ADDED

File without changes.
map_tab/map_setup.py  CHANGED

@@ -1,8 +1,131 @@
+import os
 import gradio as gr
 import folium
+from folium.raster_layers import ImageOverlay
+from geopy.geocoders import Nominatim
+import rasterio
+import numpy as np
+from matplotlib import cm, colors
+import pandas as pd
+import pyproj
+from config import OUTPUT_DIR
+from branca.colormap import linear
 
-
-
-
-
-
+CELL_SIZE_M = 500  # meters
+
+def make_map(city, show_grid, show_georef):
+    city = city.strip()
+    if not city:
+        return "Please enter a city"
+
+    geolocator = Nominatim(user_agent="my_app")
+    loc = geolocator.geocode(city)
+    if loc is None:
+        return f"Could not find '{city}'"
+
+    m = folium.Map(location=[loc.latitude, loc.longitude], zoom_start=12)
+
+    raster_path = os.path.join(OUTPUT_DIR, "georeferenced.tif")
+    if not os.path.exists(raster_path):
+        return "Georeferenced raster not found"
+
+    # Raster bounds and CRS
+    with rasterio.open(raster_path) as src:
+        bounds = src.bounds
+        crs = src.crs
+
+    xmin, ymin, xmax, ymax = bounds
+    transformer = pyproj.Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
+
+    # Show georeferenced raster if requested
+    if show_georef:
+        raster_img_path = os.path.join(OUTPUT_DIR, "georeferenced_rgba.png")
+        if os.path.exists(raster_img_path):
+            lon0, lat0 = transformer.transform(xmin, ymin)
+            lon1, lat1 = transformer.transform(xmax, ymax)
+            ImageOverlay(
+                image=raster_img_path,
+                bounds=[[lat0, lon0], [lat1, lon1]],
+                opacity=0.7,
+                interactive=True
+            ).add_to(m)
+
+    # Grid overlay
+    if show_grid:
+        grid_values = []
+        for fname in os.listdir(OUTPUT_DIR):
+            if fname.startswith("street_matches_tile") and fname.endswith(".csv"):
+                df = pd.read_csv(os.path.join(OUTPUT_DIR, fname))
+                if df.empty:
+                    continue
+
+                # Tile bounds
+                tile_xmin, tile_xmax = df['x'].min(), df['x'].max()
+                tile_ymin, tile_ymax = df['y'].min(), df['y'].max()
+                n_cols = int(np.ceil((tile_xmax - tile_xmin) / CELL_SIZE_M))
+                n_rows = int(np.ceil((tile_ymax - tile_ymin) / CELL_SIZE_M))
+
+                grid = np.zeros((n_rows, n_cols))
+                counts = np.zeros((n_rows, n_cols))
+
+                for _, row in df.iterrows():
+                    col = int((row['x'] - tile_xmin) // CELL_SIZE_M)
+                    row_idx = int((row['y'] - tile_ymin) // CELL_SIZE_M)
+                    if 0 <= col < n_cols and 0 <= row_idx < n_rows:
+                        grid[row_idx, col] += row['osm_match_score']
+                        counts[row_idx, col] += 1
+
+                mask = counts > 0
+                grid[mask] /= counts[mask]  # average OSM match score
+                grid_values.append((grid, tile_xmin, tile_ymin, n_rows, n_cols))
+
+        # Flatten all grid values for global min/max
+        all_scores = np.concatenate([g[0].flatten() for g in grid_values])
+        min_val, max_val = all_scores.min(), all_scores.max()
+        if min_val == max_val:
+            max_val = min_val + 1e-6
+
+        cmap = cm.get_cmap('Reds')
+
+        # Create a linear colormap for legend
+        colormap = linear.Reds_09.scale(min_val, max_val)
+        colormap.caption = 'Average OSM Match Score'
+        colormap.add_to(m)
+
+        # Draw grid cells
+        for grid, tile_xmin, tile_ymin, n_rows, n_cols in grid_values:
+            for r in range(n_rows):
+                for c in range(n_cols):
+                    val = grid[r, c]
+                    if val <= 0:
+                        continue
+                    norm_val = (val - min_val) / (max_val - min_val)
+                    color = colors.to_hex(cmap(norm_val))
+                    x0 = tile_xmin + c * CELL_SIZE_M
+                    y0 = tile_ymin + r * CELL_SIZE_M
+                    x1 = x0 + CELL_SIZE_M
+                    y1 = y0 + CELL_SIZE_M
+                    lon0, lat0 = transformer.transform(x0, y0)
+                    lon1, lat1 = transformer.transform(x1, y1)
+                    folium.Rectangle(
+                        bounds=[[lat0, lon0], [lat1, lon1]],
+                        color=None,  # no border
+                        weight=0,
+                        fill=True,
+                        fill_color=color,
+                        fill_opacity=0.7,
+                        popup=f"{val:.2f}"  # only the value
+                    ).add_to(m)
+
+    return m._repr_html_()
+
+def get_map_widgets(city_component):
+    map_output = gr.HTML(value="Map will appear here once you type a city", elem_id="map-widget")
+    show_grid = gr.Checkbox(label="Draw Grid", value=False)
+    show_georef = gr.Checkbox(label="Show Georeferenced Map", value=False)
+
+    inputs = [city_component, show_grid, show_georef]
+    for comp in inputs:
+        comp.change(fn=make_map, inputs=inputs, outputs=[map_output])
+
+    return map_output, show_grid, show_georef
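A forward-compatibility note on `cm.get_cmap('Reds')` in the new module: `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9, so the grid branch will raise on recent installs. The registry lookup is a drop-in replacement:

    import matplotlib
    from matplotlib import colors

    # Drop-in replacement for the deprecated cm.get_cmap('Reds'):
    cmap = matplotlib.colormaps['Reds']

    # Usage is unchanged: map a normalized score in [0, 1] to a hex color.
    print(colors.to_hex(cmap(0.5)))

Separately, if no street_matches_tile*.csv files exist yet, `grid_values` stays empty and `np.concatenate([])` raises ValueError; an early `if not grid_values:` guard would avoid that.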