map tab and seq in annotations

Files changed:
- annotation_tab/annotation_logic.py  +71 -60
- app.py  +7 -6
- inference_tab/inference_logic.py  +9 -17
- map_tab/map_logic.py  +0 -0
- map_tab/map_setup.py  +128 -5
annotation_tab/annotation_logic.py  CHANGED

@@ -9,47 +9,48 @@ IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")
 os.makedirs(IMAGE_FOLDER, exist_ok=True)
 CSV_FILE = os.path.join(OUTPUT_DIR, "annotations.csv")
 
-
 # ==== HELPER COUNTER ====
 def get_progress_text():
     if not all_images_paths:
         return "No images loaded"
     return f"Image {current_index + 1} of {len(all_images_paths)}"
 
-
 # ==== STATE ====
 if os.path.exists(CSV_FILE):
-    df_annotations = pd.read_csv(CSV_FILE)
-    annotated_ids = set(df_annotations["blob_id"].astype(str).tolist())
+    df_annotations = pd.read_csv(CSV_FILE, dtype={"blob_id": str, "human_ocr": str})
 else:
     df_annotations = pd.DataFrame(columns=["blob_id", "human_ocr"])
     df_annotations.to_csv(CSV_FILE, index=False)
-    annotated_ids = set()
 
-all_images = [f for f in os.listdir(IMAGE_FOLDER) if f.lower().endswith(('.png', '.jpg', '.jpeg')) and '_margin' in f]
+all_images = [
+    f for f in os.listdir(IMAGE_FOLDER)
+    if f.lower().endswith(('.png', '.jpg', '.jpeg')) and '_margin' in f
+]
 all_images_paths = [os.path.join(IMAGE_FOLDER, f) for f in all_images]
 current_index = 0
 
-
+# ==== HELPERS ====
 def get_current_image_path():
     if 0 <= current_index < len(all_images_paths):
         return all_images_paths[current_index]
     return None
 
-
 def is_annotated(image_path):
+    """Return True if the image has any non-empty annotation, including 'DELETED'."""
     blob_id = os.path.basename(image_path).replace("_margin", "")
-    return blob_id in annotated_ids
-
+    row = df_annotations[df_annotations["blob_id"] == blob_id]
+    if not row.empty:
+        val = str(row["human_ocr"].values[-1]).strip()
+        return val != ""  # counts "DELETED" as annotated
+    return False
 
 def get_annotation_for_image(image_path):
     blob_id = os.path.basename(image_path).replace("_margin", "")
     row = df_annotations[df_annotations["blob_id"] == blob_id]
     if not row.empty:
-        return row["human_ocr"].values[0]
+        return str(row["human_ocr"].values[-1])
     return ""
 
-
 def find_next_unannotated_index(start):
     n = len(all_images_paths)
     idx = start

@@ -59,24 +60,28 @@ def find_next_unannotated_index(start):
         return idx
     return None
 
-
+# ==== CORE FUNCTIONS ====
 def save_annotation(user_text):
-    global df_annotations, annotated_ids
+    """Save the current annotation for the active image."""
+    global df_annotations
+
     img_path = get_current_image_path()
-    if img_path:
-        filename = os.path.basename(img_path)
-        blob_id = filename.replace("_margin", "")
-        text_value = user_text.strip() if user_text and user_text.strip() else ""
+    if not img_path:
+        return
 
-        if blob_id in df_annotations["blob_id"].values:
-            df_annotations.loc[df_annotations["blob_id"] == blob_id, "human_ocr"] = text_value
-        else:
-            new_row = pd.DataFrame([{"blob_id": blob_id, "human_ocr": text_value}])
-            df_annotations = pd.concat([df_annotations, new_row], ignore_index=True)
-        annotated_ids.add(blob_id)
+    blob_id = os.path.basename(img_path).replace("_margin", "")
+    text_value = user_text.strip() if user_text else ""
 
-        df_annotations.to_csv(CSV_FILE, index=False)
+    row_idx = df_annotations.index[df_annotations["blob_id"] == blob_id].tolist()
+    if row_idx:
+        df_annotations.at[row_idx[0], "human_ocr"] = text_value
+    else:
+        df_annotations = pd.concat(
+            [df_annotations, pd.DataFrame([{"blob_id": blob_id, "human_ocr": text_value}])],
+            ignore_index=True
+        )
 
+    df_annotations.to_csv(CSV_FILE, index=False)
 
 def save_and_next(user_text):
     global current_index

@@ -84,19 +89,23 @@ def save_and_next(user_text):
         return None, "", gr.update(visible=True, value="No images available."), "No image loaded", "No images loaded"
 
     save_annotation(user_text)
-    next_idx = find_next_unannotated_index(current_index)
-    if next_idx is None:
-        return None, "", gr.update(visible=True, value="All images annotated."), "", get_progress_text()
 
+    # Check if all images are annotated
+    if all(is_annotated(p) for p in all_images_paths):
+        current_index = 0
+        img_path = get_current_image_path()
+        annotation = get_annotation_for_image(img_path)
+        return img_path, annotation, gr.update(visible=True, value="All images annotated."), img_path, get_progress_text()
+
+    next_idx = find_next_unannotated_index(current_index)
     current_index = next_idx
     img_path = get_current_image_path()
    annotation = get_annotation_for_image(img_path)
     return img_path, annotation, gr.update(visible=False), img_path, get_progress_text()
 
-
 def previous_image():
     global current_index
-    if not all_images_paths:
+    if not all_images_paths:
         return None, "", gr.update(visible=True, value="No images available."), "No image loaded", "No images loaded"
 
     current_index = (current_index - 1) % len(all_images_paths)

@@ -104,63 +113,65 @@ def previous_image():
     annotation = get_annotation_for_image(img_path)
     return img_path, annotation, gr.update(visible=False), img_path, get_progress_text()
 
-
 def delete_and_next():
-    global current_index
+    """Mark current image as DELETED and move to next image."""
+    global current_index, df_annotations
 
     img_path = get_current_image_path()
     if not img_path:
         return None, "", gr.update(visible=True, value="No images available."), "No image loaded", "No images loaded"
 
     blob_id = os.path.basename(img_path).replace("_margin", "")
-
-    # mark as deleted
-    if blob_id in df_annotations["blob_id"].values:
-        df_annotations.loc[df_annotations["blob_id"] == blob_id, "human_ocr"] = "DELETED"
+    row_idx = df_annotations.index[df_annotations["blob_id"] == blob_id].tolist()
+    if row_idx:
+        df_annotations.at[row_idx[0], "human_ocr"] = "DELETED"
     else:
-        new_row = pd.DataFrame([{"blob_id": blob_id, "human_ocr": "DELETED"}])
-        df_annotations = pd.concat([df_annotations, new_row], ignore_index=True)
-    annotated_ids.add(blob_id)
+        df_annotations = pd.concat(
+            [df_annotations, pd.DataFrame([{"blob_id": blob_id, "human_ocr": "DELETED"}])],
+            ignore_index=True
+        )
 
     df_annotations.to_csv(CSV_FILE, index=False)
 
-    # move to the next image
-    current_index = (current_index + 1) % len(all_images_paths)
+    # Check if all images are annotated
+    if all(is_annotated(p) for p in all_images_paths):
+        current_index = 0
+        img_path = get_current_image_path()
+        annotation = get_annotation_for_image(img_path)
+        return img_path, annotation, gr.update(visible=True, value="All images annotated."), img_path, get_progress_text()
+
+    # Otherwise, move to next unannotated image
+    next_idx = find_next_unannotated_index(current_index)
+    if next_idx is not None:
+        current_index = next_idx
+    else:
+        current_index = 0
+
     img_path = get_current_image_path()
     annotation = get_annotation_for_image(img_path)
     return img_path, annotation, gr.update(visible=False), img_path, get_progress_text()
 
 
-
-def shutdown():
-    os._exit(0)
-
-
 def save_and_exit(user_text):
     if get_current_image_path() is not None:
         save_annotation(user_text)
-    threading.Timer(1, shutdown).start()
+    threading.Timer(1, lambda: os._exit(0)).start()
     return None, "", gr.update(visible=True, value="Session closed."), "", get_progress_text()
 
-
 def get_current_annotations_path():
-    return
+    return CSV_FILE
 
 def refresh_image_list():
-    """
-    Reload images from the blobs folder and reset state.
-
-    Returns the first image and its annotation placeholders.
-    """
-    global all_images_paths, current_index, df_annotations, annotated_ids
-
-    # clear annotations.csv
+    """Reload images and reset CSV and state."""
+    global all_images_paths, current_index, df_annotations
+
     df_annotations = pd.DataFrame(columns=["blob_id", "human_ocr"])
     df_annotations.to_csv(CSV_FILE, index=False)
-    annotated_ids = set()
 
-    all_images = [f for f in os.listdir(IMAGE_FOLDER) if f.lower().endswith(('.png', '.jpg', '.jpeg')) and '_margin' in f]
+    all_images = [
+        f for f in os.listdir(IMAGE_FOLDER)
+        if f.lower().endswith(('.png', '.jpg', '.jpeg')) and '_margin' in f
+    ]
 all_images_paths = [os.path.join(IMAGE_FOLDER, f) for f in all_images]
 current_index = 0
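A note on the `dtype={"blob_id": str, "human_ocr": str}` change above: without it, pandas infers an integer dtype for purely numeric blob ids, so the string comparisons used throughout (`df_annotations["blob_id"] == blob_id`) silently match nothing. A minimal illustration (the values are invented):

    import io
    import pandas as pd

    csv = io.StringIO("blob_id,human_ocr\n0012,Hauptstrasse\n")

    inferred = pd.read_csv(csv)                   # blob_id is parsed as int 12
    print((inferred["blob_id"] == "0012").any())  # False: int never equals str

    csv.seek(0)
    typed = pd.read_csv(csv, dtype={"blob_id": str, "human_ocr": str})
    print((typed["blob_id"] == "0012").any())     # True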
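Also, the body of `find_next_unannotated_index` falls between the hunks, so only its first and last lines appear in the diff. A sketch of the wrap-around scan those lines imply, parameterized here so it runs standalone (the module version presumably closes over `all_images_paths` and `is_annotated`):

    def find_next_unannotated_index(start, paths, is_annotated):
        """Scan forward from `start`, wrapping around once; return the index
        of the first unannotated image, or None if all are annotated."""
        n = len(paths)
        idx = start
        for _ in range(n):                 # visit each index at most once
            if not is_annotated(paths[idx]):
                return idx
            idx = (idx + 1) % n            # wrap past the last image
        return None

    # With "a" and "b" already annotated, the scan from index 0 lands on "c":
    print(find_next_unannotated_index(0, ["a", "b", "c"], lambda p: p in {"a", "b"}))  # 2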
app.py  CHANGED

@@ -1,5 +1,5 @@
 # [DEBUG]
-
+from osgeo import gdal
 
 import gradio as gr
 import logging

@@ -11,22 +11,23 @@ from map_tab import get_map_widgets
 logging.basicConfig(level=logging.DEBUG)
 
 
+
 with gr.Blocks() as demo:
     with gr.Tab("Inference"):
-        get_inference_widgets(run_inference,georefImg)
+        image_input, gcp_input, city_name, score_th, run_button, output, download_file = get_inference_widgets(run_inference,georefImg)
     with gr.Tab("Annotation"):
         get_annotation_widgets()
-
-
+    with gr.Tab("Map"):
+        get_map_widgets(city_name)
 
 
 
 
 # [DEBUG]
-#demo.launch(inbrowser=True)
+demo.launch(inbrowser=True)
 
 # [PROD]
-demo.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False)
+#demo.launch(server_name="0.0.0.0", server_port=7860, inbrowser=False)
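The app.py rewiring relies on the fact that a Gradio component created in one `gr.Tab` can feed events in another: `city_name` is returned by the Inference tab's widget builder and handed to `get_map_widgets`. A self-contained sketch of the pattern (the component and function names here are illustrative, not from the repo):

    import gradio as gr

    def render_for_city(city):
        return f"<b>Map would be rendered for: {city}</b>"

    with gr.Blocks() as demo:
        with gr.Tab("Inference"):
            city_name = gr.Textbox(label="City")  # created in the first tab
        with gr.Tab("Map"):
            map_output = gr.HTML()
            # ...and used as an event input from the second tab
            city_name.change(fn=render_for_city, inputs=[city_name], outputs=[map_output])

    demo.launch()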
inference_tab/inference_logic.py  CHANGED

@@ -23,6 +23,7 @@ from PIL import Image
 from .helpers import box_inside_global,nms_iou,non_max_suppression,tile_image_with_overlap,compute_iou,merge_boxes,box_area,is_contained,merge_boxes_iterative,get_corner_points,sample_negative_points_outside_boxes,get_inset_corner_points,processYOLOBoxes,prepare_tiles,merge_tile_masks,chunkify,img_shape,best_street_match
 from pyproj import Transformer
 import shutil
+import re
 
 # Global cache
 _trocr_processor = None

@@ -89,9 +90,6 @@ def run_inference(tile_dict, gcp_path, city_name, score_th):
     for msg in georefTile(coords,gcp_path):
         log += msg + "\n"
         yield log, None
-    '''for msg in georefImg(MASK_PATH, gcp_path):
-        log += msg + "\n"
-        yield log, None'''
     for msg in extractCentroids(image_path):
         log += msg + "\n"
         yield log, None

@@ -100,7 +98,7 @@ def run_inference(tile_dict, gcp_path, city_name, score_th):
         yield log, None
 
     # === POST OCR ===
-    for msg in fuzzyMatch(score_th):
+    for msg in fuzzyMatch(score_th,tile_dict):
         if msg.endswith(".csv"):
             log+= f"Finished! CSV saved at {msg}. Street labels are ready for manual input."
             yield log, msg

@@ -127,7 +125,7 @@ def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25)
 
     yolo_weights = hf_hub_download(
         repo_id="muk42/yolov9_streets",
-        filename="
+        filename="yolov9c_finetuned_v2.pt")  # fine-tuned on selection of city maps
 
     model = YOLO(yolo_weights)

@@ -500,10 +498,6 @@ def georefImg(image_path, gcp_path):
         )
     )
 
-
-
-
-
     gdal.Translate(
         TMP_FILE,
         image_path,

@@ -512,11 +506,6 @@ def georefImg(image_path, gcp_path):
         outputSRS="EPSG:3857"
     )
 
-
-
-
-    yield "Running gdalwarp..."
-
     gdal.Warp(
         GEO_FILE,
         TMP_FILE,

@@ -528,7 +517,7 @@ def georefImg(image_path, gcp_path):
 
 
 
-    yield "
+    yield "Georeferencing is done."
 
 
 def extractStreetNet(city_name):

@@ -577,7 +566,7 @@ def extractStreetNet(city_name):
 
 
 
-def fuzzyMatch(score_th):
+def fuzzyMatch(score_th,tile_dict):
     COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
     OCR_PATH=os.path.join(OUTPUT_DIR,"ocr.csv")
     coords_df = pd.read_csv(COORD_PATH)

@@ -621,7 +610,10 @@ def fuzzyMatch(score_th):
         })
 
     results_df = pd.DataFrame(results)
-    RES_PATH=os.path.join(OUTPUT_DIR,"street_matches.csv")
+    tile = tile_dict["tile_path"]
+    match = re.search(r'\d+', tile)
+    tile_number=int(match.group())
+    RES_PATH=os.path.join(OUTPUT_DIR,f"street_matches_tile{tile_number}.csv")
     results_df.to_csv(RES_PATH, index=False)
 
     # remove street labels from blobs folder that are more than or equal to score threshold
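One caveat on the new tile-number extraction in `fuzzyMatch`: `re.search(r'\d+', tile)` returns the first digit run anywhere in the path, so a digit in a parent directory wins over the tile index, and `match.group()` raises `AttributeError` when the path has no digits at all. Searching only the basename is the safer variant (the paths below are invented):

    import os
    import re

    tile = "/data/run2/tiles/tile_17.png"  # invented example path

    # First digit run in the full path comes from "run2", not the tile index:
    print(re.search(r'\d+', tile).group())  # -> 2

    # Restricting the search to the basename pins it to the tile index:
    match = re.search(r'\d+', os.path.basename(tile))
    tile_number = int(match.group()) if match else None
    print(tile_number)  # -> 17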
map_tab/map_logic.py  ADDED

File without changes.
map_tab/map_setup.py  CHANGED

@@ -1,8 +1,131 @@
+import os
 import gradio as gr
 import folium
+from folium.raster_layers import ImageOverlay
+from geopy.geocoders import Nominatim
+import rasterio
+import numpy as np
+from matplotlib import cm, colors
+import pandas as pd
+import pyproj
+from config import OUTPUT_DIR
+from branca.colormap import linear
 
-
-
-
-
-
+CELL_SIZE_M = 500  # meters
+
+def make_map(city, show_grid, show_georef):
+    city = city.strip()
+    if not city:
+        return "Please enter a city"
+
+    geolocator = Nominatim(user_agent="my_app")
+    loc = geolocator.geocode(city)
+    if loc is None:
+        return f"Could not find '{city}'"
+
+    m = folium.Map(location=[loc.latitude, loc.longitude], zoom_start=12)
+
+    raster_path = os.path.join(OUTPUT_DIR, "georeferenced.tif")
+    if not os.path.exists(raster_path):
+        return "Georeferenced raster not found"
+
+    # Raster bounds and CRS
+    with rasterio.open(raster_path) as src:
+        bounds = src.bounds
+        crs = src.crs
+
+    xmin, ymin, xmax, ymax = bounds
+    transformer = pyproj.Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
+
+    # Show georeferenced raster if requested
+    if show_georef:
+        raster_img_path = os.path.join(OUTPUT_DIR, "georeferenced_rgba.png")
+        if os.path.exists(raster_img_path):
+            lon0, lat0 = transformer.transform(xmin, ymin)
+            lon1, lat1 = transformer.transform(xmax, ymax)
+            ImageOverlay(
+                image=raster_img_path,
+                bounds=[[lat0, lon0], [lat1, lon1]],
+                opacity=0.7,
+                interactive=True
+            ).add_to(m)
+
+    # Grid overlay
+    if show_grid:
+        grid_values = []
+        for fname in os.listdir(OUTPUT_DIR):
+            if fname.startswith("street_matches_tile") and fname.endswith(".csv"):
+                df = pd.read_csv(os.path.join(OUTPUT_DIR, fname))
+                if df.empty:
+                    continue
+
+                # Tile bounds
+                tile_xmin, tile_xmax = df['x'].min(), df['x'].max()
+                tile_ymin, tile_ymax = df['y'].min(), df['y'].max()
+                n_cols = int(np.ceil((tile_xmax - tile_xmin) / CELL_SIZE_M))
+                n_rows = int(np.ceil((tile_ymax - tile_ymin) / CELL_SIZE_M))
+
+                grid = np.zeros((n_rows, n_cols))
+                counts = np.zeros((n_rows, n_cols))
+
+                for _, row in df.iterrows():
+                    col = int((row['x'] - tile_xmin) // CELL_SIZE_M)
+                    row_idx = int((row['y'] - tile_ymin) // CELL_SIZE_M)
+                    if 0 <= col < n_cols and 0 <= row_idx < n_rows:
+                        grid[row_idx, col] += row['osm_match_score']
+                        counts[row_idx, col] += 1
+
+                mask = counts > 0
+                grid[mask] /= counts[mask]  # average OSM match score
+                grid_values.append((grid, tile_xmin, tile_ymin, n_rows, n_cols))
+
+        # Flatten all grid values for global min/max
+        all_scores = np.concatenate([g[0].flatten() for g in grid_values])
+        min_val, max_val = all_scores.min(), all_scores.max()
+        if min_val == max_val:
+            max_val = min_val + 1e-6
+
+        cmap = cm.get_cmap('Reds')
+
+        # Create a linear colormap for legend
+        colormap = linear.Reds_09.scale(min_val, max_val)
+        colormap.caption = 'Average OSM Match Score'
+        colormap.add_to(m)
+
+        # Draw grid cells
+        for grid, tile_xmin, tile_ymin, n_rows, n_cols in grid_values:
+            for r in range(n_rows):
+                for c in range(n_cols):
+                    val = grid[r, c]
+                    if val <= 0:
+                        continue
+                    norm_val = (val - min_val) / (max_val - min_val)
+                    color = colors.to_hex(cmap(norm_val))
+                    x0 = tile_xmin + c * CELL_SIZE_M
+                    y0 = tile_ymin + r * CELL_SIZE_M
+                    x1 = x0 + CELL_SIZE_M
+                    y1 = y0 + CELL_SIZE_M
+                    lon0, lat0 = transformer.transform(x0, y0)
+                    lon1, lat1 = transformer.transform(x1, y1)
+                    folium.Rectangle(
+                        bounds=[[lat0, lon0], [lat1, lon1]],
+                        color=None,  # no border
+                        weight=0,
+                        fill=True,
+                        fill_color=color,
+                        fill_opacity=0.7,
+                        popup=f"{val:.2f}"  # only the value
+                    ).add_to(m)
+
+    return m._repr_html_()
+
+def get_map_widgets(city_component):
+    map_output = gr.HTML(value="Map will appear here once you type a city", elem_id="map-widget")
+    show_grid = gr.Checkbox(label="Draw Grid", value=False)
+    show_georef = gr.Checkbox(label="Show Georeferenced Map", value=False)
+
+    inputs = [city_component, show_grid, show_georef]
+    for comp in inputs:
+        comp.change(fn=make_map, inputs=inputs, outputs=[map_output])
+
+    return map_output, show_grid, show_georef
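A forward-compatibility note on `cm.get_cmap('Reds')` in the new module: `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9, so the grid branch will raise on recent installs. The registry lookup is a drop-in replacement:

    import matplotlib
    from matplotlib import colors

    # Drop-in replacement for the deprecated cm.get_cmap('Reds'):
    cmap = matplotlib.colormaps['Reds']

    # Usage is unchanged: map a normalized score in [0, 1] to a hex color.
    print(colors.to_hex(cmap(0.5)))

Separately, if no street_matches_tile*.csv files exist yet, `grid_values` stays empty and `np.concatenate([])` raises ValueError; an early `if not grid_values:` guard would avoid that.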