added OUTPUT_PATH
- annotation_tab/annotation_logic.py +3 -3
- config.py +5 -0
- inference_tab/inference_logic.py +35 -35
annotation_tab/annotation_logic.py
CHANGED
@@ -2,11 +2,11 @@ import os
 import pandas as pd
 import threading
 import gradio as gr
-
+from config import OUTPUT_DIR
 
 # ==== CONFIG ====
-IMAGE_FOLDER = "
-CSV_FILE =
+IMAGE_FOLDER = os.path.join(OUTPUT_DIR,"blobs")
+CSV_FILE = os.path.join(OUTPUT_DIR,"annotations")
 
 # ==== STATE ====
 if os.path.exists(CSV_FILE):
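With this hunk the annotation tab derives both its image folder and its annotation CSV from the shared output directory instead of hardcoded paths. A minimal standalone sketch of the state bootstrap this feeds into, assuming the tab starts from an empty table when the inference tab has not written annotations yet (the "image"/"label" column names are illustrative, not taken from the diff):

import os
import pandas as pd
from config import OUTPUT_DIR

IMAGE_FOLDER = os.path.join(OUTPUT_DIR, "blobs")    # blob crops produced by the inference tab
CSV_FILE = os.path.join(OUTPUT_DIR, "annotations")  # annotation table shared between tabs

if os.path.exists(CSV_FILE):
    annotations = pd.read_csv(CSV_FILE)  # resume a previous annotation session
else:
    annotations = pd.DataFrame(columns=["image", "label"])  # hypothetical empty schema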
config.py
ADDED
@@ -0,0 +1,5 @@
+import os
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+OUTPUT_DIR = os.path.join(BASE_DIR, "output")
+os.makedirs(OUTPUT_DIR, exist_ok=True)
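config.py is new: it anchors a single output directory next to the code and creates it at import time. Assembled from the paths introduced in the hunks below, the pipeline's artifacts now land under that directory roughly as follows (the blobs subfolder itself is assumed to be created elsewhere, since this diff only creates output/):

output/
    blobs/               per-blob crops written by extractSegments, read by blobsOCR and the annotation tab
    annotations          annotation CSV used by annotation_logic.py
    boxes.json           YOLO boxes written by getBBoxes, read by getSegments
    mask.tif             instance mask written by getSegments, passed to georefImg by run_inference
    tmp.tif              intermediate raster from gdal.Translate
    georeferenced.tif    gdalwarp output, read by extractCentroids
    centroids.csv        blob centroids written by extractCentroids
    ocr                  OCR output written by blobsOCR
    osm_extract.geojson  OSM street edges written by extractStreetNet
    street_matches.csv   fuzzy-match results written by fuzzyMatch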
inference_tab/inference_logic.py
CHANGED
@@ -16,7 +16,7 @@ from osgeo import gdal
 import geopandas as gpd
 from rapidfuzz import process, fuzz
 from huggingface_hub import hf_hub_download
-
+from config import OUTPUT_DIR
 
 yolo_weights = hf_hub_download(
     repo_id="muk42/yolov9_streets",
@@ -33,7 +33,8 @@ def run_inference(image_path, gcp_path, city_name, score_th):
     yield from blobsOCR(image_path)
 
     # === ADD GEO DATA ===
-
+    MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
+    yield from georefImg(MASK_PATH, gcp_path)
     yield from extractCentroids(image_path)
     yield from extractStreetNet(city_name)
 
@@ -106,12 +107,12 @@ def getBBoxes(image_path, tile_size=256, overlap=0.3, confidence_threshold=0.25)
     ]
 
 
-
-
-    with open(
+    BOXES_PATH = os.path.join(OUTPUT_DIR,"boxes.json")
+
+    with open(BOXES_PATH, "w") as f:
         json.dump(boxes_to_save, f, indent=4)
 
-    yield f"Inference complete.
+    yield f"Inference complete."
 
 
 def box_inside_global(box, global_box):
@@ -325,7 +326,8 @@ def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
     model = SAM("sam2.1_l.pt")
 
     # Load YOLO-predicted boxes
-
+    BOXES_PATH = os.path.join(OUTPUT_DIR,"boxes.json")
+    with open(BOXES_PATH, "r") as f:
         box_data = json.load(f)
 
 
@@ -474,7 +476,8 @@ def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
 
 
     final_mask = Image.fromarray(full_mask)
-
+    MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
+    final_mask.save(MASK_PATH)
 
     yield f"Saved mask with {instance_id - 1} instances"
 
@@ -483,7 +486,8 @@ def getSegments(image_path,iou=0.5,c_th=0.75,edge_margin=10):
 def extractSegments(image_path, min_size=500, margin=10):
 
     image = cv2.imread(image_path)
-
+    MASK_PATH = os.path.join(OUTPUT_DIR,"mask.tif")
+    mask = cv2.imread(MASK_PATH, cv2.IMREAD_UNCHANGED)
 
     height, width = mask.shape[:2]
 
@@ -524,9 +528,8 @@ def extractSegments(image_path, min_size=500, margin=10):
         masked_image = cv2.bitwise_and(cropped_image, cropped_image, mask=cropped_mask)
 
         # Save the masked image
-
-
-        cv2.imwrite(output_path, masked_image)
+        BLOB_PATH=os.path.join(OUTPUT_DIR,"blobs",f"{blob_id}.png")
+        cv2.imwrite(BLOB_PATH, masked_image)
 
     yield f"Done."
 
@@ -548,9 +551,10 @@ def blobsOCR(image_path):
 
 
     # Open output file for writing
-
+    OCR_PATH = os.path.join(OUTPUT_DIR,"ocr")
+    with open(OCR_PATH, "w", encoding="utf-8") as f_out:
         # Process each image
-        image_folder = "
+        image_folder = os.path.join(OUTPUT_DIR,"blobs")
         for filename in os.listdir(image_folder):
             if filename.lower().endswith(image_extensions):
                 image_path = os.path.join(image_folder, filename)
@@ -575,8 +579,8 @@ def blobsOCR(image_path):
 
 
 def extractCentroids(image_path):
-
-    with rasterio.open(
+    GEO_PATH=os.path.join(OUTPUT_DIR,"georeferenced.tif")
+    with rasterio.open(GEO_PATH) as src:
         mask = src.read(1)
         transform = src.transform
 
@@ -612,22 +616,13 @@ def extractCentroids(image_path):
         data.append({"blob_id": label, "x": centroid.x, "y": centroid.y})
 
     df = pd.DataFrame(data)
-
+    COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
+    df.to_csv(COORD_PATH, index=False)
     yield f"Saved centroid coordinates of {len(labels)} blobs."
 
 
 
-def collectBlobs(image_path):
-    filename = os.path.splitext(os.path.basename(image_path))[0]
-    box_dir = "output/blobs"
-    # Get all filenames in the folder (only files, not subfolders)
-    file_names = [f for f in os.listdir(box_dir) if os.path.isfile(os.path.join(box_dir, f))]
 
-    # Save to text file
-    with open(f"output/{filename}_blobs.txt", "w") as f:
-        for name in file_names:
-            yield f"Writing {name}..."
-            f.write(name + "\n")
 
 def img_shape(image_path):
     img = cv2.imread(image_path)
@@ -656,7 +651,7 @@ def georefImg(image_path, gcp_path):
 
 
 
-    tmp_file = "
+    tmp_file = os.path.join(OUTPUT_DIR,"tmp.tif")
 
     gdal.Translate(
         tmp_file,
@@ -668,7 +663,7 @@ def georefImg(image_path, gcp_path):
 
 
 
-    geo_file = "
+    geo_file = os.path.join(OUTPUT_DIR,"georeferenced.tif")
     yield "Running gdalwarp..."
 
     gdal.Warp(
@@ -691,7 +686,8 @@ def extractStreetNet(city_name):
     G_proj = ox.project_graph(G)
     nodes, edges = ox.graph_to_gdfs(G_proj)
     edges_3857 = edges.to_crs(epsg=3857)
-
+    OSM_PATH=os.path.join(OUTPUT_DIR,"osm_extract.geojson")
+    edges_3857.to_file(OSM_PATH, driver="GeoJSON")
     yield "Done."
 
 
@@ -707,8 +703,10 @@ def best_street_match(point, query_name, edges_gdf, max_distance=100):
     return best_match # (name, score, index)
 
 def fuzzyMatch():
-
-
+    COORD_PATH=os.path.join(OUTPUT_DIR,"centroids.csv")
+    OCR_PATH=os.path.join(OUTPUT_DIR,"ocr.csv")
+    coords_df = pd.read_csv(COORD_PATH)
+    names_df = pd.read_csv(OCR_PATH,sep="\t",columns=[['blob_id','pred_text']])
     merged_df = coords_df.merge(names_df, on="blob_id")
 
     gdf = gpd.GeoDataFrame(
@@ -717,7 +715,8 @@ def fuzzyMatch():
         crs="EPSG:3857"
     )
 
-
+    OSM_PATH=os.path.join(OUTPUT_DIR,"osm_extract.geojson")
+    osm_gdf = gpd.read_file(OSM_PATH)
     osm_gdf = osm_gdf[osm_gdf['name'].notnull()]
 
     yield "Process OSM candidates..."
@@ -744,5 +743,6 @@ def fuzzyMatch():
     })
 
     results_df = pd.DataFrame(results)
-
-
+    RES_PATH=os.path.join(OUTPUT_DIR,"street_matches.csv")
+    results_df.to_csv(RES_PATH, index=False)
+    yield f"{RES_PATH}/street_matches.csv"