Spaces:
Sleeping
Sleeping
File size: 12,400 Bytes
c77f6df 35a3e38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 |
import gradio as gr
from transformers import pipeline
import os
import cv2
from ultralytics import YOLO
import shutil # Import shutil for copying files
import zipfile # Import zipfile for creating zip archives
def multi_model_detection(image_paths_list: list, model_paths_list: list, output_dir: str = 'detection_results', conf_threshold: float = 0.25):
    """Run one or more YOLOv8 models over a batch of images.

    Every image is annotated with the detections of every loaded model
    (colour-coded per model) and a per-image text report is written next to it.

    Args:
        image_paths_list (list): Paths of the images to process.
        model_paths_list (list): Paths of the ``.pt`` model files. When empty,
            the default ``yolov8n.pt`` is used instead.
        output_dir (str): Directory for annotated images and text reports;
            created automatically if it does not exist.
        conf_threshold (float): Detections below this confidence are dropped.

    Returns:
        tuple[list, list]: (annotated image paths, text report paths). Both
        lists are empty when no model could be loaded.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"已創建輸出目錄: {output_dir}")

    # --- Load models -------------------------------------------------------
    loaded_models = []  # list of (model_path, YOLO model) pairs
    print("\n--- 載入模型 ---")
    if not model_paths_list:
        # No user-supplied models: fall back to the stock nano model.
        default_model_path = 'yolov8n.pt'
        try:
            loaded_models.append((default_model_path, YOLO(default_model_path)))
            print(f"成功載入預設模型: {default_model_path}")
        except Exception as e:
            print(f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}")
            return [], []
    else:
        for model_path in model_paths_list:
            try:
                loaded_models.append((model_path, YOLO(model_path)))
                print(f"成功載入模型: {model_path}")
            except Exception as e:
                # One bad model file must not abort the whole batch.
                print(f"錯誤: 無法載入模型 '{model_path}' - {e}")
                continue
    if not loaded_models:
        print("沒有模型成功載入,請檢查模型路徑或預設模型。")
        return [], []

    # Colour cycle used to distinguish detections from different models.
    # NOTE: OpenCV draws in BGR channel order. Built ONCE here (hoisted out of
    # the per-image loop — the model→colour mapping is loop-invariant).
    colors = [
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (255, 0, 255),
        (0, 255, 255),
        (128, 0, 0),
        (0, 128, 0),
    ]
    color_map = {
        os.path.basename(path): colors[idx % len(colors)]
        for idx, (path, _) in enumerate(loaded_models)
    }

    annotated_image_paths = []
    txt_output_paths = []

    # --- Process each image ------------------------------------------------
    print("\n--- 開始圖片辨識 ---")
    for image_path in image_paths_list:
        if not os.path.exists(image_path):
            print(f"警告: 圖片 '{image_path}' 不存在,跳過。")
            continue
        print(f"\n處理圖片: {os.path.basename(image_path)}")
        original_image = cv2.imread(image_path)
        if original_image is None:
            print(f"錯誤: 無法讀取圖片 '{image_path}',跳過。")
            continue

        # Draw on a NumPy copy so the source pixels stay untouched.
        annotated_image = original_image.copy()

        # Lines that will form the per-image text report.
        txt_output_content = [f"檔案: {os.path.basename(image_path)}\n"]
        # Detections of ALL models on the current image, drawn together below.
        all_detections_for_image = []

        for model_path_str, model_obj in loaded_models:
            model_name = os.path.basename(model_path_str)
            print(f" 使用模型 '{model_name}' 進行辨識...")
            # device="cpu" keeps inference on CPU even when a GPU is present.
            results = model_obj(image_path, verbose=False, device="cpu")[0]

            txt_output_content.append(f"\n--- 模型: {model_name} ---")
            if results.boxes:
                for box in results.boxes:
                    conf = float(box.conf[0])
                    if conf < conf_threshold:
                        continue  # below the user-selected threshold
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    cls_name = model_obj.names[int(box.cls[0])]
                    all_detections_for_image.append({
                        'model_name': model_name,
                        'class_name': cls_name,
                        'confidence': conf,
                        'bbox': (x1, y1, x2, y2),
                    })
                    txt_output_content.append(f" - {cls_name} (Conf: {conf:.2f}) [x1:{x1}, y1:{y1}, x2:{x2}, y2:{y2}]")
            else:
                txt_output_content.append(" 沒有偵測到任何物件。")

        # Draw every detection, colour-coded by the model that produced it.
        for det in all_detections_for_image:
            x1, y1, x2, y2 = det['bbox']
            model_name = det['model_name']
            color = color_map.get(model_name, (200, 200, 200))  # grey fallback
            cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
            # Abbreviate the model name (first letter of each dot-part, the
            # extension dropped) so the label stays short: 'yolov8n.pt' -> 'y'.
            # The `if s` guard avoids an IndexError on empty parts (e.g. '.pt').
            model_abbr = "".join([s[0] for s in model_name.split('.')[:-1] if s])
            label = f"{det['class_name']} {det['confidence']:.2f} ({model_abbr})"
            cv2.putText(annotated_image, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Persist the annotated image.
        image_name_without_ext = os.path.splitext(os.path.basename(image_path))[0]
        output_image_path = os.path.join(output_dir, f"{image_name_without_ext}_detected.jpg")
        cv2.imwrite(output_image_path, annotated_image)
        annotated_image_paths.append(output_image_path)
        print(f" 結果圖片保存至: {output_image_path}")

        # Persist the per-image text report.
        output_txt_path = os.path.join(output_dir, f"{image_name_without_ext}.txt")
        with open(output_txt_path, 'w', encoding='utf-8') as f:
            f.write("\n".join(txt_output_content))
        txt_output_paths.append(output_txt_path)
        print(f" 辨識資訊保存至: {output_txt_path}")

    print("\n--- 所有圖片處理完成 ---")
    return annotated_image_paths, txt_output_paths
def create_zip_archive(files, zip_filename):
    """Bundle the given files into a single zip archive.

    Missing files are skipped with a warning rather than aborting the archive.
    Each entry is stored under its basename (directory structure is dropped).

    Args:
        files: Iterable of file paths to include.
        zip_filename: Destination path of the archive to create.

    Returns:
        The path of the created archive (``zip_filename``).
    """
    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as archive:
        for file in files:
            # Guard clause: warn and move on when a listed file vanished.
            if not os.path.exists(file):
                print(f"警告: 檔案 '{file}' 不存在,無法加入壓縮檔。")
                continue
            archive.write(file, arcname=os.path.basename(file))
    return zip_filename
# --- Gradio Interface ---
def gradio_multi_model_detection(image_files, model_files, conf_threshold, output_subdir):
    """Gradio callback: run detection on the uploaded files and package results.

    Args:
        image_files: Uploaded image files (Gradio File objects with temp paths).
        model_files: Uploaded ``.pt`` model files, or None/empty to use the
            backend's default model.
        conf_threshold: Confidence threshold forwarded to the detector.
        output_subdir: Per-run subdirectory name for storing the results.

    Returns:
        tuple: (gallery image paths, combined report text, status message,
        zip archive path); the failure paths return Nones plus a message.
    """
    if not image_files:
        return None, "請上傳圖片檔案。", None, None

    # Gradio hands us tempfile wrappers; the detector wants plain path strings.
    image_paths = [f.name for f in image_files]
    model_paths = [f.name for f in model_files] if model_files else []

    # All runs live under one base directory, one subdirectory per run.
    run_output_dir = os.path.join('gradio_detection_results', output_subdir)

    annotated_images, detection_texts = multi_model_detection(
        image_paths_list=image_paths,
        model_paths_list=model_paths,
        output_dir=run_output_dir,
        conf_threshold=conf_threshold,
    )
    if not annotated_images:
        return None, "辨識失敗,請檢查輸入或模型。", None, None

    # Stitch every per-image report into one string for the textbox
    # (header and reports separated by blank lines, exactly as before).
    sections = ["--- 辨識結果 ---\n"]
    for txt_path in detection_texts:
        with open(txt_path, 'r', encoding='utf-8') as fh:
            sections.append(fh.read() + "\n")
    combined_detection_text = "\n".join(sections) + "\n"

    # One downloadable zip holding both the annotated images and the reports.
    created_zip_path = create_zip_archive(
        annotated_images + detection_texts,
        os.path.join(run_output_dir, f"{output_subdir}_results.zip"),
    )

    status = f"結果儲存於: {os.path.abspath(run_output_dir)}"
    # The Gallery component consumes the list of image paths directly.
    return annotated_images, combined_detection_text, status, created_zip_path
# --- Gradio interface: two-column layout, inputs left / results right ---
with gr.Blocks() as demo:
    gr.Markdown("# 支援多模型YOLO物件辨識(demo)")
    gr.Markdown("上傳您的圖片和模型,並設定置信度閾值進行物件辨識。若未上傳模型,將使用預設的 yolov8n.pt 進行辨識。")

    with gr.Row():
        with gr.Column():
            # Input side: files, threshold, and a name for this run's folder.
            uploaded_images = gr.File(label="上傳圖片", file_count="multiple", file_types=["image"])
            uploaded_models = gr.File(label="上傳模型 (.pt)", file_count="multiple", file_types=[".pt"])
            threshold_slider = gr.Slider(minimum=0, maximum=1, value=0.25, step=0.05, label="置信度閾值")
            subdir_box = gr.Textbox(label="結果子目錄名稱", value="run_1", placeholder="請輸入儲存結果的子目錄名稱")
            detect_button = gr.Button("開始辨識")
        with gr.Column():
            # Output side: annotated gallery (preview enabled, images fit the
            # frame), text report, status line, and a zip download.
            result_gallery = gr.Gallery(label="辨識結果圖片", height=400, allow_preview=True, object_fit="contain")
            result_text = gr.Textbox(label="辨識資訊", lines=10)
            status_box = gr.Textbox(label="狀態/儲存路徑")
            zip_download = gr.File(label="下載所有結果 (.zip)", file_count="single")

    # Wire the button to the backend callback.
    detect_button.click(
        fn=gradio_multi_model_detection,
        inputs=[uploaded_images, uploaded_models, threshold_slider, subdir_box],
        outputs=[result_gallery, result_text, status_box, zip_download],
    )

# debug=True surfaces tracebacks in the console while developing.
demo.launch(debug=True)