Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import cv2
|
|
| 3 |
import torch
|
| 4 |
import numpy as np
|
| 5 |
from PIL import Image
|
|
|
|
| 6 |
|
| 7 |
# Load the YOLOv5 model
|
| 8 |
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
|
|
@@ -21,27 +22,68 @@ def run_inference(image):
|
|
| 21 |
|
| 22 |
return Image.fromarray(annotated_image)
|
| 23 |
|
| 24 |
-
# Function to generate a summary for the detected objects
|
| 25 |
-
def
|
| 26 |
results = model(image)
|
| 27 |
detected_objects = results.pandas().xyxy[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
summary = "Detected objects:\n\n"
|
| 29 |
-
for
|
| 30 |
-
summary += f"- {obj
|
| 31 |
-
return summary
|
| 32 |
|
| 33 |
-
|
| 34 |
-
def generate_scene_description(summary):
|
| 35 |
-
if "person" in summary.lower():
|
| 36 |
-
return "This scene might involve people interacting or a social gathering."
|
| 37 |
-
elif "car" in summary.lower() or "truck" in summary.lower():
|
| 38 |
-
return "This could be a street scene or a transportation-related scenario."
|
| 39 |
-
elif "dog" in summary.lower() or "cat" in summary.lower():
|
| 40 |
-
return "This appears to involve pets or animals, possibly in a domestic or outdoor setting."
|
| 41 |
-
else:
|
| 42 |
-
return "This scene involves various objects. It could be a dynamic or static environment."
|
| 43 |
|
| 44 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
with gr.Blocks(css="""
|
| 46 |
body {
|
| 47 |
font-family: 'Poppins', sans-serif;
|
|
@@ -76,19 +118,19 @@ with gr.Blocks(css="""
|
|
| 76 |
.gr-button {
|
| 77 |
font-size: 1em;
|
| 78 |
padding: 12px 24px;
|
| 79 |
-
background
|
| 80 |
color: #FFFFFF;
|
| 81 |
border: none;
|
| 82 |
border-radius: 5px;
|
| 83 |
transition: all 0.3s ease-in-out;
|
| 84 |
}
|
| 85 |
.gr-button:hover {
|
| 86 |
-
background
|
| 87 |
transform: scale(1.05);
|
| 88 |
box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
|
| 89 |
}
|
| 90 |
.gr-box {
|
| 91 |
-
background: rgba(255, 255, 255, 0.
|
| 92 |
border: 1px solid rgba(255, 255, 255, 0.3);
|
| 93 |
border-radius: 10px;
|
| 94 |
padding: 15px;
|
|
@@ -105,14 +147,14 @@ with gr.Blocks(css="""
|
|
| 105 |
detect_button = gr.Button("Run Detection", elem_classes="gr-button")
|
| 106 |
with gr.Column(scale=3):
|
| 107 |
annotated_image_output = gr.Image(label="Detected Image", type="pil", elem_classes="gr-box")
|
| 108 |
-
summary_output = gr.Textbox(label="Detection Summary", lines=10, interactive=False, elem_classes="gr-box")
|
| 109 |
scene_description_output = gr.Textbox(label="Scene Description", lines=5, interactive=False, elem_classes="gr-box")
|
| 110 |
|
| 111 |
# Actions for buttons
|
| 112 |
def detect_and_process(image):
|
| 113 |
annotated_image = run_inference(image)
|
| 114 |
-
summary =
|
| 115 |
-
scene_description = generate_scene_description(
|
| 116 |
return annotated_image, summary, scene_description
|
| 117 |
|
| 118 |
detect_button.click(
|
|
|
|
| 3 |
import torch
|
| 4 |
import numpy as np
|
| 5 |
from PIL import Image
|
| 6 |
+
from collections import Counter
|
| 7 |
|
| 8 |
# Load the YOLOv5 model
|
| 9 |
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
|
|
|
|
| 22 |
|
| 23 |
return Image.fromarray(annotated_image)
|
| 24 |
|
| 25 |
+
# Function to generate a summary for the detected objects with counts
|
| 26 |
+
def generate_summary_with_counts(image):
|
| 27 |
results = model(image)
|
| 28 |
detected_objects = results.pandas().xyxy[0]
|
| 29 |
+
|
| 30 |
+
# Count detected objects
|
| 31 |
+
object_names = detected_objects['name'].tolist()
|
| 32 |
+
object_counts = Counter(object_names)
|
| 33 |
+
|
| 34 |
+
# Create a summary
|
| 35 |
summary = "Detected objects:\n\n"
|
| 36 |
+
for obj, count in object_counts.items():
|
| 37 |
+
summary += f"- {obj}: {count}\n"
|
|
|
|
| 38 |
|
| 39 |
+
return summary, object_counts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
+
# Function to generate a scene description based on the detected objects
def generate_scene_description(object_counts):
    """
    Generate a possible scene description based on detected objects and their counts.

    Parameters
    ----------
    object_counts : mapping
        Mapping (e.g. ``collections.Counter``) from detected class name to
        count. Only membership of class names is consulted, not the counts.

    Returns
    -------
    str
        A one-sentence guess at the kind of scene.

    Notes
    -----
    Branch order matters: the first matching rule wins, so the specific
    object combinations are checked before the broader single-object rules.
    """
    if "person" in object_counts and "dog" in object_counts:
        return "This scene seems to capture people spending time outdoors with pets, possibly in a park or recreational area."
    elif "person" in object_counts and "laptop" in object_counts:
        return "This might be a workplace or a study environment, featuring individuals using laptops for work or study."
    elif "car" in object_counts or "truck" in object_counts:
        return "This appears to be a street or traffic scene with vehicles in motion or parked."
    elif "cat" in object_counts and "sofa" in object_counts:
        return "This scene seems to capture a cozy indoor environment, likely a home with pets relaxing."
    elif "bicycle" in object_counts and "person" in object_counts:
        return "This could depict an outdoor activity, such as cycling or commuting by bike."
    elif "boat" in object_counts or "ship" in object_counts:
        return "This seems to be a water-based setting, possibly near a harbor, river, or open sea."
    elif "bird" in object_counts and "tree" in object_counts:
        return "This scene depicts a natural setting, possibly a park or forest, with birds and trees."
    elif "person" in object_counts and "microwave" in object_counts:
        return "This is likely an indoor setting, such as a kitchen, where cooking or meal preparation is taking place."
    elif "cow" in object_counts or "sheep" in object_counts:
        return "This scene appears to capture a rural or farming environment, featuring livestock in open fields or farms."
    elif "horse" in object_counts and "person" in object_counts:
        return "This might depict an equestrian scene, possibly involving horseback riding or ranch activities."
    elif "dog" in object_counts and "ball" in object_counts:
        return "This scene seems to show playful activities, possibly a game of fetch with a dog."
    elif "umbrella" in object_counts and "person" in object_counts:
        return "This might capture a rainy day or a sunny outdoor activity where umbrellas are being used."
    elif "train" in object_counts or "railway" in object_counts:
        return "This scene could involve a railway station or a train passing through a scenic route."
    # BUGFIX: this was `"surfboard" in object_counts or "person" in object_counts`,
    # which made ANY scene containing a person fall into this branch and left the
    # later person-based rules (book, flower, ...) unreachable. A surfboard alone
    # still selects the coastal description, as before.
    elif "surfboard" in object_counts:
        return "This is likely a beach or coastal scene featuring activities like surfing or water sports."
    elif "book" in object_counts and "person" in object_counts:
        return "This scene could depict a quiet reading environment, such as a library or a study room."
    elif "traffic light" in object_counts and "car" in object_counts:
        return "This seems to capture an urban street scene with traffic and signals controlling the flow."
    elif "chair" in object_counts and "dining table" in object_counts:
        return "This is likely an indoor dining area, possibly a family meal or a restaurant setting."
    elif "flower" in object_counts and "person" in object_counts:
        return "This scene could depict a garden or a floral setting, possibly involving gardening or photography."
    elif "airplane" in object_counts:
        return "This appears to capture an airport or an aerial view, featuring an airplane in flight or on the ground."
    else:
        return "This scene involves various objects, indicating a dynamic or diverse setting."
|
| 86 |
+
# Create the Gradio interface with enhanced UI
|
| 87 |
with gr.Blocks(css="""
|
| 88 |
body {
|
| 89 |
font-family: 'Poppins', sans-serif;
|
|
|
|
| 118 |
.gr-button {
|
| 119 |
font-size: 1em;
|
| 120 |
padding: 12px 24px;
|
| 121 |
+
background: linear-gradient(90deg, #7091E6, #8697C4);
|
| 122 |
color: #FFFFFF;
|
| 123 |
border: none;
|
| 124 |
border-radius: 5px;
|
| 125 |
transition: all 0.3s ease-in-out;
|
| 126 |
}
|
| 127 |
.gr-button:hover {
|
| 128 |
+
background: linear-gradient(90deg, #8697C4, #7091E6);
|
| 129 |
transform: scale(1.05);
|
| 130 |
box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
|
| 131 |
}
|
| 132 |
.gr-box {
|
| 133 |
+
background: rgba(255, 255, 255, 0.2);
|
| 134 |
border: 1px solid rgba(255, 255, 255, 0.3);
|
| 135 |
border-radius: 10px;
|
| 136 |
padding: 15px;
|
|
|
|
| 147 |
detect_button = gr.Button("Run Detection", elem_classes="gr-button")
|
| 148 |
with gr.Column(scale=3):
|
| 149 |
annotated_image_output = gr.Image(label="Detected Image", type="pil", elem_classes="gr-box")
|
| 150 |
+
summary_output = gr.Textbox(label="Detection Summary with Object Counts", lines=10, interactive=False, elem_classes="gr-box")
|
| 151 |
scene_description_output = gr.Textbox(label="Scene Description", lines=5, interactive=False, elem_classes="gr-box")
|
| 152 |
|
| 153 |
# Actions for buttons
def detect_and_process(image):
    """Gradio callback: run detection on *image* and build all three outputs.

    Returns a tuple of (annotated image, detection summary text, scene
    description text), in the order the output components expect.
    """
    annotated = run_inference(image)
    # NOTE(review): the summary helper runs the model on the array form of the
    # same input, so inference appears to happen twice per click — confirm
    # against run_inference before optimizing.
    text_summary, counts = generate_summary_with_counts(np.array(image))
    description = generate_scene_description(counts)
    return annotated, text_summary, description
|
| 159 |
|
| 160 |
detect_button.click(
|