iBrokeTheCode committed · Commit dd8438e · 1 Parent(s): 0e07292

chore: Improve App interface

Files changed (2)
  1. app.py +87 -46
  2. base.py +102 -64
app.py CHANGED
@@ -1,13 +1,12 @@
 import gradio as gr
 
 
-# Updated placeholder for the prediction function
+# 📌 FUNCTIONS
 def predict(mode, text, image_path):
     """
     This placeholder function now returns a dictionary
     in the format expected by the gr.Label component.
     """
-    # Hardcoded, sample output. In the future, this will come from your model.
     multimodal_output = {
         "abcat0100000": 0.05,
         "abcat0200000": 0.10,
@@ -37,11 +36,10 @@ def predict(mode, text, image_path):
     elif mode == "Image Only":
         return image_only_output
     else:
-        return {}  # Return an empty dictionary for no selection
+        return {}
 
 
-# Function to update input visibility based on mode selection
-def update_inputs(mode):
+def update_inputs(mode: str):
     if mode == "Multimodal":
         return gr.Textbox(visible=True), gr.Image(visible=True)
     elif mode == "Text Only":
@@ -52,76 +50,119 @@ def update_inputs(mode):
         return gr.Textbox(visible=True), gr.Image(visible=True)
 
 
-# Gradio Interface using Blocks
-with gr.Blocks(title="Multimodal Product Classification") as demo:
+# 📌 USER INTERFACE
+with gr.Blocks(
+    title="Multimodal Product Classification",
+    theme=gr.themes.Ocean(),
+) as demo:
     with gr.Tabs():
+        # 📌 APP TAB
         with gr.TabItem("App"):
-            gr.Markdown("# Multimodal Product Classifier")
-            gr.Markdown("Classify products using either text, images, or both.")
+            gr.Markdown("# 🛍️ Multimodal Product Classification")
 
-            with gr.Row():
-                with gr.Column(scale=1):
-                    with gr.Column(variant="panel"):
-                        gr.Markdown("### ⚙️ Classification Inputs")
+            with gr.Row(equal_height=True):
+                with gr.Column():
+                    with gr.Column():
+                        gr.Markdown("## ⚙️ Classification Inputs")
 
                         mode_radio = gr.Radio(
                             choices=["Multimodal", "Text Only", "Image Only"],
                             value="Multimodal",
-                            label="Choose Classification Mode",
+                            label="Choose Classification Mode:",
                         )
 
                         text_input = gr.Textbox(
-                            label="Product Description",
+                            label="Product Description:",
                             placeholder="e.g., Apple iPhone 15 Pro Max 256GB",
                         )
+
                         image_input = gr.Image(
-                            label="Product Image", type="filepath", visible=True
+                            label="Product Image",
+                            type="filepath",
+                            visible=True,
+                            height=300,
+                            width="100%",
                         )
 
-                        classify_btn = gr.Button("🚀 Classify Product", variant="primary")
-
-                with gr.Column(scale=1):
-                    with gr.Column(variant="panel"):
-                        gr.Markdown("### 📊 Classification Results")
-
-                        output_label = gr.Label(
-                            label="Predicted Category", num_top_classes=5
+                        classify_button = gr.Button(
+                            "✨ Classify Product", variant="primary"
                        )
 
-                with gr.Accordion("How to use this demo", open=False):
-                    gr.Markdown(
-                        """
-                        This demo classifies a product based on its description and image.
+                with gr.Column():
+                    with gr.Column():
+                        gr.Markdown("## 📊 Results")
+
+                        gr.Markdown(
+                            """**💡 How to use this app**
+
+                            This app classifies a product based on its description and image.
                            - **Multimodal:** Uses both text and image for the most accurate prediction.
                            - **Text Only:** Uses only the product description.
                            - **Image Only:** Uses only the product image.
                            """
                        )
 
+                        output_label = gr.Label(
+                            label="Predicted category", num_top_classes=5
+                        )
+
+        # 📌 ABOUT TAB
         with gr.TabItem("About"):
-            gr.Markdown(
-                """
-                ### About the Project
-                This project demonstrates a multimodal classification system trained on data from Best Buy. It uses a Multilayer Perceptron (MLP) model trained on pre-generated embeddings from a Text-based model (MiniLM-L6) and an Image-based model (ConvNeXtV2).
-                """
-            )
-
-        with gr.TabItem("Architecture"):
-            gr.Markdown(
-                """
-                ### Model Architecture
-                This section would contain details about the MLP architecture, the embedding models used, and a diagram explaining the data flow.
-                """
-            )
-
-    # Event listeners for conditional rendering
+            gr.Markdown("""
+            ## About This Project
+
+            - This project is an image classification app powered by a Convolutional Neural Network (CNN).
+            - Simply upload an image, and the app predicts its category from over 1,000 classes using a pre-trained ResNet50 model.
+            - Originally developed as a multi-service ML system (FastAPI + Redis + Streamlit), this version has been adapted into a single Streamlit app for lightweight, cost-effective deployment on Hugging Face Spaces.
+
+            ## Model & Description
+            - Model: ResNet50 (pre-trained on the ImageNet dataset with 1,000+ categories).
+            - Pipeline: Images are resized, normalized, and passed to the model.
+            - Output: The app displays the top prediction with a confidence score.
+            ResNet50 is widely used in both research and production, making it an excellent showcase of deep learning capabilities and transferable ML skills.
+            """)
+
+        # 📌 MODEL TAB
+        with gr.TabItem("Model"):
+            gr.Markdown("""
+            ## Original Architecture
+
+            - FastAPI → REST API for image processing
+            - Redis → Message broker for service communication
+            - Streamlit → Interactive web UI
+            - TensorFlow → Deep learning inference engine
+            - Locust → Load testing & benchmarking
+            - Docker Compose → Service orchestration
+
+            ## Simplified Version
+
+            - Streamlit only → UI and model combined in a single app
+            - TensorFlow (ResNet50) → Core prediction engine
+            - Docker → Containerized for Hugging Face Spaces deployment
+            This evolution demonstrates the ability to design a scalable microservices system and also adapt it into a lightweight single-service solution for cost-effective demos.
+            """)
+
+    # 📌 FOOTER
+    gr.HTML("<hr>")
+    with gr.Row():
+        gr.Markdown("""
+        <div style="text-align: center; margin-bottom: 1.5rem;">
+            <b>Connect with me:</b> 💼 <a href="https://www.linkedin.com/in/alex-turpo/" target="_blank">LinkedIn</a> •
+            🐱 <a href="https://github.com/iBrokeTheCode" target="_blank">GitHub</a> •
+            🤗 <a href="https://huggingface.co/iBrokeTheCode" target="_blank">Hugging Face</a>
+        </div>
+        """)
+
+    # 📌 EVENT LISTENERS
     mode_radio.change(
-        fn=update_inputs, inputs=mode_radio, outputs=[text_input, image_input]
+        fn=update_inputs,
+        inputs=mode_radio,
+        outputs=[text_input, image_input],
     )
 
-    # Event listener for the classify button
-    classify_btn.click(
+    classify_button.click(
         fn=predict, inputs=[mode_radio, text_input, image_input], outputs=output_label
     )
 
+
 demo.launch()
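The new `predict` above still returns hard-coded scores, but its return type is already the final contract: `gr.Label` consumes a plain `{label: confidence}` dictionary, and `update_inputs` follows the same Blocks pattern by returning component constructors whose properties (here `visible`) overwrite the live components. Below is a minimal sketch of how real model scores could be shaped into that dictionary, assuming a PyTorch classifier head over the pre-generated embeddings mentioned in the removed About text; `logits_to_label_dict` and `fake_logits` are hypothetical names, not part of this commit:

```python
import torch
import torch.nn.functional as F

# The five Best Buy category IDs used by the placeholder outputs above.
CATEGORIES = [
    "abcat0100000", "abcat0200000", "abcat0300000", "abcat0400000", "abcat0500000"
]

def logits_to_label_dict(logits: torch.Tensor) -> dict[str, float]:
    """Turn raw logits into the {label: confidence} mapping that gr.Label expects."""
    probs = F.softmax(logits, dim=-1)
    return {cat: float(p) for cat, p in zip(CATEGORIES, probs)}

# Illustrative stand-in for the classifier head's output on one product.
fake_logits = torch.tensor([0.1, 0.4, 1.2, 2.0, 0.8])
print(logits_to_label_dict(fake_logits))
```

Because the probabilities sum to 1 and the component is created with `num_top_classes=5`, the Label widget would render all five categories sorted by confidence.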
base.py CHANGED
@@ -1,116 +1,154 @@
 import gradio as gr
 
 
-# Placeholder for the prediction function
+# 📌 FUNCTIONS
 def predict(mode, text, image_path):
-    """
-    This is a placeholder for the final prediction logic.
-    It will return a hardcoded dictionary to demonstrate the output format.
-    """
+    # ... your existing predict function ...
+    multimodal_output = {
+        "abcat0100000": 0.05,
+        "abcat0200000": 0.10,
+        "abcat0300000": 0.20,
+        "abcat0400000": 0.45,
+        "abcat0500000": 0.20,
+    }
+    text_only_output = {
+        "abcat0100000": 0.08,
+        "abcat0200000": 0.15,
+        "abcat0300000": 0.25,
+        "abcat0400000": 0.35,
+        "abcat0500000": 0.17,
+    }
+    image_only_output = {
+        "abcat0100000": 0.10,
+        "abcat0200000": 0.20,
+        "abcat0300000": 0.30,
+        "abcat0400000": 0.25,
+        "abcat0500000": 0.15,
+    }
+
     if mode == "Multimodal":
-        result_text = "Result for Multimodal input: a category from a real model. Confidence: 0.95"
+        return multimodal_output
     elif mode == "Text Only":
-        result_text = (
-            "Result for Text Only input: a category from a real model. Confidence: 0.92"
-        )
+        return text_only_output
     elif mode == "Image Only":
-        result_text = "Result for Image Only input: a category from a real model. Confidence: 0.88"
+        return image_only_output
     else:
-        result_text = "Please select a classification mode."
-
-    return {
-        "label": result_text,
-        "confidences": {
-            "abcat0100000": 0.05,
-            "abcat0200000": 0.10,
-            "abcat0300000": 0.20,
-            "abcat0400000": 0.45,
-            "abcat0500000": 0.20,
-        },
-    }
+        return {}
 
 
-# Function to update input visibility based on mode selection
-def update_inputs(mode):
+def update_inputs(mode: str):
+    # ... your existing update_inputs function ...
     if mode == "Multimodal":
         return gr.Textbox(visible=True), gr.Image(visible=True)
     elif mode == "Text Only":
         return gr.Textbox(visible=True), gr.Image(visible=False)
     elif mode == "Image Only":
         return gr.Textbox(visible=False), gr.Image(visible=True)
-    else:  # Default case
+    else:
         return gr.Textbox(visible=True), gr.Image(visible=True)
 
 
-# Gradio Interface using Blocks
-with gr.Blocks(title="Multimodal Product Classification") as demo:
+# 📌 CUSTOM CSS FOR FIXED FOOTER
+css_code = """
+/* Target the footer container by its ID and apply fixed positioning */
+#footer-container {
+    position: fixed;
+    bottom: 0;
+    left: 0;
+    right: 0;
+    z-index: 1000; /* Ensure it stays on top of other content */
+    background-color: var(--background-fill-primary); /* Use a Gradio theme variable */
+    padding: var(--spacing-md);
+    border-top: 1px solid var(--border-color-primary);
+}
+
+/* Add padding to the body to prevent content from being hidden by the footer */
+.gradio-container {
+    padding-bottom: 70px !important;
+}
+"""
+
+# 📌 USER INTERFACE
+with gr.Blocks(
+    title="Multimodal Product Classification",
+    theme=gr.themes.Ocean(),
+    css=css_code,
+) as demo:
+    # 📌 TABS
     with gr.Tabs():
+        # ... your existing tabs ...
+        # 📌 APP TAB
         with gr.TabItem("App"):
-            gr.Markdown("# Multimodal Product Classifier")
-            gr.Markdown("Classify products using either text, images, or both.")
+            gr.Markdown("# 🛍️ Multimodal Product Classification")
 
-            with gr.Row():
+            with gr.Row(equal_height=True):
                 with gr.Column(scale=1):
-                    with gr.Column(variant="panel"):
-                        gr.Markdown("### ⚙️ Classification Inputs")
+                    with gr.Column():
+                        gr.Markdown("## ⚙️ Classification Inputs")
 
                         mode_radio = gr.Radio(
                             choices=["Multimodal", "Text Only", "Image Only"],
                             value="Multimodal",
-                            label="Choose Classification Mode",
+                            label="Choose Classification Mode:",
                        )
 
                         text_input = gr.Textbox(
-                            label="Product Description",
+                            label="Product Description:",
                             placeholder="e.g., Apple iPhone 15 Pro Max 256GB",
                        )
+
                         image_input = gr.Image(
                             label="Product Image", type="filepath", visible=True
                        )
 
-                        classify_btn = gr.Button("🚀 Classify Product", variant="primary")
-
-                with gr.Column(scale=1):
-                    with gr.Column(variant="panel"):
-                        gr.Markdown("### 📊 Classification Results")
-
-                        output_label = gr.Label(
-                            label="Predicted Category", num_top_classes=5
+                        classify_button = gr.Button(
+                            "✨ Classify Product", variant="primary"
                        )
 
-                with gr.Accordion("How to use this demo", open=False):
-                    gr.Markdown(
-                        """
-                        This demo classifies a product based on its description and image.
+                with gr.Column(scale=2):
+                    with gr.Column():
+                        gr.Markdown("## 📊 Results")
+
+                        gr.Markdown(
+                            """**💡 How to use this app**
+
+                            This app classifies a product based on its description and image.
                            - **Multimodal:** Uses both text and image for the most accurate prediction.
                            - **Text Only:** Uses only the product description.
                            - **Image Only:** Uses only the product image.
                            """
                        )
 
+                        output_label = gr.Label(
+                            label="Predicted category", num_top_classes=5
+                        )
+
+        # 📌 ABOUT TAB
         with gr.TabItem("About"):
-            gr.Markdown(
-                """
-                ### About the Project
-                This project demonstrates a multimodal classification system trained on data from Best Buy. It uses a Multilayer Perceptron (MLP) model trained on pre-generated embeddings from a Text-based model (MiniLM-L6) and an Image-based model (ConvNeXtV2).
-                """
-            )
-
-        with gr.TabItem("Architecture"):
-            gr.Markdown(
-                """
-                ### Model Architecture
-                This section would contain details about the MLP architecture, the embedding models used, and a diagram explaining the data flow.
-                """
-            )
-
-    # Event listeners for conditional rendering
+            gr.Markdown("""...""")
+
+        # 📌 MODEL TAB
+        with gr.TabItem("Model"):
+            gr.Markdown("""...""")
+
+    # 📌 FOOTER
+    with gr.Row(elem_id="footer-container"):
+        gr.HTML("""
+            <div style="text-align: center;">
+                <b>Connect with me:</b> 💼 <a href="https://www.linkedin.com/in/alex-turpo/" target="_blank">LinkedIn</a> •
+                🐱 <a href="https://github.com/iBrokeTheCode" target="_blank">GitHub</a> •
+                🤗 <a href="https://huggingface.co/iBrokeTheCode" target="_blank">Hugging Face</a>
+            </div>
+        """)
+
+    # 📌 EVENT LISTENERS
     mode_radio.change(
-        fn=update_inputs, inputs=mode_radio, outputs=[text_input, image_input]
+        fn=update_inputs,
+        inputs=mode_radio,
+        outputs=[text_input, image_input],
    )
 
-    # Event listener for the classify button
-    classify_btn.click(
+    classify_button.click(
         fn=predict, inputs=[mode_radio, text_input, image_input], outputs=output_label
     )
 
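The fixed footer in base.py works through the pairing of `elem_id` and the `css` argument: `gr.Row(elem_id="footer-container")` gives the row a stable HTML id, and `gr.Blocks(css=...)` injects the stylesheet that pins that id to the bottom of the viewport. A stripped-down, runnable sketch of the same pattern (the footer text is a placeholder):

```python
import gradio as gr

# Same idea as base.py: pin a Row to the viewport bottom and pad the page
# so normal content is never hidden underneath it.
css = """
#footer-container { position: fixed; bottom: 0; left: 0; right: 0; }
.gradio-container { padding-bottom: 70px !important; }
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("Main content goes here.")
    with gr.Row(elem_id="footer-container"):  # elem_id becomes the HTML id the CSS targets
        gr.HTML("<div style='text-align: center;'>Footer links</div>")

if __name__ == "__main__":
    demo.launch()
```

Note that app.py ships the simpler variant (a plain `<hr>` plus a Row at the end of the page), while base.py carries the fixed-position experiment.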