import gradio as gr


# Updated placeholder for the prediction function
def predict(mode, text, image_path):
    """
    This placeholder function now returns a dictionary
    in the format expected by the gr.Label component.
    """
    # Hardcoded sample outputs, keyed by Best Buy category IDs.
    # In the future, these will come from your model.
    multimodal_output = {
        "abcat0100000": 0.05,
        "abcat0200000": 0.10,
        "abcat0300000": 0.20,
        "abcat0400000": 0.45,
        "abcat0500000": 0.20,
    }
    text_only_output = {
        "abcat0100000": 0.08,
        "abcat0200000": 0.15,
        "abcat0300000": 0.25,
        "abcat0400000": 0.35,
        "abcat0500000": 0.17,
    }
    image_only_output = {
        "abcat0100000": 0.10,
        "abcat0200000": 0.20,
        "abcat0300000": 0.30,
        "abcat0400000": 0.25,
        "abcat0500000": 0.15,
    }
    if mode == "Multimodal":
        return multimodal_output
    elif mode == "Text Only":
        return text_only_output
    elif mode == "Image Only":
        return image_only_output
    else:
        return {}  # Return an empty dictionary for no selection
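

# A minimal sketch of how a real predict() might work, based on the pipeline
# described in the About tab: MiniLM-L6 text embeddings and ConvNeXtV2 image
# embeddings concatenated and fed to an MLP classifier. The checkpoint names,
# the `mlp` module, and `category_ids` are illustrative assumptions, not part
# of this app yet; imports are local so the demo runs without these optional
# dependencies, and in practice the encoders would be loaded once at startup
# rather than per call.
def predict_multimodal(text, image_path, mlp, category_ids):
    import timm
    import torch
    from PIL import Image
    from sentence_transformers import SentenceTransformer

    # Encode the product description into a 384-dim sentence embedding.
    text_encoder = SentenceTransformer("all-MiniLM-L6-v2")
    text_emb = torch.from_numpy(text_encoder.encode([text]))  # (1, 384)

    # Encode the product image; num_classes=0 yields pooled features.
    image_encoder = timm.create_model(
        "convnextv2_tiny.fcmae_ft_in22k_in1k", pretrained=True, num_classes=0
    ).eval()
    cfg = timm.data.resolve_data_config({}, model=image_encoder)
    transform = timm.data.create_transform(**cfg)
    image = Image.open(image_path).convert("RGB")
    with torch.no_grad():
        image_emb = image_encoder(transform(image).unsqueeze(0))  # (1, 768)

    # Concatenate both embeddings and classify with the trained MLP.
    features = torch.cat([text_emb, image_emb], dim=1)
    with torch.no_grad():
        probs = mlp(features).softmax(dim=1).squeeze(0)
    return {cat: float(p) for cat, p in zip(category_ids, probs)}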


# Function to update input visibility based on mode selection
def update_inputs(mode):
    if mode == "Multimodal":
        return gr.Textbox(visible=True), gr.Image(visible=True)
    elif mode == "Text Only":
        return gr.Textbox(visible=True), gr.Image(visible=False)
    elif mode == "Image Only":
        return gr.Textbox(visible=False), gr.Image(visible=True)
    else:  # Default case
        return gr.Textbox(visible=True), gr.Image(visible=True)
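

# Note: in Gradio 4+, returning a component instance from an event handler
# (as update_inputs does) applies the props set in its constructor as an
# update to the matched output component, equivalent to the older
# gr.update(visible=...) pattern.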


# Gradio Interface using Blocks
with gr.Blocks(title="Multimodal Product Classification") as demo:
    with gr.Tabs():
        with gr.TabItem("App"):
            gr.Markdown("# Multimodal Product Classifier")
            gr.Markdown("Classify products using either text, images, or both.")
            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Column(variant="panel"):
                        gr.Markdown("### Classification Inputs")
                        mode_radio = gr.Radio(
                            choices=["Multimodal", "Text Only", "Image Only"],
                            value="Multimodal",
                            label="Choose Classification Mode",
                        )
                        text_input = gr.Textbox(
                            label="Product Description",
                            placeholder="e.g., Apple iPhone 15 Pro Max 256GB",
                        )
                        image_input = gr.Image(
                            label="Product Image", type="filepath", visible=True
                        )
                        classify_btn = gr.Button(
                            "Classify Product", variant="primary"
                        )
                with gr.Column(scale=1):
                    with gr.Column(variant="panel"):
                        gr.Markdown("### Classification Results")
                        output_label = gr.Label(
                            label="Predicted Category", num_top_classes=5
                        )
            with gr.Accordion("How to use this demo", open=False):
                gr.Markdown(
                    """
                    This demo classifies a product based on its description and image.
                    - **Multimodal:** Uses both text and image for the most accurate prediction.
                    - **Text Only:** Uses only the product description.
                    - **Image Only:** Uses only the product image.
                    """
                )
        with gr.TabItem("About"):
            gr.Markdown(
                """
                ### About the Project
                This project demonstrates a multimodal product classification system trained on data from Best Buy. It uses a Multilayer Perceptron (MLP) trained on pre-generated embeddings from a text model (MiniLM-L6) and an image model (ConvNeXtV2).
                """
            )
        with gr.TabItem("Architecture"):
            gr.Markdown(
                """
                ### Model Architecture
                This section would contain details about the MLP architecture, the embedding models used, and a diagram explaining the data flow.
                """
            )
    # Event listeners for conditional rendering
    mode_radio.change(
        fn=update_inputs, inputs=mode_radio, outputs=[text_input, image_input]
    )
    # Event listener for the classify button
    classify_btn.click(
        fn=predict, inputs=[mode_radio, text_input, image_input], outputs=output_label
    )

# Launch the app (Gradio serves it at http://127.0.0.1:7860 by default)
demo.launch()