iBrokeTheCode committed on
Commit
0e07292
·
1 Parent(s): 4c38445

chore: Add base code for Gradio app

Browse files
Files changed (2) hide show
  1. app.py +121 -3
  2. base.py +117 -0
app.py CHANGED
@@ -1,9 +1,127 @@
1
  import gradio as gr
2
 
3
 
4
def greet(name):
    """Return an enthusiastic greeting for *name*."""
    parts = ["Hello ", name, "!!"]
    return "".join(parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# Wrap the greeting function in a minimal text-in/text-out interface and serve it.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
demo.launch()
 
1
  import gradio as gr
2
 
3
 
4
# Updated placeholder for the prediction function
def predict(mode, text, image_path):
    """
    Placeholder prediction returning a {category: probability} dict,
    the format expected by the gr.Label component.

    *text* and *image_path* are currently unused; they are accepted so the
    signature already matches what the real model will need.
    """
    # Hardcoded sample outputs, keyed by classification mode. In the future,
    # these scores will come from the model.
    outputs_by_mode = {
        "Multimodal": {
            "abcat0100000": 0.05,
            "abcat0200000": 0.10,
            "abcat0300000": 0.20,
            "abcat0400000": 0.45,
            "abcat0500000": 0.20,
        },
        "Text Only": {
            "abcat0100000": 0.08,
            "abcat0200000": 0.15,
            "abcat0300000": 0.25,
            "abcat0400000": 0.35,
            "abcat0500000": 0.17,
        },
        "Image Only": {
            "abcat0100000": 0.10,
            "abcat0200000": 0.20,
            "abcat0300000": 0.30,
            "abcat0400000": 0.25,
            "abcat0500000": 0.15,
        },
    }
    # Empty dict when no (or an unknown) mode is selected.
    return outputs_by_mode.get(mode, {})
41
+
42
+
43
# Function to update input visibility based on mode selection
def update_inputs(mode):
    """Return Textbox/Image component updates whose visibility matches *mode*."""
    # The text box is hidden only in image-only mode, and the image input is
    # hidden only in text-only mode. Any other value (including the default
    # case) shows both inputs.
    show_text = mode != "Image Only"
    show_image = mode != "Text Only"
    return gr.Textbox(visible=show_text), gr.Image(visible=show_image)
53
+
54
+
55
# Gradio Interface using Blocks
with gr.Blocks(title="Multimodal Product Classification") as demo:
    with gr.Tabs():
        # Main interactive tab: inputs on the left, results on the right.
        with gr.TabItem("App"):
            gr.Markdown("# Multimodal Product Classifier")
            gr.Markdown("Classify products using either text, images, or both.")

            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Column(variant="panel"):
                        gr.Markdown("### βš™οΈ Classification Inputs")

                        # Mode selector; its value also drives which inputs
                        # are visible (wired up to update_inputs below).
                        mode_radio = gr.Radio(
                            choices=["Multimodal", "Text Only", "Image Only"],
                            value="Multimodal",
                            label="Choose Classification Mode",
                        )

                        text_input = gr.Textbox(
                            label="Product Description",
                            placeholder="e.g., Apple iPhone 15 Pro Max 256GB",
                        )
                        # filepath type: predict receives a path string, not pixels.
                        image_input = gr.Image(
                            label="Product Image", type="filepath", visible=True
                        )

                        classify_btn = gr.Button("πŸš€ Classify Product", variant="primary")

                with gr.Column(scale=1):
                    with gr.Column(variant="panel"):
                        gr.Markdown("### πŸ“Š Classification Results")

                        # Label component renders the {category: prob} dict
                        # returned by predict; shows the top 5 classes.
                        output_label = gr.Label(
                            label="Predicted Category", num_top_classes=5
                        )

            with gr.Accordion("How to use this demo", open=False):
                gr.Markdown(
                    """
                    This demo classifies a product based on its description and image.
                    - **Multimodal:** Uses both text and image for the most accurate prediction.
                    - **Text Only:** Uses only the product description.
                    - **Image Only:** Uses only the product image.
                    """
                )

        with gr.TabItem("About"):
            gr.Markdown(
                """
                ### About the Project
                This project demonstrates a multimodal classification system trained on data from Best Buy. It uses a Multilayer Perceptron (MLP) model trained on pre-generated embeddings from a Text-based model (MiniLM-L6) and an Image-based model (ConvNeXtV2).
                """
            )

        with gr.TabItem("Architecture"):
            gr.Markdown(
                """
                ### Model Architecture
                This section would contain details about the MLP architecture, the embedding models used, and a diagram explaining the data flow.
                """
            )

    # Event listeners for conditional rendering
    mode_radio.change(
        fn=update_inputs, inputs=mode_radio, outputs=[text_input, image_input]
    )

    # Event listener for the classify button
    classify_btn.click(
        fn=predict, inputs=[mode_radio, text_input, image_input], outputs=output_label
    )


demo.launch()
base.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+
4
# Placeholder for the prediction function
def predict(mode, text, image_path):
    """
    Placeholder for the final prediction logic.

    Returns the full gr.Label payload: a dict with a top "label" string and
    per-category "confidences" as a list of {"label", "confidence"} entries.
    The previous version put a plain {category: score} mapping under
    "confidences", which is not a value format the gr.Label component
    accepts alongside a string "label", so the output component errored.

    *text* and *image_path* are currently unused; they are accepted so the
    signature already matches what the real model will need.
    """
    if mode == "Multimodal":
        result_text = "Result for Multimodal input: a category from a real model. Confidence: 0.95"
    elif mode == "Text Only":
        result_text = (
            "Result for Text Only input: a category from a real model. Confidence: 0.92"
        )
    elif mode == "Image Only":
        result_text = "Result for Image Only input: a category from a real model. Confidence: 0.88"
    else:
        result_text = "Please select a classification mode."

    # Hardcoded sample scores. In the future, these will come from the model.
    scores = {
        "abcat0100000": 0.05,
        "abcat0200000": 0.10,
        "abcat0300000": 0.20,
        "abcat0400000": 0.45,
        "abcat0500000": 0.20,
    }
    return {
        "label": result_text,
        # gr.Label's documented full-payload format for confidences.
        "confidences": [
            {"label": category, "confidence": score}
            for category, score in scores.items()
        ],
    }
31
+
32
+
33
# Function to update input visibility based on mode selection
def update_inputs(mode):
    """Return Textbox/Image component updates whose visibility matches *mode*."""
    # The text box is hidden only in image-only mode, and the image input is
    # hidden only in text-only mode. Any other value (including the default
    # case) shows both inputs.
    show_text = mode != "Image Only"
    show_image = mode != "Text Only"
    return gr.Textbox(visible=show_text), gr.Image(visible=show_image)
43
+
44
+
45
# Gradio Interface using Blocks
with gr.Blocks(title="Multimodal Product Classification") as demo:
    with gr.Tabs():
        # Main interactive tab: inputs on the left, results on the right.
        with gr.TabItem("App"):
            gr.Markdown("# Multimodal Product Classifier")
            gr.Markdown("Classify products using either text, images, or both.")

            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Column(variant="panel"):
                        gr.Markdown("### βš™οΈ Classification Inputs")

                        # Mode selector; its value also drives which inputs
                        # are visible (wired up to update_inputs below).
                        mode_radio = gr.Radio(
                            choices=["Multimodal", "Text Only", "Image Only"],
                            value="Multimodal",
                            label="Choose Classification Mode",
                        )

                        text_input = gr.Textbox(
                            label="Product Description",
                            placeholder="e.g., Apple iPhone 15 Pro Max 256GB",
                        )
                        # filepath type: predict receives a path string, not pixels.
                        image_input = gr.Image(
                            label="Product Image", type="filepath", visible=True
                        )

                        classify_btn = gr.Button("πŸš€ Classify Product", variant="primary")

                with gr.Column(scale=1):
                    with gr.Column(variant="panel"):
                        gr.Markdown("### πŸ“Š Classification Results")

                        # Label component renders predict's output;
                        # shows the top 5 classes.
                        output_label = gr.Label(
                            label="Predicted Category", num_top_classes=5
                        )

            with gr.Accordion("How to use this demo", open=False):
                gr.Markdown(
                    """
                    This demo classifies a product based on its description and image.
                    - **Multimodal:** Uses both text and image for the most accurate prediction.
                    - **Text Only:** Uses only the product description.
                    - **Image Only:** Uses only the product image.
                    """
                )

        with gr.TabItem("About"):
            gr.Markdown(
                """
                ### About the Project
                This project demonstrates a multimodal classification system trained on data from Best Buy. It uses a Multilayer Perceptron (MLP) model trained on pre-generated embeddings from a Text-based model (MiniLM-L6) and an Image-based model (ConvNeXtV2).
                """
            )

        with gr.TabItem("Architecture"):
            gr.Markdown(
                """
                ### Model Architecture
                This section would contain details about the MLP architecture, the embedding models used, and a diagram explaining the data flow.
                """
            )

    # Event listeners for conditional rendering
    mode_radio.change(
        fn=update_inputs, inputs=mode_radio, outputs=[text_input, image_input]
    )

    # Event listener for the classify button
    classify_btn.click(
        fn=predict, inputs=[mode_radio, text_input, image_input], outputs=output_label
    )

demo.launch()