iBrokeTheCode committed · Commit dd8438e · 1 Parent(s): 0e07292

chore: Improve App interface

Files changed (2)
  1. app.py +87 -46
  2. base.py +102 -64
app.py CHANGED
@@ -1,13 +1,12 @@
 import gradio as gr
 
 
-# Updated placeholder for the prediction function
+# 📌 FUNCTIONS
 def predict(mode, text, image_path):
     """
     This placeholder function now returns a dictionary
     in the format expected by the gr.Label component.
     """
-    # Hardcoded, sample output. In the future, this will come from your model.
     multimodal_output = {
         "abcat0100000": 0.05,
         "abcat0200000": 0.10,
@@ -37,11 +36,10 @@ def predict(mode, text, image_path):
     elif mode == "Image Only":
         return image_only_output
     else:
-        return {}  # Return an empty dictionary for no selection
+        return {}
 
 
-# Function to update input visibility based on mode selection
-def update_inputs(mode):
+def update_inputs(mode: str):
     if mode == "Multimodal":
         return gr.Textbox(visible=True), gr.Image(visible=True)
     elif mode == "Text Only":
@@ -52,76 +50,119 @@ def update_inputs(mode):
         return gr.Textbox(visible=True), gr.Image(visible=True)
 
 
-# Gradio Interface using Blocks
-with gr.Blocks(title="Multimodal Product Classification") as demo:
+# 📌 USER INTERFACE
+with gr.Blocks(
+    title="Multimodal Product Classification",
+    theme=gr.themes.Ocean(),
+) as demo:
     with gr.Tabs():
+        # 📌 APP TAB
         with gr.TabItem("App"):
-            gr.Markdown("# Multimodal Product Classifier")
-            gr.Markdown("Classify products using either text, images, or both.")
+            gr.Markdown("# 🛍️ Multimodal Product Classification")
 
-            with gr.Row():
-                with gr.Column(scale=1):
-                    with gr.Column(variant="panel"):
-                        gr.Markdown("### ⚙️ Classification Inputs")
+            with gr.Row(equal_height=True):
+                with gr.Column():
+                    with gr.Column():
+                        gr.Markdown("## ⚙️ Classification Inputs")
 
                         mode_radio = gr.Radio(
                             choices=["Multimodal", "Text Only", "Image Only"],
                             value="Multimodal",
-                            label="Choose Classification Mode",
+                            label="Choose Classification Mode:",
                         )
 
                         text_input = gr.Textbox(
-                            label="Product Description",
+                            label="Product Description:",
                             placeholder="e.g., Apple iPhone 15 Pro Max 256GB",
                         )
+
                         image_input = gr.Image(
-                            label="Product Image", type="filepath", visible=True
+                            label="Product Image",
+                            type="filepath",
+                            visible=True,
+                            height=300,
+                            width="100%",
                         )
 
-                        classify_btn = gr.Button("🚀 Classify Product", variant="primary")
-
-                with gr.Column(scale=1):
-                    with gr.Column(variant="panel"):
-                        gr.Markdown("### 📊 Classification Results")
-
-                        output_label = gr.Label(
-                            label="Predicted Category", num_top_classes=5
+                        classify_button = gr.Button(
+                            "✨ Classify Product", variant="primary"
                        )
 
-                with gr.Accordion("How to use this demo", open=False):
-                    gr.Markdown(
-                        """
-                        This demo classifies a product based on its description and image.
+                with gr.Column():
+                    with gr.Column():
+                        gr.Markdown("## 📊 Results")
+
+                        gr.Markdown(
+                            """**💡 How to use this app**
+
+                            This app classifies a product based on its description and image.
                            - **Multimodal:** Uses both text and image for the most accurate prediction.
                            - **Text Only:** Uses only the product description.
                            - **Image Only:** Uses only the product image.
                            """
                        )
 
+                        output_label = gr.Label(
+                            label="Predicted category", num_top_classes=5
+                        )
+
+        # 📌 ABOUT TAB
         with gr.TabItem("About"):
-            gr.Markdown(
-                """
-                ### About the Project
-                This project demonstrates a multimodal classification system trained on data from Best Buy. It uses a Multilayer Perceptron (MLP) model trained on pre-generated embeddings from a Text-based model (MiniLM-L6) and an Image-based model (ConvNeXtV2).
-                """
-            )
-
-        with gr.TabItem("Architecture"):
-            gr.Markdown(
-                """
-                ### Model Architecture
-                This section would contain details about the MLP architecture, the embedding models used, and a diagram explaining the data flow.
-                """
-            )
-
-    # Event listeners for conditional rendering
+            gr.Markdown("""
+            ## About This Project
+
+            - This project is an image classification app powered by a Convolutional Neural Network (CNN).
+            - Simply upload an image, and the app predicts its category from over 1,000 classes using a pre-trained ResNet50 model.
+            - Originally developed as a multi-service ML system (FastAPI + Redis + Streamlit), this version has been adapted into a single Streamlit app for lightweight, cost-effective deployment on Hugging Face Spaces.
+
+            ## Model & Description
+            - Model: ResNet50 (pre-trained on the ImageNet dataset with 1,000+ categories).
+            - Pipeline: Images are resized, normalized, and passed to the model.
+            - Output: The app displays the top prediction with a confidence score.
+            ResNet50 is widely used in both research and production, making it an excellent showcase of deep learning capabilities and transferable ML skills.
+            """)
+
+        # 📌 MODEL TAB
+        with gr.TabItem("Model"):
+            gr.Markdown("""
+            ## Original Architecture
+
+            - FastAPI → REST API for image processing
+            - Redis → Message broker for service communication
+            - Streamlit → Interactive web UI
+            - TensorFlow → Deep learning inference engine
+            - Locust → Load testing & benchmarking
+            - Docker Compose → Service orchestration
+
+            ## Simplified Version
+
+            - Streamlit only → UI and model combined in a single app
+            - TensorFlow (ResNet50) → Core prediction engine
+            - Docker → Containerized for Hugging Face Spaces deployment
+            This evolution demonstrates the ability to design a scalable microservices system and also adapt it into a lightweight single-service solution for cost-effective demos.
+            """)
+
+    # 📌 FOOTER
+    gr.HTML("<hr>")
+    with gr.Row():
+        gr.Markdown("""
+        <div style="text-align: center; margin-bottom: 1.5rem;">
+            <b>Connect with me:</b> 💼 <a href="https://www.linkedin.com/in/alex-turpo/" target="_blank">LinkedIn</a> •
+            🐱 <a href="https://github.com/iBrokeTheCode" target="_blank">GitHub</a> •
+            🤗 <a href="https://huggingface.co/iBrokeTheCode" target="_blank">Hugging Face</a>
+        </div>
+        """)
+
+    # 📌 EVENT LISTENERS
     mode_radio.change(
-        fn=update_inputs, inputs=mode_radio, outputs=[text_input, image_input]
+        fn=update_inputs,
+        inputs=mode_radio,
+        outputs=[text_input, image_input],
     )
 
-    # Event listener for the classify button
-    classify_btn.click(
+    classify_button.click(
         fn=predict, inputs=[mode_radio, text_input, image_input], outputs=output_label
     )
 
+
 demo.launch()
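The new `predict` above still returns hard-coded scores, but its return type is already the final contract: `gr.Label` consumes a plain `{label: confidence}` dictionary, and `update_inputs` follows the same Blocks pattern by returning component constructors whose properties (here `visible`) overwrite the live components. Below is a minimal sketch of how real model scores could be shaped into that dictionary, assuming a PyTorch classifier head over the pre-generated embeddings mentioned in the removed About text; `logits_to_label_dict` and `fake_logits` are hypothetical names, not part of this commit:

```python
import torch
import torch.nn.functional as F

# The five Best Buy category IDs used by the placeholder outputs above.
CATEGORIES = [
    "abcat0100000", "abcat0200000", "abcat0300000", "abcat0400000", "abcat0500000"
]

def logits_to_label_dict(logits: torch.Tensor) -> dict[str, float]:
    """Turn raw logits into the {label: confidence} mapping that gr.Label expects."""
    probs = F.softmax(logits, dim=-1)
    return {cat: float(p) for cat, p in zip(CATEGORIES, probs)}

# Illustrative stand-in for the classifier head's output on one product.
fake_logits = torch.tensor([0.1, 0.4, 1.2, 2.0, 0.8])
print(logits_to_label_dict(fake_logits))
```

Because the probabilities sum to 1 and the component is created with `num_top_classes=5`, the Label widget would render all five categories sorted by confidence.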
base.py CHANGED
@@ -1,116 +1,154 @@
 import gradio as gr
 
 
-# Placeholder for the prediction function
+# 📌 FUNCTIONS
 def predict(mode, text, image_path):
-    """
-    This is a placeholder for the final prediction logic.
-    It will return a hardcoded dictionary to demonstrate the output format.
-    """
+    # ... your existing predict function ...
+    multimodal_output = {
+        "abcat0100000": 0.05,
+        "abcat0200000": 0.10,
+        "abcat0300000": 0.20,
+        "abcat0400000": 0.45,
+        "abcat0500000": 0.20,
+    }
+    text_only_output = {
+        "abcat0100000": 0.08,
+        "abcat0200000": 0.15,
+        "abcat0300000": 0.25,
+        "abcat0400000": 0.35,
+        "abcat0500000": 0.17,
+    }
+    image_only_output = {
+        "abcat0100000": 0.10,
+        "abcat0200000": 0.20,
+        "abcat0300000": 0.30,
+        "abcat0400000": 0.25,
+        "abcat0500000": 0.15,
+    }
+
     if mode == "Multimodal":
-        result_text = "Result for Multimodal input: a category from a real model. Confidence: 0.95"
+        return multimodal_output
     elif mode == "Text Only":
-        result_text = (
-            "Result for Text Only input: a category from a real model. Confidence: 0.92"
-        )
+        return text_only_output
     elif mode == "Image Only":
-        result_text = "Result for Image Only input: a category from a real model. Confidence: 0.88"
+        return image_only_output
     else:
-        result_text = "Please select a classification mode."
-
-    return {
-        "label": result_text,
-        "confidences": {
-            "abcat0100000": 0.05,
-            "abcat0200000": 0.10,
-            "abcat0300000": 0.20,
-            "abcat0400000": 0.45,
-            "abcat0500000": 0.20,
-        },
-    }
+        return {}
 
 
-# Function to update input visibility based on mode selection
-def update_inputs(mode):
+def update_inputs(mode: str):
+    # ... your existing update_inputs function ...
     if mode == "Multimodal":
         return gr.Textbox(visible=True), gr.Image(visible=True)
     elif mode == "Text Only":
         return gr.Textbox(visible=True), gr.Image(visible=False)
     elif mode == "Image Only":
         return gr.Textbox(visible=False), gr.Image(visible=True)
-    else:  # Default case
+    else:
         return gr.Textbox(visible=True), gr.Image(visible=True)
 
 
-# Gradio Interface using Blocks
-with gr.Blocks(title="Multimodal Product Classification") as demo:
+# 📌 CUSTOM CSS FOR FIXED FOOTER
+css_code = """
+/* Target the footer container by its ID and apply fixed positioning */
+#footer-container {
+    position: fixed;
+    bottom: 0;
+    left: 0;
+    right: 0;
+    z-index: 1000; /* Ensure it stays on top of other content */
+    background-color: var(--background-fill-primary); /* Use a Gradio theme variable */
+    padding: var(--spacing-md);
+    border-top: 1px solid var(--border-color-primary);
+}
+
+/* Add padding to the body to prevent content from being hidden by the footer */
+.gradio-container {
+    padding-bottom: 70px !important;
+}
+"""
+
+# 📌 USER INTERFACE
+with gr.Blocks(
+    title="Multimodal Product Classification",
+    theme=gr.themes.Ocean(),
+    css=css_code,
+) as demo:
+    # 📌 TABS
     with gr.Tabs():
+        # ... your existing tabs ...
+        # 📌 APP TAB
         with gr.TabItem("App"):
-            gr.Markdown("# Multimodal Product Classifier")
-            gr.Markdown("Classify products using either text, images, or both.")
+            gr.Markdown("# 🛍️ Multimodal Product Classification")
 
-            with gr.Row():
+            with gr.Row(equal_height=True):
                 with gr.Column(scale=1):
-                    with gr.Column(variant="panel"):
-                        gr.Markdown("### ⚙️ Classification Inputs")
+                    with gr.Column():
+                        gr.Markdown("## ⚙️ Classification Inputs")
 
                         mode_radio = gr.Radio(
                             choices=["Multimodal", "Text Only", "Image Only"],
                             value="Multimodal",
-                            label="Choose Classification Mode",
+                            label="Choose Classification Mode:",
                        )
 
                         text_input = gr.Textbox(
-                            label="Product Description",
+                            label="Product Description:",
                             placeholder="e.g., Apple iPhone 15 Pro Max 256GB",
                        )
+
                         image_input = gr.Image(
                             label="Product Image", type="filepath", visible=True
                        )
 
-                        classify_btn = gr.Button("🚀 Classify Product", variant="primary")
-
-                with gr.Column(scale=1):
-                    with gr.Column(variant="panel"):
-                        gr.Markdown("### 📊 Classification Results")
-
-                        output_label = gr.Label(
-                            label="Predicted Category", num_top_classes=5
+                        classify_button = gr.Button(
+                            "✨ Classify Product", variant="primary"
                        )
 
-                with gr.Accordion("How to use this demo", open=False):
-                    gr.Markdown(
-                        """
-                        This demo classifies a product based on its description and image.
+                with gr.Column(scale=2):
+                    with gr.Column():
+                        gr.Markdown("## 📊 Results")
+
+                        gr.Markdown(
+                            """**💡 How to use this app**
+
+                            This app classifies a product based on its description and image.
                            - **Multimodal:** Uses both text and image for the most accurate prediction.
                            - **Text Only:** Uses only the product description.
                            - **Image Only:** Uses only the product image.
                            """
                        )
 
+                        output_label = gr.Label(
+                            label="Predicted category", num_top_classes=5
+                        )
+
+        # 📌 ABOUT TAB
         with gr.TabItem("About"):
-            gr.Markdown(
-                """
-                ### About the Project
-                This project demonstrates a multimodal classification system trained on data from Best Buy. It uses a Multilayer Perceptron (MLP) model trained on pre-generated embeddings from a Text-based model (MiniLM-L6) and an Image-based model (ConvNeXtV2).
-                """
-            )
-
-        with gr.TabItem("Architecture"):
-            gr.Markdown(
-                """
-                ### Model Architecture
-                This section would contain details about the MLP architecture, the embedding models used, and a diagram explaining the data flow.
-                """
-            )
-
-    # Event listeners for conditional rendering
+            gr.Markdown("""...""")
+
+        # 📌 MODEL TAB
+        with gr.TabItem("Model"):
+            gr.Markdown("""...""")
+
+    # 📌 FOOTER
+    with gr.Row(elem_id="footer-container"):
+        gr.HTML("""
+            <div style="text-align: center;">
+                <b>Connect with me:</b> 💼 <a href="https://www.linkedin.com/in/alex-turpo/" target="_blank">LinkedIn</a> •
+                🐱 <a href="https://github.com/iBrokeTheCode" target="_blank">GitHub</a> •
+                🤗 <a href="https://huggingface.co/iBrokeTheCode" target="_blank">Hugging Face</a>
+            </div>
+        """)
+
+    # 📌 EVENT LISTENERS
     mode_radio.change(
-        fn=update_inputs, inputs=mode_radio, outputs=[text_input, image_input]
+        fn=update_inputs,
+        inputs=mode_radio,
+        outputs=[text_input, image_input],
    )
 
-    # Event listener for the classify button
-    classify_btn.click(
+    classify_button.click(
         fn=predict, inputs=[mode_radio, text_input, image_input], outputs=output_label
     )
 
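The fixed footer in base.py works through the pairing of `elem_id` and the `css` argument: `gr.Row(elem_id="footer-container")` gives the row a stable HTML id, and `gr.Blocks(css=...)` injects the stylesheet that pins that id to the bottom of the viewport. A stripped-down, runnable sketch of the same pattern (the footer text is a placeholder):

```python
import gradio as gr

# Same idea as base.py: pin a Row to the viewport bottom and pad the page
# so normal content is never hidden underneath it.
css = """
#footer-container { position: fixed; bottom: 0; left: 0; right: 0; }
.gradio-container { padding-bottom: 70px !important; }
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("Main content goes here.")
    with gr.Row(elem_id="footer-container"):  # elem_id becomes the HTML id the CSS targets
        gr.HTML("<div style='text-align: center;'>Footer links</div>")

if __name__ == "__main__":
    demo.launch()
```

Note that app.py ships the simpler variant (a plain `<hr>` plus a Row at the end of the page), while base.py carries the fixed-position experiment.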