Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
import torch
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
import gradio as gr
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
def run_inference(review_text: str) -> str:
|
| 7 |
"""
|
| 8 |
-
Perform inference on the given wine review text and return the predicted wine variety
|
|
|
|
| 9 |
|
| 10 |
Args:
|
| 11 |
review_text (str): Wine review text in the format "country [SEP] description".
|
|
@@ -19,6 +21,7 @@ def run_inference(review_text: str) -> str:
|
|
| 19 |
|
| 20 |
# Load tokenizer and model
|
| 21 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
|
|
|
|
| 22 |
model = AutoModelForSequenceClassification.from_pretrained(model_id)
|
| 23 |
|
| 24 |
# Tokenize the input text
|
|
@@ -46,26 +49,37 @@ def run_inference(review_text: str) -> str:
|
|
| 46 |
return variety
|
| 47 |
|
| 48 |
|
| 49 |
-
def predict_wine_variety(country: str, description: str) ->
|
| 50 |
"""
|
| 51 |
-
Combine the provided country and description,
|
|
|
|
|
|
|
| 52 |
Enforces a maximum character limit of 750 on the description.
|
| 53 |
|
| 54 |
Args:
|
| 55 |
country (str): The country of wine origin.
|
| 56 |
description (str): The wine review description.
|
|
|
|
|
|
|
| 57 |
|
| 58 |
Returns:
|
| 59 |
-
|
| 60 |
"""
|
| 61 |
-
# Validate description length
|
| 62 |
if len(description) > 750:
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
# Capitalize input values and format the review text accordingly.
|
| 66 |
review_text = f"{country.capitalize()} [SEP] {description.capitalize()}"
|
| 67 |
predicted_variety = run_inference(review_text)
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
|
| 71 |
if __name__ == "__main__":
|
|
@@ -73,10 +87,18 @@ if __name__ == "__main__":
|
|
| 73 |
fn=predict_wine_variety,
|
| 74 |
inputs=[
|
| 75 |
gr.Textbox(label="Country", placeholder="Enter country of origin..."),
|
| 76 |
-
gr.Textbox(label="Description", placeholder="Enter wine review description...")
|
|
|
|
|
|
|
| 77 |
],
|
| 78 |
-
outputs
|
|
|
|
| 79 |
title="Wine Variety Predictor",
|
| 80 |
-
description=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
)
|
| 82 |
iface.launch()
|
|
|
|
| 1 |
import torch
|
| 2 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 3 |
import gradio as gr
|
| 4 |
+
import json # Added for JSON conversion
|
| 5 |
|
| 6 |
|
| 7 |
def run_inference(review_text: str) -> str:
|
| 8 |
"""
|
| 9 |
+
Perform inference on the given wine review text and return the predicted wine variety
|
| 10 |
+
using ModernBERT, an encoder-only classifier from "spawn99/modernbert-wine-classification".
|
| 11 |
|
| 12 |
Args:
|
| 13 |
review_text (str): Wine review text in the format "country [SEP] description".
|
|
|
|
| 21 |
|
| 22 |
# Load tokenizer and model
|
| 23 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
|
| 24 |
+
# The model used here is a ModernBERT encoder-only classifier.
|
| 25 |
model = AutoModelForSequenceClassification.from_pretrained(model_id)
|
| 26 |
|
| 27 |
# Tokenize the input text
|
|
|
|
| 49 |
return variety
|
| 50 |
|
| 51 |
|
| 52 |
+
def predict_wine_variety(country: str, description: str, output_format: str) -> str:
|
| 53 |
"""
|
| 54 |
+
Combine the provided country and description, perform inference, and format the output
|
| 55 |
+
based on the selected output format.
|
| 56 |
+
|
| 57 |
Enforces a maximum character limit of 750 on the description.
|
| 58 |
|
| 59 |
Args:
|
| 60 |
country (str): The country of wine origin.
|
| 61 |
description (str): The wine review description.
|
| 62 |
+
output_format (str): Either "JSON" to return output as a JSON-formatted string,
|
| 63 |
+
or "Text" for plain text output.
|
| 64 |
|
| 65 |
Returns:
|
| 66 |
+
str: The predicted wine variety formatted as JSON (if selected) or as plain text.
|
| 67 |
"""
|
|
|
|
| 68 |
if len(description) > 750:
|
| 69 |
+
error_msg = "Description exceeds 750 character limit. Please shorten your input."
|
| 70 |
+
if output_format.lower() == "json":
|
| 71 |
+
return json.dumps({"error": error_msg}, indent=2)
|
| 72 |
+
else:
|
| 73 |
+
return error_msg
|
| 74 |
|
| 75 |
# Capitalize input values and format the review text accordingly.
|
| 76 |
review_text = f"{country.capitalize()} [SEP] {description.capitalize()}"
|
| 77 |
predicted_variety = run_inference(review_text)
|
| 78 |
+
|
| 79 |
+
if output_format.lower() == "json":
|
| 80 |
+
return json.dumps({"Variety": predicted_variety}, indent=2)
|
| 81 |
+
else:
|
| 82 |
+
return predicted_variety
|
| 83 |
|
| 84 |
|
| 85 |
if __name__ == "__main__":
|
|
|
|
| 87 |
fn=predict_wine_variety,
|
| 88 |
inputs=[
|
| 89 |
gr.Textbox(label="Country", placeholder="Enter country of origin..."),
|
| 90 |
+
gr.Textbox(label="Description", placeholder="Enter wine review description..."),
|
| 91 |
+
# New radio input to choose between JSON and plain text output formats:
|
| 92 |
+
gr.Radio(choices=["JSON", "Text"], value="JSON", label="Output Format")
|
| 93 |
],
|
| 94 |
+
# Changed outputs to a Textbox so that plain text output shows naturally
|
| 95 |
+
outputs=gr.Textbox(label="Prediction"),
|
| 96 |
title="Wine Variety Predictor",
|
| 97 |
+
description=(
|
| 98 |
+
"Predict the wine variety based on the country and wine review.\n\n"
|
| 99 |
+
"This tool uses ModernBERT, an encoder-only classifier, trained on the wine reviews dataset\n"
|
| 100 |
+
"(model: spawn99/modernbert-wine-classification, dataset: spawn99/wine-reviews).\n\n"
|
| 101 |
+
"Use the Output Format selector to toggle between a JSON-formatted result and a plain text prediction."
|
| 102 |
+
)
|
| 103 |
)
|
| 104 |
iface.launch()
|