jvamvas's picture
Implement app
1b40346
raw
history blame
3.63 kB
import re
import gradio as gr
from pathlib import Path
from joblib import load
# Deserialize the trained model that ships next to this file. The
# gr.NO_RELOAD guard keeps Gradio's reload mode from re-running this load
# every time the script is re-executed during development.
if gr.NO_RELOAD:
    classifier = load(
        Path(__file__).parent.joinpath("classification_model", "svm_char_word.joblib")
    )
def classify_text(text: str):
    """Classify Romansh text and return a score for every known idiom.

    Args:
        text: The text to classify.

    Returns:
        A dict mapping a readable idiom name to a float score, one entry per
        class in ``classifier.classes_``. If ``text`` is ``None``, empty or
        whitespace-only, a short prompt string is returned instead and the
        classifier is not called.
    """
    # Guard against a missing value as well as blank input, so the handler
    # never fails on `.strip()`.
    if not text or not text.strip():
        return "Please enter some text to classify."

    # Map predictions to readable language names
    language_names = {
        'rm-sursilv': 'Sursilvan',
        'rm-vallader': 'Vallader',
        'rm-rumgr': 'Rumantsch Grischun',
        'rm-surmiran': 'Surmiran',
        'rm-puter': 'Puter',
        'rm-sutsilv': 'Sutsilvan',
        'unknown': 'Unknown'
    }

    # Keep the try body to the single call that may be unsupported, so an
    # unrelated AttributeError is not mistaken for "no predict_proba".
    try:
        probabilities = classifier.predict_proba([text])[0]
    except AttributeError:
        # The model exposes no predict_proba (e.g. a LinearSVC-based one):
        # fall back to a softmax over the decision scores. These values sum
        # to 1 but are not calibrated probabilities.
        import numpy as np

        decision_scores = classifier.decision_function([text])[0]
        # Subtracting the maximum keeps exp() from overflowing.
        exp_scores = np.exp(decision_scores - np.max(decision_scores))
        probabilities = exp_scores / np.sum(exp_scores)

    # Labels without a readable name are reported under their raw label.
    return {
        language_names.get(label, label): float(probability)
        for label, probability in zip(classifier.classes_, probabilities)
    }
# Load the demo sentences from a tab-separated file located relative to this
# script. Each column header is used as the idiom label for the sentences
# listed in that column.
import pandas as pd
import os

tsv_path = os.path.join(
    os.path.dirname(__file__),
    "..",
    "lemmatizer",
    "demo",
    "example_sentences.tsv",
)
df = pd.read_csv(tsv_path, sep='\t')

# Flatten the table column by column into (sentence, idiom) pairs, dropping
# missing cells and sentences that are only whitespace.
examples_data = [
    (sentence, col)
    for col in df.columns
    for sentence in df[col].dropna()
    if sentence.strip()
]

# Parallel lists: the raw sentences, and the captions shown for them.
examples = [pair[0] for pair in examples_data]
example_labels = [f"[{idiom}:] {sentence}" for sentence, idiom in examples_data]
# Assemble the web UI: a text box and a label output side by side, with the
# clickable example sentences below them.
demo = gr.Blocks(title="Romansh Idiom Classifier")
with demo:
    gr.Markdown("# Romansh Idiom Classifier")
    gr.Markdown("Enter Romansh text to classify which idiom/variety it belongs to.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                lines=5,
                max_lines=10,
                label="Romansh Text",
                placeholder="Enter Romansh text here...",
            )
        with gr.Column():
            output = gr.Label(num_top_classes=7, label="Predicted Idiom")

    # Re-run the classifier whenever the content of the text box changes.
    text_input.change(fn=classify_text, inputs=text_input, outputs=output)

    # Example sentences from the TSV file; clicking one fills the text box.
    gr.Examples(
        examples=examples,
        example_labels=example_labels,
        label="Example Sentences",
        examples_per_page=100,
        inputs=text_input,
        outputs=output,
        fn=classify_text,
        cache_examples=True,
        cache_mode='eager',
        run_on_click=True,
        preload=0,
    )

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()