|
|
import re |
|
|
import gradio as gr |
|
|
from pathlib import Path |
|
|
from joblib import load |
|
|
|
|
|
|
|
|
if gr.NO_RELOAD:
    # Deserialize the trained classifier that ships next to this file in
    # classification_model/. The load sits under gr.NO_RELOAD, presumably so
    # it is not repeated when Gradio re-executes the script in reload mode.
    classifier = load(
        Path(__file__).parent.joinpath("classification_model", "svm_char_word.joblib")
    )
|
|
|
|
|
|
|
|
def classify_text(text: str):
    """Classify Romansh text and return per-idiom scores.

    Args:
        text: The text to classify. ``None``, an empty string, or a
            whitespace-only string is treated as "no input".

    Returns:
        A dict mapping a human-readable idiom name to a float score, one
        entry per class of the module-level ``classifier``; or, when there
        is no input, a prompt string asking the user to enter text.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError before the prompt could be returned.
    if not text or not text.strip():
        return "Please enter some text to classify."

    # Display names for the class labels; labels missing from this table
    # are shown unchanged.
    language_names = {
        'rm-sursilv': 'Sursilvan',
        'rm-vallader': 'Vallader',
        'rm-rumgr': 'Rumantsch Grischun',
        'rm-surmiran': 'Surmiran',
        'rm-puter': 'Puter',
        'rm-sutsilv': 'Sutsilvan',
        'unknown': 'Unknown',
    }

    class_labels = classifier.classes_

    # Keep the try body to the single call that signals "no probability
    # support", so an unrelated AttributeError elsewhere is not mistaken
    # for it.
    try:
        probabilities = classifier.predict_proba([text])[0]
    except AttributeError:
        import numpy as np

        # Fallback: softmax over the raw decision scores. Subtracting the
        # maximum first keeps exp() from overflowing. The result sums to 1
        # but is a normalized score, not a calibrated probability.
        decision_scores = classifier.decision_function([text])[0]
        exp_scores = np.exp(decision_scores - np.max(decision_scores))
        probabilities = exp_scores / np.sum(exp_scores)

    # Built once for both paths; float() converts numpy scalars to plain
    # Python floats.
    return {
        language_names.get(label, label): float(probabilities[i])
        for i, label in enumerate(class_labels)
    }
|
|
|
|
|
|
|
|
import pandas as pd
import os

# Example sentences are read from a tab-separated file located relative to
# this script (../lemmatizer/demo/example_sentences.tsv).
tsv_path = os.path.join(
    os.path.dirname(__file__), "..", "lemmatizer", "demo", "example_sentences.tsv"
)
df = pd.read_csv(tsv_path, sep='\t')

# Flatten the table column by column into (sentence, column name) pairs,
# skipping missing cells and cells that contain only whitespace.
examples_data = []
for col in df.columns:
    examples_data.extend(
        (cell, col) for cell in df[col].dropna() if cell.strip()
    )

# Parallel lists: the raw sentences fed to the textbox, and the display
# labels that prefix each sentence with the name of its source column.
examples = [pair[0] for pair in examples_data]
example_labels = [f"[{idiom}:] {sentence}" for sentence, idiom in examples_data]
|
|
|
|
|
|
|
|
# Build the interface: a text input on the left, the predicted-idiom label
# on the right, and a list of clickable example sentences underneath.
with gr.Blocks(title="Romansh Idiom Classifier") as demo:
    gr.Markdown("# Romansh Idiom Classifier")
    gr.Markdown("Enter Romansh text to classify which idiom/variety it belongs to.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                lines=5,
                max_lines=10,
                label="Romansh Text",
                placeholder="Enter Romansh text here...",
            )

        with gr.Column():
            output = gr.Label(num_top_classes=7, label="Predicted Idiom")

    # Re-classify whenever the textbox content changes.
    text_input.change(classify_text, inputs=text_input, outputs=output)

    # Example sentences, each shown with a label naming its source column.
    gr.Examples(
        # What is listed and how it is displayed.
        label="Example Sentences",
        examples=examples,
        example_labels=example_labels,
        examples_per_page=100,
        # How an example is wired to the classifier.
        inputs=text_input,
        outputs=output,
        fn=classify_text,
        run_on_click=True,
        # Caching / preloading configuration.
        cache_examples=True,
        cache_mode='eager',
        preload=0,
    )


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|