jvamvas committed on
Commit 1b40346 · 1 Parent(s): 4463fb4

Implement app

app.py ADDED
@@ -0,0 +1,110 @@
+import re
+import gradio as gr
+from pathlib import Path
+from joblib import load
+
+# Load the classifier only once during development (reload mode)
+if gr.NO_RELOAD:
+    classifier = load(Path(__file__).parent / "classification_model" / "svm_char_word.joblib")
+
+
+def classify_text(text: str):
+    """Classify Romansh text and return prediction with probabilities."""
+    if not text.strip():
+        return "Please enter some text to classify."
+
+    # Map predictions to readable language names
+    language_names = {
+        'rm-sursilv': 'Sursilvan',
+        'rm-vallader': 'Vallader',
+        'rm-rumgr': 'Rumantsch Grischun',
+        'rm-surmiran': 'Surmiran',
+        'rm-puter': 'Puter',
+        'rm-sutsilv': 'Sutsilvan',
+        'unknown': 'Unknown'
+    }
+
+    # Get class labels from the classifier
+    class_labels = classifier.classes_
+
+    # Try to get probabilities if available, otherwise use decision function
+    try:
+        probabilities = classifier.predict_proba([text])[0]
+        # Create result dictionary with probabilities
+        result = {}
+        for i, label in enumerate(class_labels):
+            readable_name = language_names.get(label, label)
+            result[readable_name] = float(probabilities[i])
+    except AttributeError:
+        # LinearSVC doesn't have predict_proba, use decision function instead
+        decision_scores = classifier.decision_function([text])[0]
+        # Convert decision scores to probabilities using softmax
+        import numpy as np
+        exp_scores = np.exp(decision_scores - np.max(decision_scores))
+        probabilities = exp_scores / np.sum(exp_scores)
+
+        result = {}
+        for i, label in enumerate(class_labels):
+            readable_name = language_names.get(label, label)
+            result[readable_name] = float(probabilities[i])
+
+    return result
+
+# Read examples from the TSV file
+import pandas as pd
+import os
+
+tsv_path = os.path.join(os.path.dirname(__file__), "..", "lemmatizer", "demo", "example_sentences.tsv")
+df = pd.read_csv(tsv_path, sep='\t')
+
+# Create a list of examples with their idiom labels
+examples_data = []
+for col in df.columns:
+    for sentence in df[col].dropna():
+        if sentence.strip():  # Skip empty sentences
+            examples_data.append((sentence, col))
+
+# Create the examples list and labels
+examples = [sentence for sentence, _ in examples_data]
+example_labels = [f"[{idiom}:] {sentence}" for sentence, idiom in examples_data]
+
+# Create the Gradio interface
+with gr.Blocks(title="Romansh Idiom Classifier") as demo:
+    gr.Markdown("# Romansh Idiom Classifier")
+    gr.Markdown("Enter Romansh text to classify which idiom/variety it belongs to.")
+
+    with gr.Row():
+        with gr.Column():
+            text_input = gr.Textbox(
+                label="Romansh Text",
+                placeholder="Enter Romansh text here...",
+                lines=5,
+                max_lines=10
+            )
+
+        with gr.Column():
+            output = gr.Label(
+                label="Predicted Idiom",
+                num_top_classes=7
+            )
+
+    # Set up event handlers
+    text_input.change(fn=classify_text, inputs=text_input, outputs=output)
+
+    # Add examples from TSV file
+    gr.Examples(
+        examples=examples,
+        inputs=text_input,
+        label="Example Sentences",
+        example_labels=example_labels,
+        examples_per_page=100,
+        fn=classify_text,
+        outputs=output,
+        run_on_click=True,
+        cache_examples=True,
+        cache_mode='eager',
+        preload=0,
+    )
+
+if __name__ == "__main__":
+    demo.launch()
classification_model/svm_char_word.joblib ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:580b6d0e0cc96adbcf322d67f4caa4e46e7c2afc8e14e8b32f00e1c77f93cd47
+size 47463929
requirements.txt ADDED
@@ -0,0 +1,4 @@
+gradio
+joblib
+scikit-learn
+pandas