Implement app
- app.py +110 -0
- classification_model/svm_char_word.joblib +3 -0
- requirements.txt +4 -0
    	
app.py
ADDED

@@ -0,0 +1,110 @@
import os
import gradio as gr
import numpy as np
import pandas as pd
from pathlib import Path
from joblib import load

# Load the classifier only once during development (reload mode)
if gr.NO_RELOAD:
    classifier = load(Path(__file__).parent / "classification_model" / "svm_char_word.joblib")


def classify_text(text: str):
    """Classify Romansh text and return the prediction with probabilities."""
    if not text.strip():
        return "Please enter some text to classify."

    # Map predictions to readable language names
    language_names = {
        'rm-sursilv': 'Sursilvan',
        'rm-vallader': 'Vallader',
        'rm-rumgr': 'Rumantsch Grischun',
        'rm-surmiran': 'Surmiran',
        'rm-puter': 'Puter',
        'rm-sutsilv': 'Sutsilvan',
        'unknown': 'Unknown'
    }

    # Get class labels from the classifier
    class_labels = classifier.classes_

    # Try to get probabilities if available, otherwise fall back to the decision function
    try:
        probabilities = classifier.predict_proba([text])[0]
    except AttributeError:
        # LinearSVC has no predict_proba, so convert decision scores to
        # pseudo-probabilities with a softmax
        decision_scores = classifier.decision_function([text])[0]
        exp_scores = np.exp(decision_scores - np.max(decision_scores))
        probabilities = exp_scores / np.sum(exp_scores)

    # Build the result dictionary with readable language names
    result = {}
    for i, label in enumerate(class_labels):
        readable_name = language_names.get(label, label)
        result[readable_name] = float(probabilities[i])

    return result


# Read example sentences from the TSV file
tsv_path = os.path.join(os.path.dirname(__file__), "..", "lemmatizer", "demo", "example_sentences.tsv")
df = pd.read_csv(tsv_path, sep='\t')

# Create a list of examples with their idiom labels
examples_data = []
for col in df.columns:
    for sentence in df[col].dropna():
        if sentence.strip():  # Skip empty sentences
            examples_data.append((sentence, col))

# Create the examples list and labels
examples = [sentence for sentence, _ in examples_data]
example_labels = [f"[{idiom}:] {sentence}" for sentence, idiom in examples_data]

# Create the Gradio interface
with gr.Blocks(title="Romansh Idiom Classifier") as demo:
    gr.Markdown("# Romansh Idiom Classifier")
    gr.Markdown("Enter Romansh text to classify which idiom/variety it belongs to.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Romansh Text",
                placeholder="Enter Romansh text here...",
                lines=5,
                max_lines=10
            )

        with gr.Column():
            output = gr.Label(
                label="Predicted Idiom",
                num_top_classes=7
            )

    # Classify on every change of the input text
    text_input.change(fn=classify_text, inputs=text_input, outputs=output)

    # Add examples from the TSV file
    gr.Examples(
        examples=examples,
        inputs=text_input,
        label="Example Sentences",
        example_labels=example_labels,
        examples_per_page=100,
        fn=classify_text,
        outputs=output,
        run_on_click=True,
        cache_examples=True,
        cache_mode='eager',
        preload=0,
    )

if __name__ == "__main__":
    demo.launch()
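For a quick sanity check of the committed model outside the Gradio UI, a minimal sketch like the one below should suffice when run from the repository root, assuming svm_char_word.joblib holds a scikit-learn pipeline that accepts raw strings (which is what classify_text above implies); the sample sentence is only a placeholder greeting:

from pathlib import Path
from joblib import load

# Load the same artifact app.py uses; assumed to be a scikit-learn
# pipeline that maps raw text to idiom codes such as 'rm-sursilv'.
clf = load(Path("classification_model") / "svm_char_word.joblib")

sample = "Bun di!"  # placeholder sentence; any Romansh text can go here
print(clf.predict([sample])[0])   # predicted idiom code
print(list(clf.classes_))         # all labels the model was trained on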
    	
classification_model/svm_char_word.joblib
ADDED

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:580b6d0e0cc96adbcf322d67f4caa4e46e7c2afc8e14e8b32f00e1c77f93cd47
size 47463929
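Note that svm_char_word.joblib is committed as a Git LFS pointer: the three lines above only reference the actual binary (about 47 MB according to the size field), so the repository must be cloned with git-lfs available (or the file fetched with git lfs pull) for app.py to be able to load the model.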
    	
requirements.txt
ADDED

@@ -0,0 +1,4 @@
gradio
joblib
scikit-learn
pandas
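With these four packages installed (for instance via pip install -r requirements.txt), python app.py starts the demo locally through demo.launch(); numpy is not listed explicitly but is pulled in as a dependency of scikit-learn.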