Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	
		Gundeep Singh
		
	committed on
		
		
					Commit 
							
							·
						
						860d7e4
	
1
								Parent(s):
							
							ebee301
								
Update auto detect label on language detection
Browse files
- .gitignore +1 -1
- app.py +52 -14
- examples.py +14 -0
- iso639_wrapper.py +22 -0
- language_directions.py +19 -18
- project-notes.md +3 -1
- utils.py +17 -1
    	
        .gitignore
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            *pycache*
         | 
|  | |
| 1 | 
            +
            *pycache*
         | 
    	
        app.py
    CHANGED
    
    | @@ -1,15 +1,27 @@ | |
|  | |
| 1 | 
             
            import gradio as gr
         | 
| 2 | 
             
            from language_directions import *
         | 
| 3 | 
             
            from transformers import pipeline
         | 
|  | |
| 4 |  | 
| 5 | 
             
            source_lang_dict = get_all_source_languages()
         | 
| 6 | 
             
            target_lang_dict = {}
         | 
| 7 | 
             
            source_languages = source_lang_dict.keys()
         | 
| 8 |  | 
| 9 | 
            -
            def  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 10 | 
             
                global target_lang_dict
         | 
| 11 | 
            -
                target_lang_dict = get_target_languages(source_lang_dict[ | 
| 12 | 
            -
                target_languages = target_lang_dict.keys()
         | 
| 13 | 
             
                default_target_value = None
         | 
| 14 | 
             
                if "English" in target_languages or "english" in target_languages:
         | 
| 15 | 
             
                    default_target_value = "English"
         | 
| @@ -19,16 +31,41 @@ def source_dropdown_changed(source_dropdown, input_text=""): | |
| 19 | 
             
                                              value=default_target_value,
         | 
| 20 | 
             
                                              label="Target Language")
         | 
| 21 | 
             
                return target_dropdown
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 22 |  | 
| 23 | 
             
            def translate(input_text, source, target):
         | 
| 24 | 
            -
                 | 
|  | |
| 25 | 
             
                  source, _ = auto_detect_language_code(input_text)
         | 
| 26 | 
            -
                 | 
| 27 | 
            -
             | 
| 28 | 
            -
                 | 
| 29 | 
            -
                 | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 32 |  | 
| 33 |  | 
| 34 | 
             
            with gr.Blocks() as demo:
         | 
| @@ -55,14 +92,15 @@ with gr.Blocks() as demo: | |
| 55 | 
             
                                                               value="English",
         | 
| 56 | 
             
                                                               label="Target Language")
         | 
| 57 | 
             
                        translated_textbox = gr.Textbox(lines=5, placeholder="", label="Translated Text")
         | 
|  | |
| 58 | 
             
                btn = gr.Button("Translate")
         | 
| 59 | 
            -
                source_language_dropdown.change( | 
|  | |
| 60 | 
             
                btn.click(translate, inputs=[input_textbox,
         | 
| 61 | 
             
                                             source_language_dropdown,
         | 
| 62 | 
             
                                             target_language_dropdown],
         | 
| 63 | 
            -
                                               outputs=translated_textbox)
         | 
| 64 | 
            -
                gr.Examples( | 
| 65 | 
            -
                            inputs=[input_textbox])
         | 
| 66 |  | 
| 67 | 
             
            if __name__ == "__main__":
         | 
| 68 | 
             
                demo.launch()
         | 
|  | |
| 1 | 
            +
            # from responses import start
         | 
| 2 | 
             
            import gradio as gr
         | 
| 3 | 
             
            from language_directions import *
         | 
| 4 | 
             
            from transformers import pipeline
         | 
| 5 | 
            +
            from examples import example_sentences
         | 
| 6 |  | 
| 7 | 
             
            source_lang_dict = get_all_source_languages()
         | 
| 8 | 
             
            target_lang_dict = {}
         | 
| 9 | 
             
            source_languages = source_lang_dict.keys()
         | 
| 10 |  | 
| 11 | 
            +
            def get_auto_detect_source_dropdown(input_text):
         | 
| 12 | 
            +
                source, _ = auto_detect_language_code(input_text)
         | 
| 13 | 
            +
                language_name = get_name_from_iso_code(source)
         | 
| 14 | 
            +
                source_dropdown_text = "Detected - " + language_name
         | 
| 15 | 
            +
                update_source_languages_dict(source_lang_dict, source_dropdown_text)
         | 
| 16 | 
            +
                source_language_dropdown = gr.Dropdown(choices=source_languages,
         | 
| 17 | 
            +
                                                              value=source_dropdown_text,
         | 
| 18 | 
            +
                                                              label="Source Language")
         | 
| 19 | 
            +
                return source_language_dropdown, language_name
         | 
| 20 | 
            +
              
         | 
| 21 | 
            +
            def get_target_dropdown(source_language_name, input_text):
         | 
| 22 | 
             
                global target_lang_dict
         | 
| 23 | 
            +
                target_lang_dict, source_language = get_target_languages(source_lang_dict[source_language_name], input_text)
         | 
| 24 | 
            +
                target_languages = list(target_lang_dict.keys())
         | 
| 25 | 
             
                default_target_value = None
         | 
| 26 | 
             
                if "English" in target_languages or "english" in target_languages:
         | 
| 27 | 
             
                    default_target_value = "English"
         | 
|  | |
| 31 | 
             
                                              value=default_target_value,
         | 
| 32 | 
             
                                              label="Target Language")
         | 
| 33 | 
             
                return target_dropdown
         | 
| 34 | 
            +
              
         | 
| 35 | 
            +
            def get_dropdown_value(dropdown):
         | 
| 36 | 
            +
                if isinstance(dropdown, gr.Dropdown):
         | 
| 37 | 
            +
                    dropdown_value = dropdown.constructor_args.get('value')
         | 
| 38 | 
            +
                elif isinstance(dropdown, str):
         | 
| 39 | 
            +
                  dropdown_value = dropdown
         | 
| 40 | 
            +
                return dropdown_value
         | 
| 41 | 
            +
              
         | 
| 42 | 
            +
            def get_dropdowns(source_dropdown, input_text):
         | 
| 43 | 
            +
                source_language_name = get_dropdown_value(source_dropdown)
         | 
| 44 | 
            +
                if input_text and source_language_name == "Auto Detect" or source_language_name.startswith("Detected"):
         | 
| 45 | 
            +
                  source_dropdown, source_language_name = get_auto_detect_source_dropdown(input_text)
         | 
| 46 | 
            +
                target_dropdown = get_target_dropdown(source_language_name=source_language_name,
         | 
| 47 | 
            +
                                                      input_text=input_text)
         | 
| 48 | 
            +
                return source_dropdown, target_dropdown
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            def input_changed(source_language_dropdown, input_text=""):
         | 
| 51 | 
            +
                return get_dropdowns(source_dropdown=source_language_dropdown,
         | 
| 52 | 
            +
                                     input_text=input_text)
         | 
| 53 |  | 
| 54 | 
             
            def translate(input_text, source, target):
         | 
| 55 | 
            +
                source_readable = source
         | 
| 56 | 
            +
                if source == "Auto Detect" or source.startswith("Detected"):
         | 
| 57 | 
             
                  source, _ = auto_detect_language_code(input_text)
         | 
| 58 | 
            +
                if source in source_lang_dict.keys():
         | 
| 59 | 
            +
                  source = source_lang_dict[source]
         | 
| 60 | 
            +
                target_lang_dict, _ = get_target_languages(source)
         | 
| 61 | 
            +
                try:
         | 
| 62 | 
            +
                  target = target_lang_dict[target]
         | 
| 63 | 
            +
                  model = f"Helsinki-NLP/opus-mt-{source}-{target}"
         | 
| 64 | 
            +
                  pipe = pipeline("translation", model=model)
         | 
| 65 | 
            +
                  translation = pipe(input_text)
         | 
| 66 | 
            +
                  return translation[0]['translation_text'], ""
         | 
| 67 | 
            +
                except KeyError:
         | 
| 68 | 
            +
                  return "", f"Error: Translation direction {source_readable} to {target} is not supported by Helsinki Translation Models"
         | 
| 69 |  | 
| 70 |  | 
| 71 | 
             
            with gr.Blocks() as demo:
         | 
|  | |
| 92 | 
             
                                                               value="English",
         | 
| 93 | 
             
                                                               label="Target Language")
         | 
| 94 | 
             
                        translated_textbox = gr.Textbox(lines=5, placeholder="", label="Translated Text")
         | 
| 95 | 
            +
                info_label = gr.HTML("")
         | 
| 96 | 
             
                btn = gr.Button("Translate")
         | 
| 97 | 
            +
                source_language_dropdown.change(input_changed, inputs=[source_language_dropdown, input_textbox], outputs=[source_language_dropdown, target_language_dropdown])
         | 
| 98 | 
            +
                input_textbox.change(input_changed, inputs=[source_language_dropdown, input_textbox], outputs=[source_language_dropdown, target_language_dropdown])
         | 
| 99 | 
             
                btn.click(translate, inputs=[input_textbox,
         | 
| 100 | 
             
                                             source_language_dropdown,
         | 
| 101 | 
             
                                             target_language_dropdown],
         | 
| 102 | 
            +
                                               outputs=[translated_textbox, info_label])
         | 
| 103 | 
            +
                gr.Examples(example_sentences, inputs=[input_textbox])
         | 
|  | |
| 104 |  | 
| 105 | 
             
            if __name__ == "__main__":
         | 
| 106 | 
             
                demo.launch()
         | 
    	
        examples.py
    ADDED
    
    | @@ -0,0 +1,14 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            example_sentences = [
         | 
| 2 | 
            +
                "Je te rencontre au café", "Répétez s'il vous plaît.",
         | 
| 3 | 
            +
                "The mountains stand tall, embracing the clouds with their majestic peaks.",
         | 
| 4 | 
            +
                "सितारों का आकाश में खोया होने का एहसास मन को अद्वितीय सुख देता है।",
         | 
| 5 | 
            +
                "ਜਟ ਦਾ ਮੁਕਾਬਲਾ ਦਸ ਮੈਨੂੰ ਕਿਥੇ ਆ ਨੀ।",
         | 
| 6 | 
            +
                "Il profumo dei fiori primaverili riempie l'aria, portando gioia e speranza.",
         | 
| 7 | 
            +
                "Güneş batarken, gökyüzünü altın rengine boyuyor ve doğayı sihirli bir atmosfere bürüyor.",
         | 
| 8 | 
            +
                "De wind fluistert door de bomen, een symfonie van rust en harmonie.",
         | 
| 9 | 
            +
                "눈이 하얗게 내리고, 숲은 고요로움으로 가득 차 있습니다.",
         | 
| 10 | 
            +
                "הכוכבים מאירים בשמי הלילה, משאירים את הלב פתוח לקסמם.",
         | 
| 11 | 
            +
                "Hương hoa lan tỏa trong không khí, mang lại cảm giác êm đềm và sự bình yên.",
         | 
| 12 | 
            +
                "Regnet faller mjukt mot marken, skapar en känsla av förnyelse och friskhet.",
         | 
| 13 | 
            +
                "Η θάλασσα χτυπά την ακτή με απαλές κύματα, φέρνοντας ηρεμία και γαλήνη στην ψυχή.",
         | 
| 14 | 
            +
            ]
         | 
    	
        iso639_wrapper.py
    CHANGED
    
    | @@ -1,4 +1,5 @@ | |
| 1 | 
             
            from iso639 import Lang, iter_langs
         | 
|  | |
| 2 |  | 
| 3 |  | 
| 4 | 
             
            langs = [lang for lang in iter_langs()]
         | 
| @@ -24,6 +25,27 @@ iso5_name_to_code = {lg.name: lg.pt5 for lg in langs} | |
| 24 | 
             
            # https://github.com/Helsinki-NLP/Tatoeba-Challenge/blob/master/README.md#in-more-detail
         | 
| 25 | 
             
            helsinki_precendence = ["iso3", "iso5", "iso1", "iso2t", "iso2b"]
         | 
| 26 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 27 | 
             
            def get_name_from_iso_code(iso_code, precedence=helsinki_precendence):
         | 
| 28 | 
             
                for code_type in precedence:
         | 
| 29 | 
             
                    if code_type == "iso1" and iso_code in iso1_code_to_name.keys():
         | 
|  | |
| 1 | 
             
            from iso639 import Lang, iter_langs
         | 
| 2 | 
            +
            from regex import R
         | 
| 3 |  | 
| 4 |  | 
| 5 | 
             
            langs = [lang for lang in iter_langs()]
         | 
|  | |
| 25 | 
             
            # https://github.com/Helsinki-NLP/Tatoeba-Challenge/blob/master/README.md#in-more-detail
         | 
| 26 | 
             
            helsinki_precendence = ["iso3", "iso5", "iso1", "iso2t", "iso2b"]
         | 
| 27 |  | 
| 28 | 
            +
            rename_dict = {"Panjabi":  "Punjabi"}
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            def rename_languages(language):
         | 
| 31 | 
            +
                if language in rename_dict:
         | 
| 32 | 
            +
                    return rename_dict[language]
         | 
| 33 | 
            +
                return language
         | 
| 34 | 
            +
                
         | 
| 35 | 
            +
            def rename_return_value(func):
         | 
| 36 | 
            +
                def wrapper(*args, **kwargs):
         | 
| 37 | 
            +
                    result = func(*args, **kwargs)
         | 
| 38 | 
            +
                    if isinstance(result, str):
         | 
| 39 | 
            +
                        return rename_languages(result)
         | 
| 40 | 
            +
                    elif isinstance(result, list):
         | 
| 41 | 
            +
                        return [rename_languages(item) for item in result]
         | 
| 42 | 
            +
                    elif isinstance(result, dict):
         | 
| 43 | 
            +
                        return {key: rename_languages(value) for key, value in result.items()}
         | 
| 44 | 
            +
                    else:
         | 
| 45 | 
            +
                        return result
         | 
| 46 | 
            +
                return wrapper
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            @rename_return_value
         | 
| 49 | 
             
            def get_name_from_iso_code(iso_code, precedence=helsinki_precendence):
         | 
| 50 | 
             
                for code_type in precedence:
         | 
| 51 | 
             
                    if code_type == "iso1" and iso_code in iso1_code_to_name.keys():
         | 
    	
        language_directions.py
    CHANGED
    
    | @@ -2,12 +2,12 @@ from helsinki_models import helsinki_models, get_clearly_formatted_langauge_dire | |
| 2 | 
             
            from iso639_wrapper import get_name_from_iso_code
         | 
| 3 | 
             
            from language_detection import detect_language
         | 
| 4 | 
             
            from collections import OrderedDict
         | 
| 5 | 
            -
            from utils import convert_keys_to_lowercase
         | 
| 6 |  | 
| 7 |  | 
| 8 | 
             
            def get_all_source_languages():
         | 
| 9 | 
             
                """
         | 
| 10 | 
            -
                Returns a human-readable `dict  | 
| 11 | 
             
                based on the available models.
         | 
| 12 | 
             
                """
         | 
| 13 | 
             
                source_languages = {}
         | 
| @@ -23,6 +23,9 @@ def get_all_source_languages(): | |
| 23 | 
             
                    { **{'Auto Detect' : 'Auto Detect'}, **source_languages}
         | 
| 24 | 
             
                return all_source_langs_including_auto_detect
         | 
| 25 |  | 
|  | |
|  | |
|  | |
| 26 | 
             
            def get_target_languages(source_language_code, input_text=None):
         | 
| 27 | 
             
                """
         | 
| 28 | 
             
                Returns a human-readable `dict of target languages names to codes` 
         | 
| @@ -40,26 +43,24 @@ def get_target_languages(source_language_code, input_text=None): | |
| 40 | 
             
                        target_language_name = get_name_from_iso_code(target_language)
         | 
| 41 | 
             
                        if target_language_name:
         | 
| 42 | 
             
                            target_languages[target_language_name] = target_language
         | 
| 43 | 
            -
                return OrderedDict(sorted(target_languages.items()))
         | 
| 44 |  | 
| 45 | 
             
            def auto_detect_language_code(input_text):
         | 
|  | |
|  | |
| 46 | 
             
                if not input_text:
         | 
| 47 | 
            -
                    return  | 
| 48 | 
            -
                 | 
| 49 | 
            -
                if  | 
| 50 | 
            -
                     | 
| 51 | 
            -
                    return "unknown", True
         | 
| 52 | 
            -
                elif language in list(get_all_source_languages().keys())\
         | 
| 53 | 
            -
                    or language.lower() in [k.lower() for k in list(get_all_source_languages().keys())]:
         | 
| 54 | 
            -
                    source_languages_dict = convert_keys_to_lowercase(get_all_source_languages())
         | 
| 55 | 
            -
                    source_language_code = source_languages_dict.get(language.lower())
         | 
| 56 | 
            -
                    return source_language_code, False
         | 
| 57 | 
            -
                elif language in list(get_all_source_languages().values())\
         | 
| 58 | 
            -
                    or language.lower() in [k.lower() for k in list(get_all_source_languages().values())]:
         | 
| 59 | 
            -
                    source_language_code = language
         | 
| 60 | 
            -
                    return source_language_code, False
         | 
| 61 | 
             
                else:
         | 
| 62 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 63 |  | 
| 64 |  | 
| 65 | 
             
            # Example usage:
         | 
|  | |
| 2 | 
             
            from iso639_wrapper import get_name_from_iso_code
         | 
| 3 | 
             
            from language_detection import detect_language
         | 
| 4 | 
             
            from collections import OrderedDict
         | 
| 5 | 
            +
            from utils import convert_keys_to_lowercase, match_in_keys, match_in_values
         | 
| 6 |  | 
| 7 |  | 
| 8 | 
             
            def get_all_source_languages():
         | 
| 9 | 
             
                """
         | 
| 10 | 
            +
                Returns a human-readable `dict source_languages_names:codes` 
         | 
| 11 | 
             
                based on the available models.
         | 
| 12 | 
             
                """
         | 
| 13 | 
             
                source_languages = {}
         | 
|  | |
| 23 | 
             
                    { **{'Auto Detect' : 'Auto Detect'}, **source_languages}
         | 
| 24 | 
             
                return all_source_langs_including_auto_detect
         | 
| 25 |  | 
| 26 | 
            +
            def update_source_languages_dict(source_languages_dict, auto_detected_language):
         | 
| 27 | 
            +
                source_languages_dict[auto_detected_language] = "Auto Detect"
         | 
| 28 | 
            +
             | 
| 29 | 
             
            def get_target_languages(source_language_code, input_text=None):
         | 
| 30 | 
             
                """
         | 
| 31 | 
             
                Returns a human-readable `dict of target languages names to codes` 
         | 
|  | |
| 43 | 
             
                        target_language_name = get_name_from_iso_code(target_language)
         | 
| 44 | 
             
                        if target_language_name:
         | 
| 45 | 
             
                            target_languages[target_language_name] = target_language
         | 
| 46 | 
            +
                return OrderedDict(sorted(target_languages.items())), source_language_code
         | 
| 47 |  | 
| 48 | 
             
            def auto_detect_language_code(input_text):
         | 
| 49 | 
            +
                DEFAULT_SOURCE_LANGUAGE = "en"
         | 
| 50 | 
            +
                detected_language_string = DEFAULT_SOURCE_LANGUAGE
         | 
| 51 | 
             
                if not input_text:
         | 
| 52 | 
            +
                    return DEFAULT_SOURCE_LANGUAGE, True
         | 
| 53 | 
            +
                language_or_code = detect_language(input_text)
         | 
| 54 | 
            +
                if language_or_code == "unknown":
         | 
| 55 | 
            +
                    return DEFAULT_SOURCE_LANGUAGE, True
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 56 | 
             
                else:
         | 
| 57 | 
            +
                    detected_language_string = match_in_keys(get_all_source_languages(), language_or_code)
         | 
| 58 | 
            +
                    if not detected_language_string:
         | 
| 59 | 
            +
                        detected_language_string = match_in_values(get_all_source_languages(), language_or_code)
         | 
| 60 | 
            +
                    if detected_language_string:
         | 
| 61 | 
            +
                        return detected_language_string, False
         | 
| 62 | 
            +
                    else:
         | 
| 63 | 
            +
                        return DEFAULT_SOURCE_LANGUAGE, True
         | 
| 64 |  | 
| 65 |  | 
| 66 | 
             
            # Example usage:
         | 
    	
        project-notes.md
    CHANGED
    
    | @@ -1,4 +1,6 @@ | |
| 1 | 
             
            # Scope of project
         | 
| 2 | 
             
            1. Enable multiple languages translate based on helsinki models.✅
         | 
| 3 | 
             
            2. Enable auto detect language ✅
         | 
| 4 | 
            -
            3. Show error message instead of gradio error
         | 
|  | |
|  | 
|  | |
| 1 | 
             
            # Scope of project
         | 
| 2 | 
             
            1. Enable multiple languages translate based on helsinki models.✅
         | 
| 3 | 
             
            2. Enable auto detect language ✅
         | 
| 4 | 
            +
            3. Show error message instead of gradio error ✅
         | 
| 5 | 
            +
            4. Add examples ✅
         | 
| 6 | 
            +
            5. Auto detect on text change ✅
         | 
    	
        utils.py
    CHANGED
    
    | @@ -1,2 +1,18 @@ | |
|  | |
|  | |
|  | |
| 1 | 
             
            def convert_keys_to_lowercase(input_dict):
         | 
| 2 | 
            -
                return {key.lower(): value for key, value in input_dict.items()}
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from functools import cache
         | 
| 2 | 
            +
             | 
| 3 | 
            +
             | 
| 4 | 
             
            def convert_keys_to_lowercase(input_dict):
         | 
| 5 | 
            +
                return {key.lower(): value for key, value in input_dict.items()}
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            def match_in_keys(dictionary, search_string):
         | 
| 8 | 
            +
                lowercase_dict = convert_keys_to_lowercase(dictionary)
         | 
| 9 | 
            +
                if search_string.lower() in list(lowercase_dict.keys()):
         | 
| 10 | 
            +
                    return lowercase_dict.get(search_string.lower())
         | 
| 11 | 
            +
                for l_key in lowercase_dict.keys():
         | 
| 12 | 
            +
                    if l_key.startswith(search_string.lower()):
         | 
| 13 | 
            +
                        return lowercase_dict.get(l_key)
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            def match_in_values(dictionary, search_string):
         | 
| 16 | 
            +
                lowercase_dict = convert_keys_to_lowercase(dictionary)
         | 
| 17 | 
            +
                if search_string.lower() in list(lowercase_dict.values()):
         | 
| 18 | 
            +
                    return search_string
         |