Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Commit 
							
							·
						
						95b368a
	
1
								Parent(s):
							
							8ceca60
								
config
Browse files- modeling.py +29 -2
 
    	
        modeling.py
    CHANGED
    
    | 
         @@ -2,6 +2,7 @@ 
     | 
|
| 2 | 
         
             
            from __future__ import annotations
         
     | 
| 3 | 
         | 
| 4 | 
         
             
            import os
         
     | 
| 
         | 
|
| 5 | 
         
             
            from dataclasses import dataclass
         
     | 
| 6 | 
         | 
| 7 | 
         
             
            import config
         
     | 
| 
         @@ -58,6 +59,27 @@ def process_data( 
     | 
|
| 58 | 
         
             
                return data
         
     | 
| 59 | 
         | 
| 60 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 61 | 
         
             
            def process_output(
         
     | 
| 62 | 
         
             
                input_dir: str,
         
     | 
| 63 | 
         
             
                lang_pairs: list[str],
         
     | 
| 
         @@ -66,7 +88,8 @@ def process_output( 
     | 
|
| 66 | 
         
             
                """Load model outputs."""
         
     | 
| 67 | 
         
             
                # Load the data
         
     | 
| 68 | 
         
             
                data: list[str] = []
         
     | 
| 69 | 
         
            -
                 
     | 
| 
         | 
|
| 70 | 
         
             
                system_dir = os.path.join(input_dir, "evaluation", "system-outputs", model_path)
         
     | 
| 71 | 
         
             
                for lang_pair in lang_pairs:
         
     | 
| 72 | 
         
             
                    src_lang, trg_lang = lang_pair[:2], lang_pair[2:]
         
     | 
| 
         @@ -75,5 +98,9 @@ def process_output( 
     | 
|
| 75 | 
         
             
                    )
         
     | 
| 76 | 
         
             
                    with open(sys_file, "r") as sys_in:
         
     | 
| 77 | 
         
             
                        for sys_line in sys_in:
         
     | 
| 78 | 
         
            -
                             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 79 | 
         
             
                return data
         
     | 
| 
         | 
|
| 2 | 
         
             
            from __future__ import annotations
         
     | 
| 3 | 
         | 
| 4 | 
         
             
            import os
         
     | 
| 5 | 
         
            +
            import re
         
     | 
| 6 | 
         
             
            from dataclasses import dataclass
         
     | 
| 7 | 
         | 
| 8 | 
         
             
            import config
         
     | 
| 
         | 
|
| 59 | 
         
             
                return data
         
     | 
| 60 | 
         | 
| 61 | 
         | 
| 62 | 
         
            +
            def remove_leading_language(line: str) -> str:
                """Remove a language name at the beginning of the string.

                Some zero-shot models output the name of the language at the beginning of the
                string. This is a manual post-processing function that removes the language name
                (partly as an example of how you can do simple fixes to issues that come up during
                analysis using Zeno).

                Only an exact, case-sensitive ``"<Language>: "`` prefix (language name, colon,
                single space) at the very start of the line is removed; anything else is
                returned unchanged.

                Args:
                    line: The line to process.

                Returns:
                    The line with the language removed.
                """
                # "Ukrainian" is the correct spelling; the historical misspelling
                # "Ukranian" is kept so any output that was stripped before still is.
                return re.sub(
                    r"^(English|Japanese|Chinese|Hausa|Icelandic|French|German|Russian"
                    r"|Ukrainian|Ukranian): ",
                    "",
                    line,
                )
         
     | 
| 81 | 
         
            +
             
     | 
| 82 | 
         
            +
             
     | 
| 83 | 
         
             
            def process_output(
         
     | 
| 84 | 
         
             
                input_dir: str,
         
     | 
| 85 | 
         
             
                lang_pairs: list[str],
         
     | 
| 
         | 
|
| 88 | 
         
             
                """Load model outputs."""
         
     | 
| 89 | 
         
             
                # Load the data
         
     | 
| 90 | 
         
             
                data: list[str] = []
         
     | 
| 91 | 
         
            +
                model_config = config.model_configs[model_preset]
         
     | 
| 92 | 
         
            +
                model_path = model_config.path
         
     | 
| 93 | 
         
             
                system_dir = os.path.join(input_dir, "evaluation", "system-outputs", model_path)
         
     | 
| 94 | 
         
             
                for lang_pair in lang_pairs:
         
     | 
| 95 | 
         
             
                    src_lang, trg_lang = lang_pair[:2], lang_pair[2:]
         
     | 
| 
         | 
|
| 98 | 
         
             
                    )
         
     | 
| 99 | 
         
             
                    with open(sys_file, "r") as sys_in:
         
     | 
| 100 | 
         
             
                        for sys_line in sys_in:
         
     | 
| 101 | 
         
            +
                            sys_line = sys_line.strip()
         
     | 
| 102 | 
         
            +
                            if model_config.post_processors is not None:
         
     | 
| 103 | 
         
            +
                                for postprocessor in model_config.post_processors:
         
     | 
| 104 | 
         
            +
                                    sys_line = postprocessor(sys_line)
         
     | 
| 105 | 
         
            +
                            data.append(sys_line)
         
     | 
| 106 | 
         
             
                return data
         
     |