import os
import re
from subprocess import PIPE, Popen


def moses_to_file(translated_moses_file: str, source_lang: str, target_lang: str, tikal_folder: str,
                  original_xliff_file_path: str) -> str:
    """
    Given a translated plain text (moses) file, this function writes the translations back into the
    xliff file and then asks tikal to merge the xliff into a document again.

    The xliff file at original_xliff_file_path is overwritten in place.

    Parameters:
    translated_moses_file: Path to the plain text file holding the translated segments
    source_lang: Source language of the document
    target_lang: Target language of the document
    tikal_folder: Folder where tikal.sh is located
    original_xliff_file_path: Path to the xliff file to update and merge

    Returns:
    string: Whatever follows "Output: " in the standard output of the tikal merge command

    Raises:
    TypeError: if the merge command printed no "Output:" line (there is no match to index)
    """
    tikal_script = os.path.join(tikal_folder, "tikal.sh")

    # put the translations into the xlf
    tikal_moses_to_xliff_command = [tikal_script, "-lm", original_xliff_file_path,
                                    "-sl", source_lang, "-tl", target_lang,
                                    "-from", translated_moses_file,
                                    "-totrg", "-noalttrans",
                                    "-to", original_xliff_file_path]
    Popen(tikal_moses_to_xliff_command).wait()

    # Any tags that are still present have not been paired between the original and translated texts
    # by tikal, so we remove them. This may happen if a word in the original language has been split
    # into more than one word with other words in between, or because of an error in fastalign.
    # NOTE(review): as written, the pattern below can only match empty strings, so the substitution
    # leaves the text unchanged. It looks like the tag delimiters were lost from the pattern -
    # confirm the intended expression.
    with open(original_xliff_file_path) as xliff_in:
        text = xliff_in.read()
    result = re.sub(r'(.*?)', r'\1', text)
    with open(original_xliff_file_path, "w") as xliff_out:
        xliff_out.write(result)

    # merge into a docx again
    tikal_merge_doc_command = [tikal_script, "-m", original_xliff_file_path]
    final_process = Popen(tikal_merge_doc_command, stdout=PIPE, stderr=PIPE)
    # communicate() already waits for the process to finish; stderr is captured only to be discarded
    stdout, _ = final_process.communicate()

    # get the path to the output file
    output = stdout.decode('utf-8')
    return re.search(r'(?<=Output:\s)(.*)', output)[0]


def file_to_moses(input_file: str, source_lang: str, target_lang: str, tikal_folder: str,
                  original_xliff_file_path: str) -> str:
    """
    Given a document, this function generates an xliff file and then a plain text file with the text
    contents while keeping style and formatting using inline tags

    Parameters:
    input_file: Path to document to process
    source_lang: Source language of the document
    target_lang: Target language of the document
    tikal_folder: Folder where tikal.sh is located
    original_xliff_file_path: Path to xliff file to generate, which will be used later

    Returns:
    string: Path to plain text file
    """
    tikal_script = os.path.join(tikal_folder, "tikal.sh")

    # Extract the document into an xliff file.
    # NOTE(review): original_xliff_file_path is not passed to this command, so the caller presumably
    # has to supply the path where tikal writes its extraction - confirm.
    tikal_xliff_command = [tikal_script, "-x", input_file, "-nocopy",
                           "-sl", source_lang, "-tl", target_lang]
    Popen(tikal_xliff_command).wait()

    # Export the xliff segments as plain text (moses format)
    tikal_moses_command = [tikal_script, "-xm", original_xliff_file_path,
                           "-sl", source_lang, "-tl", target_lang]
    Popen(tikal_moses_command).wait()

    # the returned path is the xliff path with the source language appended as an extension
    return f"{original_xliff_file_path}.{source_lang}"