added execution time computation for alignments and cleaned up old imports
translate_docx.py CHANGED (+2 -5)
@@ -3,8 +3,6 @@ import json
 import requests
 import tqdm
 import os
-import string
-from collections import defaultdict

 from docx import Document
 from docx.text.hyperlink import Hyperlink
@@ -16,7 +14,6 @@ nltk.download('punkt')
 nltk.download('punkt_tab')

 from nltk.tokenize import sent_tokenize, word_tokenize
-from nltk.tokenize.treebank import TreebankWordDetokenizer

 from subprocess import Popen, PIPE

@@ -318,16 +315,16 @@ def translate_document(input_file,
     processed_original_paragraphs_with_runs = [preprocess_runs(runs) for runs in paragraphs_with_runs]

     print("Generating alignments...")
+    start_time = time.time()
     translated_sentences_with_style = generate_alignments(processed_original_paragraphs_with_runs,
                                                           translated_paragraphs, aligner,
                                                           temp_folder, detokenizer)
-    print("Finished alignments")
+    print(f"Finished alignments in {time.time() - start_time} seconds")

     # flatten the sentences into a list of tokens
     translated_tokens_with_style = [item for sublist in translated_sentences_with_style for item in sublist]
     # group the tokens by style/run
     translated_runs_with_style = group_by_style(translated_tokens_with_style, detokenizer)
-    print("Grouped by style")

     # group the runs by original paragraph
     translated_paragraphs_with_style = dict()
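
For reference, the start_time/print pair this commit adds around generate_alignments() can be factored into a small context manager. This is only a sketch, not code from the repository: the timed() helper is hypothetical, and it assumes the time module is already imported in translate_docx.py (the diff calls time.time() without adding the import, so the module presumably imports it elsewhere).

    # Sketch: a reusable wall-clock timer equivalent to the start_time/print
    # pair added around generate_alignments(). timed() is hypothetical and
    # not part of translate_docx.py.
    import time
    from contextlib import contextmanager

    @contextmanager
    def timed(label):
        start_time = time.time()
        try:
            yield
        finally:
            # Same report format as the commit, with elapsed seconds rounded.
            print(f"{label} in {time.time() - start_time:.2f} seconds")

    # Usage mirroring the hunk above:
    # with timed("Finished alignments"):
    #     translated_sentences_with_style = generate_alignments(...)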
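
The last hunk flattens the aligned sentences into one token list and then groups consecutive tokens by run/style. Since group_by_style() itself is not shown in this diff, the following is only a plausible sketch of that step, assuming each item is a (token, style) pair and that the detokenizer is NLTK's TreebankWordDetokenizer (whose module-level import this commit removes, so it is presumably constructed elsewhere).

    # Sketch only: group_by_style() is not shown in the diff. This assumes
    # each item is a (token, style) pair, flattens the sentences, groups
    # consecutive tokens that share a style, and detokenizes each group back
    # into a run of text.
    from itertools import chain, groupby
    from nltk.tokenize.treebank import TreebankWordDetokenizer

    def group_by_style_sketch(sentences_with_style, detokenizer):
        # Flatten, as the nested list comprehension in the hunk does.
        tokens_with_style = list(chain.from_iterable(sentences_with_style))
        runs = []
        for style, group in groupby(tokens_with_style, key=lambda pair: pair[1]):
            text = detokenizer.detokenize([token for token, _ in group])
            runs.append((text, style))
        return runs

    sentences = [[("Hello", "bold"), (",", "bold")],
                 [("world", "regular"), (".", "regular")]]
    print(group_by_style_sketch(sentences, TreebankWordDetokenizer()))
    # [('Hello,', 'bold'), ('world.', 'regular')]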