Spaces:
Running
Running
Update src/Translate.py
Browse files - src/Translate.py (+5 -2)
src/Translate.py
CHANGED
|
@@ -71,20 +71,23 @@ class Translators:
|
|
| 71 |
|
| 72 |
# tell tokenizer the source language
|
| 73 |
tokenizer.src_lang = "en_XX"
|
| 74 |
-
tokenizer.tgt_lang = "ro_RO"
|
|
|
|
|
|
|
| 75 |
|
| 76 |
# find the id for the target language and force it at generation
|
| 77 |
# forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
|
| 78 |
|
| 79 |
# create the pipeline (pass tokenizer and model explicitly)
|
| 80 |
pipe = pipeline("translation", model=model, tokenizer=tokenizer)
|
|
|
|
| 81 |
|
| 82 |
# call the pipeline; generation kwargs are forwarded to model.generate
|
| 83 |
src_text = "This is a test sentence."
|
| 84 |
result = pipe(
|
| 85 |
src_text,
|
| 86 |
num_beams=4,
|
| 87 |
-
max_length=
|
| 88 |
)
|
| 89 |
|
| 90 |
return result[0]["translation_text"], self.message
|
|
|
|
| 71 |
|
| 72 |
# tell tokenizer the source language
|
| 73 |
tokenizer.src_lang = "en_XX"
|
| 74 |
+
# tokenizer.tgt_lang = "ro_RO"
|
| 75 |
+
# set the target language as the model's forced BOS token so that the pipeline uses it implicitly
|
| 76 |
+
model.config.forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
|
| 77 |
|
| 78 |
# find the id for the target language and force it at generation
|
| 79 |
# forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
|
| 80 |
|
| 81 |
# create the pipeline (pass tokenizer and model explicitly)
|
| 82 |
pipe = pipeline("translation", model=model, tokenizer=tokenizer)
|
| 83 |
+
# NOTE: the generic "translation" task is used here instead of "translation_XX_to_YY", so the pipeline defaults to "translation_en_to_ro"
|
| 84 |
|
| 85 |
# call the pipeline; generation kwargs are forwarded to model.generate
|
| 86 |
src_text = "This is a test sentence."
|
| 87 |
result = pipe(
|
| 88 |
src_text,
|
| 89 |
num_beams=4,
|
| 90 |
+
max_length=256
|
| 91 |
)
|
| 92 |
|
| 93 |
return result[0]["translation_text"], self.message
|