FastAPIMT

Running

TiberiuCristianLeon committed on 26 days ago

Commit

79f676d

verified ·

1 Parent(s): dddf264

Update src/Translate.py

Files changed (1) hide show

src/Translate.py CHANGED Viewed

@@ -71,20 +71,23 @@ class Translators:
         # tell tokenizer the source language
         tokenizer.src_lang = "en_XX"
-        tokenizer.tgt_lang = "ro_RO"
         # find the id for the target language and force it at generation
         # forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
         # create the pipeline (pass tokenizer and model explicitly)
         pipe = pipeline("translation", model=model, tokenizer=tokenizer)
         # call the pipeline; generation kwargs are forwarded to model.generate
         src_text = "This is a test sentence."
         result = pipe(
             src_text,
             num_beams=4,
-            max_length=512
         )
         return result[0]["translation_text"], self.message

         # tell tokenizer the source language
         tokenizer.src_lang = "en_XX"
+        # tokenizer.tgt_lang = "ro_RO"
+        # set the target language as the model's forced BOS token so pipeline will use it implicitly
+        model.config.forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
         # find the id for the target language and force it at generation
         # forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
         # create the pipeline (pass tokenizer and model explicitly)
         pipe = pipeline("translation", model=model, tokenizer=tokenizer)
+        # "translation" task was used, instead of "translation_XX_to_YY", defaulting to "translation_en_to_ro"
         # call the pipeline; generation kwargs are forwarded to model.generate
         src_text = "This is a test sentence."
         result = pipe(
             src_text,
             num_beams=4,
+            max_length=256
         )
         return result[0]["translation_text"], self.message