TiberiuCristianLeon committed on
Commit
79f676d
·
verified ·
1 Parent(s): dddf264

Update src/Translate.py

Browse files
Files changed (1) hide show
  1. src/Translate.py +5 -2
src/Translate.py CHANGED
@@ -71,20 +71,23 @@ class Translators:
71
 
72
  # tell tokenizer the source language
73
  tokenizer.src_lang = "en_XX"
74
- tokenizer.tgt_lang = "ro_RO"
 
 
75
 
76
  # find the id for the target language and force it at generation
77
  # forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
78
 
79
  # create the pipeline (pass tokenizer and model explicitly)
80
  pipe = pipeline("translation", model=model, tokenizer=tokenizer)
 
81
 
82
  # call the pipeline; generation kwargs are forwarded to model.generate
83
  src_text = "This is a test sentence."
84
  result = pipe(
85
  src_text,
86
  num_beams=4,
87
- max_length=512
88
  )
89
 
90
  return result[0]["translation_text"], self.message
 
71
 
72
  # tell tokenizer the source language
73
  tokenizer.src_lang = "en_XX"
74
+ # tokenizer.tgt_lang = "ro_RO"
75
+ # set the target language as the model's forced BOS token so pipeline will use it implicitly
76
+ model.config.forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
77
 
78
  # find the id for the target language and force it at generation
79
  # forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
80
 
81
  # create the pipeline (pass tokenizer and model explicitly)
82
  pipe = pipeline("translation", model=model, tokenizer=tokenizer)
83
+ # "translation" task was used, instead of "translation_XX_to_YY", defaulting to "translation_en_to_ro"
84
 
85
  # call the pipeline; generation kwargs are forwarded to model.generate
86
  src_text = "This is a test sentence."
87
  result = pipe(
88
  src_text,
89
  num_beams=4,
90
+ max_length=256
91
  )
92
 
93
  return result[0]["translation_text"], self.message