SandaAbhishekSagar committed on
Commit
d24353d
1 Parent(s): 522fc1f

revamped code of translate.py

Browse files
Files changed (1) hide show
  1. translate.py +30 -4
translate.py CHANGED
@@ -12,18 +12,44 @@
12
  # input_text = "¿Cómo estás?"
13
  # print("Translated Text:", translate_text(input_text, src_lang="es", tgt_lang="en"))
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  import spaces
16
- from transformers import MarianMTModel, MarianTokenizer
17
 
18
  # Load the Marian tokenizer and model once, at import time, so that
  # translate_text reuses the same objects instead of reloading per call.
19
- model_name = "Helsinki-NLP/opus-mt-mul-en"
20
- tokenizer = MarianTokenizer.from_pretrained(model_name)
21
- translation_model = MarianMTModel.from_pretrained(model_name)
 
22
  @spaces.GPU
23
  def translate_text(text, src_lang="auto", tgt_lang="en"):
24
  """Translate text from any language to English."""
 
25
  inputs = tokenizer(text, return_tensors="pt", padding=True)
 
 
 
 
 
 
 
26
  translated = translation_model.generate(**inputs)
 
 
27
  return tokenizer.decode(translated[0], skip_special_tokens=True)
28
 
29
 
 
 
12
  # input_text = "¿Cómo estás?"
13
  # print("Translated Text:", translate_text(input_text, src_lang="es", tgt_lang="en"))
14
 
15
+ # import spaces
16
+ # from transformers import MarianMTModel, MarianTokenizer
17
+
18
+ # # Preload the translation model globally
19
+ # model_name = "Helsinki-NLP/opus-mt-mul-en"
20
+ # tokenizer = MarianTokenizer.from_pretrained(model_name)
21
+ # translation_model = MarianMTModel.from_pretrained(model_name)
22
+ # @spaces.GPU
23
+ # def translate_text(text, src_lang="auto", tgt_lang="en"):
24
+ # """Translate text from any language to English."""
25
+ # inputs = tokenizer(text, return_tensors="pt", padding=True)
26
+ # translated = translation_model.generate(**inputs)
27
+ # return tokenizer.decode(translated[0], skip_special_tokens=True)
28
+
29
  import spaces
30
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
31
 
32
  # Load the M2M100 tokenizer and model once, at import time, so that
  # translate_text reuses the same objects instead of reloading per call.
33
+ model_name = "facebook/m2m100_418M" # Facebook's multilingual model
34
+ tokenizer = M2M100Tokenizer.from_pretrained(model_name)
35
+ translation_model = M2M100ForConditionalGeneration.from_pretrained(model_name)
36
+
37
  @spaces.GPU
38
  def translate_text(text, src_lang="auto", tgt_lang="en"):
39
  """Translate text from any language to English."""
40
+ # Tokenize the input text
41
  inputs = tokenizer(text, return_tensors="pt", padding=True)
42
+
43
+ # Set the source language and target language for the model
44
+ # If source language is auto, the model will automatically detect it
45
+ tokenizer.src_lang = src_lang if src_lang != "auto" else None
46
+ tokenizer.tgt_lang = tgt_lang
47
+
48
+ # Generate the translation
49
  translated = translation_model.generate(**inputs)
50
+
51
+ # Decode the translated text
52
  return tokenizer.decode(translated[0], skip_special_tokens=True)
53
 
54
 
55
+