SandaAbhishekSagar
committed on
Commit
·
d24353d
1
Parent(s):
522fc1f
revamped code of translate.py
Browse files- translate.py +30 -4
translate.py
CHANGED
|
@@ -12,18 +12,44 @@
|
|
| 12 |
# input_text = "¿Cómo estás?"
|
| 13 |
# print("Translated Text:", translate_text(input_text, src_lang="es", tgt_lang="en"))
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
import spaces
|
| 16 |
-
from transformers import
|
| 17 |
|
| 18 |
# Preload the translation model globally
|
| 19 |
-
model_name = "
|
| 20 |
-
tokenizer =
|
| 21 |
-
translation_model =
|
|
|
|
| 22 |
@spaces.GPU
def translate_text(text, src_lang="auto", tgt_lang="en"):
    """Translate text from any language to English.

    NOTE(review): src_lang and tgt_lang are accepted for interface
    compatibility but are not consulted by this implementation — the
    input is encoded and generated with the model's current defaults.
    """
    encoded = tokenizer(text, padding=True, return_tensors="pt")
    output_ids = translation_model.generate(**encoded)
    decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return decoded
|
| 28 |
|
| 29 |
|
|
|
|
|
|
| 12 |
# input_text = "¿Cómo estás?"
|
| 13 |
# print("Translated Text:", translate_text(input_text, src_lang="es", tgt_lang="en"))
|
| 14 |
|
| 15 |
+
# import spaces
|
| 16 |
+
# from transformers import MarianMTModel, MarianTokenizer
|
| 17 |
+
|
| 18 |
+
# # Preload the translation model globally
|
| 19 |
+
# model_name = "Helsinki-NLP/opus-mt-mul-en"
|
| 20 |
+
# tokenizer = MarianTokenizer.from_pretrained(model_name)
|
| 21 |
+
# translation_model = MarianMTModel.from_pretrained(model_name)
|
| 22 |
+
# @spaces.GPU
|
| 23 |
+
# def translate_text(text, src_lang="auto", tgt_lang="en"):
|
| 24 |
+
# """Translate text from any language to English."""
|
| 25 |
+
# inputs = tokenizer(text, return_tensors="pt", padding=True)
|
| 26 |
+
# translated = translation_model.generate(**inputs)
|
| 27 |
+
# return tokenizer.decode(translated[0], skip_special_tokens=True)
|
| 28 |
+
|
| 29 |
import spaces
|
| 30 |
+
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
| 31 |
|
| 32 |
# Preload the translation model globally
|
| 33 |
+
model_name = "facebook/m2m100_418M" # Facebook's multilingual model
|
| 34 |
+
# Shared tokenizer instance; translate_text below reads and mutates it.
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
|
| 35 |
+
# Loads pretrained weights once at import time and keeps them in memory.
translation_model = M2M100ForConditionalGeneration.from_pretrained(model_name)
|
| 36 |
+
|
| 37 |
@spaces.GPU
def translate_text(text, src_lang="auto", tgt_lang="en"):
    """Translate text into the target language with the global M2M100 model.

    Args:
        text: Input text to translate.
        src_lang: Language code of the input (e.g. "es"), or "auto".
            M2M100 cannot detect the source language itself, so "auto"
            leaves the tokenizer's current source language unchanged.
        tgt_lang: Language code to translate into (default "en").

    Returns:
        The translated string.
    """
    # The source language must be set BEFORE tokenizing: M2M100Tokenizer
    # prepends the source-language token while encoding. (The previous
    # version assigned it after tokenizer(...) had run — too late — and
    # assigned None for "auto", which M2M100Tokenizer rejects.)
    if src_lang != "auto":
        tokenizer.src_lang = src_lang

    inputs = tokenizer(text, return_tensors="pt", padding=True)

    # M2M100 chooses the output language via forced_bos_token_id;
    # assigning tokenizer.tgt_lang alone has no effect on generate().
    translated = translation_model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.get_lang_id(tgt_lang),
    )
    return tokenizer.decode(translated[0], skip_special_tokens=True)
|
| 53 |
|
| 54 |
|
| 55 |
+
|