Snigdhapaul2003 committed on
Commit
47a3422
·
verified ·
1 Parent(s): a39182b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -51
app.py CHANGED
@@ -1,65 +1,70 @@
1
  import gradio as gr
2
- from googletrans import Translator
3
 
4
- european_languages = {
5
- 'afrikaans': 'af',
6
- 'albanian': 'sq',
7
- 'armenian': 'hy',
8
- 'azerbaijani': 'az',
9
- 'basque': 'eu',
10
- 'belarusian': 'be',
11
- 'bosnian': 'bs',
12
- 'bulgarian': 'bg',
13
- 'catalan': 'ca',
14
- 'croatian': 'hr',
15
- 'czech': 'cs',
16
- 'danish': 'da',
17
- 'dutch': 'nl',
18
- 'english': 'en',
19
- 'estonian': 'et',
20
- 'finnish': 'fi',
21
- 'french': 'fr',
22
- 'georgian': 'ka',
23
- 'german': 'de',
24
- 'greek': 'el',
25
- 'hungarian': 'hu',
26
- 'icelandic': 'is',
27
- 'irish': 'ga',
28
- 'italian': 'it',
29
- 'latvian': 'lv',
30
- 'lithuanian': 'lt',
31
- 'luxembourgish': 'lb',
32
- 'macedonian': 'mk',
33
- 'maltese': 'mt',
34
- 'norwegian': 'no',
35
- 'polish': 'pl',
36
- 'portuguese': 'pt',
37
- 'romanian': 'ro',
38
- 'russian': 'ru',
39
- 'scots gaelic': 'gd',
40
- 'serbian': 'sr',
41
- 'slovak': 'sk',
42
- 'slovenian': 'sl',
43
- 'spanish': 'es',
44
- 'swedish': 'sv',
45
- 'ukrainian': 'uk',
46
- 'welsh': 'cy',
47
  }
48
 
49
- translator = Translator()
 
50
 
51
  def translate_article(article, language):
52
- lang = translator.detect(article).lang
53
- lang_code = european_languages[language]
54
- translated_text = translator.translate(text, src=lang, dest= lang_code)
55
- return translated_text.text
 
 
 
 
 
 
 
56
 
57
- language_choices = list(european_languages.keys())
58
 
59
  iface = gr.Interface(
60
  fn=translate_article,
61
  inputs=["text",gr.Dropdown(
62
- language_choices, value="english", multiselect=False, label="Choose the language.")],
63
  outputs="text",
64
  title="Translation Tool"
65
  )
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
 
4
+ languages = {
5
+ 'Afrikaans': 'afr_Latn',
6
+ 'Albanian': 'als_Latn',
7
+ 'Basque': 'eus_Latn',
8
+ 'Belarusian': 'bel_Cyrl',
9
+ 'Bosnian': 'bos_Latn',
10
+ 'Bulgarian': 'bul_Cyrl',
11
+ 'Catalan': 'cat_Latn',
12
+ 'Croatian': 'hrv_Latn',
13
+ 'Czech': 'ces_Latn',
14
+ 'Danish': 'dan_Latn',
15
+ 'Dutch': 'nld_Latn',
16
+ 'English': 'eng_Latn',
17
+ 'Estonian': 'est_Latn',
18
+ 'Finnish': 'fin_Latn',
19
+ 'French': 'fra_Latn',
20
+ 'Galician': 'glg_Latn',
21
+ 'German': 'deu_Latn',
22
+ 'Greek': 'ell_Grek',
23
+ 'Hungarian': 'hun_Latn',
24
+ 'Icelandic': 'isl_Latn',
25
+ 'Irish': 'gle_Latn',
26
+ 'Italian': 'ita_Latn',
27
+ 'Lithuanian': 'lit_Latn',
28
+ 'Luxembourgish': 'ltz_Latn',
29
+ 'Macedonian': 'mkd_Cyrl',
30
+ 'Maltese': 'mlt_Latn',
31
+ 'Norwegian Bokmål': 'nob_Latn',
32
+ 'Norwegian Nynorsk': 'nno_Latn',
33
+ 'Polish': 'pol_Latn',
34
+ 'Portuguese': 'por_Latn',
35
+ 'Romanian': 'ron_Latn',
36
+ 'Russian': 'rus_Cyrl',
37
+ 'Serbian': 'srp_Cyrl',
38
+ 'Slovak': 'slk_Latn',
39
+ 'Slovenian': 'slv_Latn',
40
+ 'Spanish': 'spa_Latn',
41
+ 'Swedish': 'swe_Latn',
42
+ 'Ukrainian': 'ukr_Cyrl',
43
+ 'Welsh': 'cym_Latn'
 
 
 
44
  }
45
 
46
+ tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
47
+ model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
48
 
49
  def translate_article(article, language):
50
+ inputs = tokenizer(article, return_tensors="pt")
51
+ if language=='English':
52
+ lang_code='eng_Latn'
53
+ print("Yes")
54
+ else:
55
+ lang_code = languages[language]
56
+ print("No")
57
+ print(lang_code)
58
+ translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id[lang_code])
59
+ result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
60
+ return result
61
 
62
+ language_choices = list(languages.keys())
63
 
64
  iface = gr.Interface(
65
  fn=translate_article,
66
  inputs=["text",gr.Dropdown(
67
+ language_choices, value="English", multiselect=False, label="Choose the language.")],
68
  outputs="text",
69
  title="Translation Tool"
70
  )