Spaces:

mrm8488
/

summarizer_mlsum

Runtime error

App Files Files Community

mrm8488 committed on Nov 12, 2021

Commit

242e710

1 Parent(s): 7fdee89

Load models before using them

Browse files

Files changed (1) hide show

app.py +13 -5

app.py CHANGED Viewed

@@ -3,19 +3,27 @@ import torch
 from transformers import RobertaTokenizerFast, BertTokenizerFast, EncoderDecoderModel
 models_paths = dict()
 models_paths["fr"] = "mrm8488/camembert2camembert_shared-finetuned-french-summarization"
 models_paths["de"] = "mrm8488/bert2bert_shared-german-finetuned-summarization"
 models_paths["tu"] = "mrm8488/bert2bert_shared-turkish-summarization"
 models_paths["es"] = "Narrativa/bsc_roberta2roberta_shared-spanish-finetuned-mlsum-summarization"
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 def summarize(lang, text):
-    tokenizer = RobertaTokenizerFast.from_pretrained(models_paths[lang]) if lang == "fr" or lang == "es" else BertTokenizerFast.from_pretrained(models_paths[lang])
-    model = EncoderDecoderModel.from_pretrained(models_paths[lang]).to(device)
     inputs = tokenizer([text], padding="max_length",
                        truncation=True, max_length=512, return_tensors="pt")
     input_ids = inputs.input_ids.to(device)
@@ -31,8 +39,8 @@ title = "Multilingual Summarization model (MLSUM)"
 description = "Gradio Demo for Summarization models trained on MLSUM dataset by Manuel Romero"
-article = "<p style='text-align: center'><a href='https://hf.com/mrm8488' target='_blank'>More models</a></p>"
-gr.Interface(fn=summarize, inputs=[gr.inputs.Radio(["fr", "de", "tu", "es"]), gr.inputs.Textbox(
     lines=7, label="Input Text")], outputs="text", theme=theme, title=title, description=description, article=article, enable_queue=True).launch(inline=False)

 from transformers import RobertaTokenizerFast, BertTokenizerFast, EncoderDecoderModel
+LANGUAGES = ["fr", "de", "tu", "es"]
+models = dict()
+tokenizers = dict()
 models_paths = dict()
 models_paths["fr"] = "mrm8488/camembert2camembert_shared-finetuned-french-summarization"
 models_paths["de"] = "mrm8488/bert2bert_shared-german-finetuned-summarization"
 models_paths["tu"] = "mrm8488/bert2bert_shared-turkish-summarization"
 models_paths["es"] = "Narrativa/bsc_roberta2roberta_shared-spanish-finetuned-mlsum-summarization"
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
+for lang in LANGUAGES:
+    tokenizers[lang] = RobertaTokenizerFast.from_pretrained(models_paths[lang]) if lang in ["fr", "es"] else BertTokenizerFast.from_pretrained(models_paths[lang])
+    models[lang] = EncoderDecoderModel.from_pretrained(models_paths[lang]).to(device)
 def summarize(lang, text):
+    tokenizer = tokenizers[lang]
+    model = models[lang]
     inputs = tokenizer([text], padding="max_length",
                        truncation=True, max_length=512, return_tensors="pt")
     input_ids = inputs.input_ids.to(device)
 description = "Gradio Demo for Summarization models trained on MLSUM dataset by Manuel Romero"
+article = "<p style='text-align: center'><a href='https://hf.co/mrm8488' target='_blank'>More models</a></p>"
+gr.Interface(fn=summarize, inputs=[gr.inputs.Radio(LANGUAGES), gr.inputs.Textbox(
     lines=7, label="Input Text")], outputs="text", theme=theme, title=title, description=description, article=article, enable_queue=True).launch(inline=False)