Spaces:
Runtime error
Runtime error
| """ | |
| Shared utility methods for this module. | |
| """ | |
from ctypes import Array
import datetime
import re
from typing import Optional

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    MBartForConditionalGeneration,
    MBartTokenizer,
    pipeline,
)
from transformers import PegasusTokenizer, PegasusForConditionalGeneration
def lowercase_string(string: str) -> Optional[str]:
    """Return the lowercased version of a string.

    Args:
        string: String to lowercase.

    Returns:
        The string in lowercase, or None when the input is not a str.
        (The original annotation claimed `-> str`, but the code explicitly
        returns None for non-string input.)
    """
    if isinstance(string, str):
        return string.lower()
    return None
| from functools import lru_cache | |
@lru_cache(maxsize=1)
def get_sentiment_pipeline():
    """Build and cache the multilingual BERT sentiment-analysis pipeline.

    Loading the model and tokenizer is expensive; ``lru_cache`` (imported
    at module level but previously unused) ensures they are loaded only
    once per process instead of on every call from ``score_sentiment``.

    Returns:
        transformers.Pipeline: a "sentiment-analysis" pipeline backed by
        "nlptown/bert-base-multilingual-uncased-sentiment".
    """
    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
def score_sentiment(input: str):
    """Score the sentiment of a text with a pretrained Transformers pipeline.

    Note: the parameter name ``input`` shadows the builtin; it is kept
    because it is part of the public interface (callers may pass it by
    keyword).

    Args:
        input (str): Text to be scored.

    Returns:
        tuple: (label, score) — the pipeline's predicted label and its
        confidence score for the lowercased input text.
    """
    sentiment_pipeline = get_sentiment_pipeline()
    # The model card is for an uncased model, so lowercase before scoring.
    result = sentiment_pipeline(input.lower())[0]
    return result['label'], result['score']
@lru_cache(maxsize=1)
def get_summarization_pipeline_nl():
    """Build and cache a Dutch (NL) summarization pipeline.

    Uses an MBart model fine-tuned on a Dutch CNN/DailyMail dataset with
    the base mbart-large-cc25 tokenizer. ``lru_cache`` avoids reloading
    the model on every summarization request.

    Returns:
        transformers.Pipeline: a "summarization" pipeline configured to
        generate Dutch output.
    """
    undisputed_best_model = MBartForConditionalGeneration.from_pretrained(
        "ml6team/mbart-large-cc25-cnn-dailymail-nl"
    )
    tokenizer = MBartTokenizer.from_pretrained("facebook/mbart-large-cc25")
    summarization_pipeline = pipeline(
        task="summarization",
        model=undisputed_best_model,
        tokenizer=tokenizer,
    )
    # Force the decoder to start with the Dutch language code so the
    # generated summary is in Dutch.
    summarization_pipeline.model.config.decoder_start_token_id = (
        tokenizer.lang_code_to_id["nl_XX"]
    )
    return summarization_pipeline
def summarize_nl(input: str) -> str:
    """Summarize a Dutch text.

    Args:
        input (str): Dutch text to summarize.

    Returns:
        str: The generated summary text.
    """
    pipe = get_summarization_pipeline_nl()
    outputs = pipe(
        input,
        do_sample=True,
        top_p=0.75,
        top_k=50,
        min_length=50,
        early_stopping=True,
        truncation=True,
    )
    return outputs[0]["summary_text"]
@lru_cache(maxsize=1)
def get_pegasus():
    """Load and cache the PEGASUS-XSum summarization model and tokenizer.

    ``lru_cache`` ensures the heavyweight pretrained weights are loaded
    only once per process instead of on every call from ``summarize_en``.

    Returns:
        tuple: (model, tokenizer) for "google/pegasus-xsum".
    """
    model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
    tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
    return model, tokenizer
def summarize_en(input: str) -> str:
    """Summarize an English text with PEGASUS-XSum.

    Args:
        input (str): English text to summarize.

    Returns:
        str: The generated summary text.
    """
    model, tokenizer = get_pegasus()
    # truncation=True makes max_length effective: without it, inputs longer
    # than 1024 tokens were passed through untruncated and could break
    # generation.
    inputs = tokenizer(input, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    result = tokenizer.batch_decode(
        summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
    return result