Spaces:
Build error
Build error
| from transformers import AutoTokenizer, BartForConditionalGeneration | |
| import torch | |
| import math | |
class Summarizer:
    """Summarize long text with distilbart-cnn-12-6 by chunking the input,
    summarizing each chunk, and joining the partial summaries."""

    # Single source of truth for the checkpoint so tokenizer and model
    # can never drift apart.
    MODEL_NAME = "sshleifer/distilbart-cnn-12-6"

    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
        self.model = BartForConditionalGeneration.from_pretrained(self.MODEL_NAME)
        # Inference-only usage: disable dropout / switch norm layers to eval.
        self.model.eval()

    def split_text(self, text, max_tokens=1024):
        """Split *text* into chunks of at most *max_tokens* whitespace-separated
        words; returns [] for empty/whitespace-only input.

        NOTE(review): the limit is in WORDS, not model tokens — a 1024-word
        chunk usually exceeds 1024 BART tokens, so the tokenizer truncation
        in summarize() may silently drop the tail of each chunk. Consider a
        smaller default (e.g. ~700) or token-based chunking — TODO confirm.
        """
        words = text.split()
        return [
            " ".join(words[i:i + max_tokens])
            for i in range(0, len(words), max_tokens)
        ]

    def summarize(self, text):
        """Return a concatenated summary of *text*.

        Empty input produces an empty string (no chunks → no summaries).
        """
        partial_summaries = []
        for chunk in self.split_text(text):
            inputs = self.tokenizer(
                chunk, return_tensors="pt", truncation=True, max_length=1024
            )
            # no_grad: pure inference — avoids building autograd graphs.
            with torch.no_grad():
                summary_ids = self.model.generate(
                    inputs["input_ids"],
                    # Pass the mask explicitly so generate() distinguishes
                    # real tokens from padding (also silences the HF warning).
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=200,
                )
            partial_summaries.append(
                self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            )
        return " ".join(partial_summaries)