# Spaces: Sleeping — Hugging Face Spaces status banner captured along with
# the source scrape; not part of the program itself.
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import re

# ---------------------
# Disclaimer
# ---------------------
# Rendered verbatim as Markdown at the top of the Gradio UI (see gr.Markdown below).
DISCLAIMER = """
**Disclaimer:**
This application is provided for **research and educational purposes only**.
All summaries are generated using an automated language model and may contain inaccuracies or omissions.
It is not intended to replace professional judgment, peer-reviewed references, or expert consultation.
The authors and developers assume no legal liability for any misuse, misinterpretation, or unintended consequences
arising from the use of this tool. Please use responsibly and cross-check results with credible sources.
"""
# ---------------------
# Model Setup
# ---------------------
MODEL_NAME = "allenai/scibert_scivocab_cased"  # Example model for tokenization/embedding (declared but not used below)
SUMMARIZATION_MODEL = "allenai/led-base-16384"  # Example summarization model with a large context window

# Load summarization model and tokenizer once at import time.
# NOTE(review): this downloads weights on first run and blocks startup —
# confirm that is acceptable for the deployment target (e.g. a Space cold start).
summarizer_tokenizer = AutoTokenizer.from_pretrained(SUMMARIZATION_MODEL)
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZATION_MODEL)
# ---------------------
# Utility Functions
# ---------------------
def extract_text_from_pdf(pdf_file):
    """Extract plain text from a PDF file object or path.

    Returns the concatenated text of all pages, each followed by a newline.
    On any failure (PyPDF2 missing, unreadable/corrupt file, ...) returns an
    error string instead of raising, so the UI can display the problem.
    """
    try:
        import PyPDF2  # imported lazily; on Hugging Face Spaces PyPDF2 often works
        reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in reader.pages:
            # BUG FIX: extract_text() may return None (e.g. image-only pages);
            # guard with `or ""` so the concatenation never sees a non-string.
            text += (page.extract_text() or "") + "\n"
        return text
    except Exception as e:
        # Deliberate best-effort contract: surface the error as text, never crash.
        return f"Error reading PDF: {e}"
def clean_text(text):
    """Normalize whitespace: collapse every run of whitespace to one space and trim."""
    collapsed = re.sub(r"\s+", " ", text)
    return collapsed.strip()
def summarize_text(text):
    """Summarize *text* with the module-level LED model and return the summary string.

    Input is truncated to the model's 16384-token window; generation uses
    beam search (4 beams, length penalty 2.0) capped at 512 output tokens.
    """
    encoded = summarizer_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=16384,
    )
    # Inference only — no gradients needed.
    with torch.no_grad():
        generated = summarizer_model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            num_beams=4,
            length_penalty=2.0,
            max_length=512,
            early_stopping=True,
        )
    return summarizer_tokenizer.decode(generated[0], skip_special_tokens=True)
def analyze_text(text):
    """Clean *text*, summarize it, and return a (summary, methods, references) tuple.

    Always returns a 3-tuple so callers can unpack it unconditionally.
    Methods/reference extraction are placeholders; a fuller system might
    use IE/NER for findings and methods, and citation regexes for references.
    """
    text_clean = clean_text(text)
    if len(text_clean) < 50:
        # BUG FIX: the original returned a bare string here, which made the
        # caller's three-way unpack raise ValueError. Return a 3-tuple instead.
        return "Please provide a longer text snippet or PDF.", "", ""
    summary = summarize_text(text_clean)
    # Dummy placeholders until NLP-based extraction is implemented.
    methods = "Key methods extraction is not yet implemented."
    references = "Reference extraction is not yet implemented."
    return summary, methods, references
def process_input(pdf_file, text_snippet):
    """Gradio callback: gather text from the PDF and/or the pasted snippet, then analyze.

    Returns a 3-tuple (summary, methods, references) matching the three
    output Textboxes wired to the Summarize button.
    """
    input_text = ""
    # If a PDF is provided, extract its text (may be an error string on failure).
    if pdf_file is not None:
        input_text = extract_text_from_pdf(pdf_file)
    # If a text snippet is also provided, append it.
    if text_snippet is not None and text_snippet.strip():
        input_text = input_text + " " + text_snippet.strip()
    if not input_text.strip():
        return "No input provided.", "", ""
    result = analyze_text(input_text)
    # ROBUSTNESS FIX: analyze_text historically returned a bare string for
    # too-short input; accept either shape so the unpack below never raises.
    if isinstance(result, str):
        return result, "", ""
    summary, methods, references = result
    return summary, methods, references
# ---------------------
# Gradio Interface
# ---------------------
# Layout: disclaimer + instructions, a row with the two inputs, a button,
# and three output boxes fed by process_input.
with gr.Blocks() as demo:
    gr.Markdown("# NeuroLit Explorer")
    gr.Markdown(DISCLAIMER)
    gr.Markdown("**Instructions:** Upload a PDF or paste a text snippet from a neuroscience article, then click 'Summarize'.")
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF")
        text_input = gr.Textbox(label="Or Paste Article Text")
    summarize_button = gr.Button("Summarize")
    summary_output = gr.Textbox(label="Summary")
    methods_output = gr.Textbox(label="Key Methods")
    references_output = gr.Textbox(label="Relevant References")
    # Wire the button: two inputs -> three outputs; order must match
    # process_input's (summary, methods, references) return tuple.
    summarize_button.click(fn=process_input, inputs=[pdf_input, text_input], outputs=[summary_output, methods_output, references_output])

# Start the app (blocking call).
demo.launch()