Spaces:

Rogerjs
/

NeuroLitExplorer

Sleeping

App Files Files Community

Rogerjs committed on Dec 10, 2024

Commit

2912f11

verified ·

1 Parent(s): d538703

Create app.py

Browse files

Files changed (1) hide show

app.py +115 -0

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import re
+# ---------------------
+# Disclaimer
+# ---------------------
+# Usage notice shown as Markdown in the UI. The text is assembled one line
+# per entry; the empty first and last entries produce the newline that opens
+# and closes the notice.
+DISCLAIMER = "\n".join([
+    "",
+    "**Disclaimer:**",
+    "This application is provided for **research and educational purposes only**.",
+    "All summaries are generated using an automated language model and may contain inaccuracies or omissions.",
+    "It is not intended to replace professional judgment, peer-reviewed references, or expert consultation.",
+    "The authors and developers assume no legal liability for any misuse, misinterpretation, or unintended consequences",
+    "arising from the use of this tool. Please use responsibly and cross-check results with credible sources.",
+    "",
+])
+# ---------------------
+# Model Setup
+# ---------------------
+# Checkpoint identifiers. Only SUMMARIZATION_MODEL is loaded below;
+# MODEL_NAME is declared but not referenced by the code in this file.
+SUMMARIZATION_MODEL = "allenai/led-base-16384"  # Example summarization model with a large context window
+MODEL_NAME = "allenai/scibert_scivocab_cased"  # Example model for tokenization/embedding
+
+# The tokenizer and model are created once at import time and shared by
+# every call to summarize_text().
+summarizer_tokenizer = AutoTokenizer.from_pretrained(
+    SUMMARIZATION_MODEL,
+)
+summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(
+    SUMMARIZATION_MODEL,
+)
+# ---------------------
+# Utility Functions
+# ---------------------
+def extract_text_from_pdf(pdf_file):
+    """Extract the text of every page of a PDF.
+
+    Args:
+        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source; the
+            value is handed to the reader unchanged.
+
+    Returns:
+        The text of all pages, each page followed by a newline. If anything
+        goes wrong (including PyPDF2 not being installed), the function does
+        not raise; it returns a string of the form ``"Error reading PDF: ..."``
+        instead, so callers must check for that prefix.
+    """
+    try:
+        # Imported lazily so a missing dependency is reported through the
+        # same error-string path as a corrupt file.
+        import PyPDF2
+        reader = PyPDF2.PdfReader(pdf_file)
+        # A page with no extractable text may yield None; treat it as empty
+        # rather than letting one such page fail the whole document.
+        return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)
+    except Exception as e:
+        return f"Error reading PDF: {e}"
+def clean_text(text):
+    """Collapse every run of whitespace in ``text`` to one space and trim the ends."""
+    collapsed = re.sub(r'\s+', ' ', text)
+    return collapsed.strip()
+def summarize_text(text):
+    """Summarize ``text`` with the module-level LED model.
+
+    Args:
+        text: Text to summarize. It is tokenized with truncation at 16384
+            tokens, so anything beyond that is ignored.
+
+    Returns:
+        The generated summary, decoded with special tokens removed.
+    """
+    encoded = summarizer_tokenizer(text, return_tensors="pt", truncation=True, max_length=16384)
+    input_ids = encoded["input_ids"]
+    # The Hugging Face LED documentation advises putting global attention on
+    # the first token when summarizing; the original call passed no global
+    # attention mask.
+    global_attention_mask = torch.zeros_like(input_ids)
+    global_attention_mask[:, 0] = 1
+    with torch.no_grad():
+        summary_ids = summarizer_model.generate(
+            input_ids,
+            attention_mask=encoded["attention_mask"],
+            global_attention_mask=global_attention_mask,
+            num_beams=4,
+            length_penalty=2.0,
+            max_length=512,
+            early_stopping=True
+        )
+    # generate() returns a batch; only one sequence was submitted.
+    summary = summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+    return summary
+def analyze_text(text):
+    """Clean ``text`` and produce a summary plus placeholder method/reference fields.
+
+    Args:
+        text: Raw article text.
+
+    Returns:
+        A ``(summary, methods, references)`` tuple of strings. When the
+        cleaned text is shorter than 50 characters, no summarization is run
+        and the tuple is ``(<prompt asking for more text>, "", "")``.
+    """
+    # In a more elaborate system, you might:
+    # 1. Extract main findings using IE or NER.
+    # 2. Identify methods mentioned.
+    # 3. Extract references (regex patterns for citations).
+    # Here we just do a simple summarization.
+    text_clean = clean_text(text)
+    if len(text_clean) < 50:
+        # Return the same three-field shape as the normal path: callers
+        # unpack three values, and a bare string would make that unpack fail.
+        return "Please provide a longer text snippet or PDF.", "", ""
+    summary = summarize_text(text_clean)
+    # Dummy logic for key methods and references (in a real app, use NLP-based extraction)
+    methods = "Key methods extraction is not yet implemented."
+    references = "Reference extraction is not yet implemented."
+    return summary, methods, references
+def process_input(pdf_file, text_snippet):
+    """Gradio callback: combine the uploaded PDF and pasted text, then analyze them.
+
+    Args:
+        pdf_file: Uploaded PDF as delivered by the ``gr.File`` input, or
+            ``None`` when nothing was uploaded.
+        text_snippet: Pasted text, or ``None``/blank when nothing was pasted.
+
+    Returns:
+        A ``(summary, methods, references)`` tuple of strings for the three
+        output boxes. User-facing messages (no input, unreadable PDF, text
+        too short) are placed in the first field with the other two empty.
+    """
+    # If PDF is provided, extract text from PDF
+    input_text = ""
+    if pdf_file is not None:
+        input_text = extract_text_from_pdf(pdf_file)
+        # extract_text_from_pdf reports failures as a string with this prefix
+        # instead of raising; show it to the user rather than summarizing it.
+        if input_text.startswith("Error reading PDF:"):
+            return input_text, "", ""
+    # If a text snippet is provided, append it.
+    if text_snippet is not None and text_snippet.strip():
+        input_text = input_text + " " + text_snippet.strip()
+    if not input_text.strip():
+        return "No input provided.", "", ""
+    result = analyze_text(input_text)
+    # analyze_text may hand back a single message string for too-short input;
+    # normalize it so the three-output contract always holds.
+    if isinstance(result, str):
+        return result, "", ""
+    summary, methods, references = result
+    return summary, methods, references
+# ---------------------
+# Gradio Interface
+# ---------------------
+# Page layout: title, disclaimer and instructions on top, the two inputs side
+# by side, then the button and three result boxes.
+with gr.Blocks() as demo:
+    gr.Markdown("# NeuroLit Explorer")
+    gr.Markdown(DISCLAIMER)
+    gr.Markdown(
+        "**Instructions:** Upload a PDF or paste a text snippet from a neuroscience article, then click 'Summarize'."
+    )
+
+    with gr.Row():
+        pdf_input = gr.File(label="Upload PDF")
+        text_input = gr.Textbox(label="Or Paste Article Text")
+
+    summarize_button = gr.Button("Summarize")
+
+    summary_output = gr.Textbox(label="Summary")
+    methods_output = gr.Textbox(label="Key Methods")
+    references_output = gr.Textbox(label="Relevant References")
+
+    # process_input returns three strings, mapped in order to the outputs.
+    summarize_button.click(
+        fn=process_input,
+        inputs=[pdf_input, text_input],
+        outputs=[summary_output, methods_output, references_output],
+    )
+
+# Start the web server as soon as the module is executed.
+demo.launch()