Spaces:
Build error
Build error
| """ | |
| Ryan Tietjen | |
| Sep 2024 | |
| Demo application for paper abstract fragmentation demonstration | |
| """ | |
| import gradio as gr | |
| import tensorflow as tf | |
| from tensorflow import keras | |
| from keras import layers | |
| from timeit import default_timer as timer | |
| from process_input import split_abstract | |
| from process_input import split_abstract_original | |
| from process_input import split_sentences_by_characters | |
| import pandas as pd | |
| import tensorflow_hub as hub | |
| from model import EmbeddingLayer | |
| from process_input import encode_labels | |
# Example abstract offered in the demo UI. It is written one sentence per
# entry and joined with newlines, which is the input format the app asks
# users to follow.
example1 = "\n".join([
    "The aim of this study was to describe the electrocardiographic ( ECG ) evolutionary changes after an acute myocardial infarction ( AMI ) and to evaluate their correlation with left ventricular function and remodeling.",
    "The QRS complex changes after AMI have been correlated with infarct size and left ventricular function.",
    "By contrast , the significance of T wave changes is controversial.",
    "We studied 536 patients enrolled in the GISSI-3-Echo substudy who underwent ECG and echocardiographic studies at 24 to 48 h ( S1 ) , at hospital discharge ( S2 ) , at six weeks ( S3 ) and six months ( S4 ) after AMI.",
    "The number of Qwaves ( nQ ) and QRS quantitative score ( QRSs ) did not change over time.",
    "From S2 to S4 , the number of negative T waves ( nT NEG ) decreased ( p < 0.0001 ) , wall motion abnormalities ( % WMA ) improved ( p < 0.001 ) , ventricular volumes increased ( p < 0.0001 ) while ejection fraction remained stable.",
    "According to the T wave changes after hospital discharge , patients were divided into four groups : stable positive T waves ( group 1 , n = 35 ) , patients who showed a decrease > or = 1 in nT NEG ( group 2 , n = 361 ) , patients with no change in nT NEG ( group 3 , n = 64 ) and those with an increase > or = 1 in nT NEG ( group 4 , n = 76 ).",
    "The QRSs and nQ remained stable in all groups.",
    "Groups 3 and 4 showed less recovery in % WMA , more pronounced ventricular enlargement and progressive decline in ejection fraction than groups 1 and 2 ( interaction time x groups p < 0.0001 ).",
    "The analysis of serial ECG can predict postinfarct left ventricular remodeling.",
    "Normalization of negative T waves during the follow-up appears more strictly related to recovery of regional dysfunction than QRS changes.",
    "Lack of resolution and late appearance of new negative T predict unfavorable remodeling with progressive deterioration of ventricular function.",
])

# Examples handed to the Gradio interface.
sample_list = [example1]
def format_non_empty_lists(objective, background, methods, results, conclusion):
    """
    Render the classified sentences as a plain-text outline.

    Each non-empty category becomes a heading line ("<Name>:") followed by one
    " - <sentence>" line per entry. Categories are emitted in the fixed order
    Objective, Background, Methods, Results, Conclusion and are separated by a
    blank line. Empty categories are skipped entirely.

    Parameters:
    - objective (list): Sentences classified as 'Objective'.
    - background (list): Sentences classified as 'Background'.
    - methods (list): Sentences classified as 'Methods'.
    - results (list): Sentences classified as 'Results'.
    - conclusion (list): Sentences classified as 'Conclusion'.

    Returns:
    - str: The outline with leading/trailing whitespace stripped, or an empty
      string when every category is empty.
    """
    labelled_sections = (
        ('Objective', objective),
        ('Background', background),
        ('Methods', methods),
        ('Results', results),
        ('Conclusion', conclusion),
    )

    rendered = []
    for heading, sentences in labelled_sections:
        if not sentences:
            continue  # nothing was classified into this category
        bullets = "".join(f" - {sentence}\n" for sentence in sentences)
        # The trailing newline leaves a blank line between categories.
        rendered.append(f"{heading}:\n{bullets}\n")

    return "".join(rendered).strip()
# Cache for the loaded classifier. Loading the model (and building the
# embedding layer) is slow, so it is done once on first use instead of on
# every request.
_FRAGMENT_MODEL = None


def _get_fragment_model():
    """Return the sentence classifier, loading it from disk on first call."""
    global _FRAGMENT_MODEL
    if _FRAGMENT_MODEL is None:
        # NOTE(review): an EmbeddingLayer *instance* is passed in
        # custom_objects, unchanged from the original code. Keras documents
        # this mapping as name -> class/function; confirm whether the class
        # itself should be passed instead.
        embed_layer = EmbeddingLayer()
        _FRAGMENT_MODEL = tf.keras.models.load_model(
            "20k_5_epochs.keras",
            custom_objects={'EmbeddingLayer': embed_layer},
        )
    return _FRAGMENT_MODEL


def fragment_single_abstract(abstract: str) -> tuple:
    """
    Classify each sentence of an abstract and return the sentences grouped by section.

    The process involves several steps:
    1. Splitting the abstract into sentences (once in original form for display,
       once in the form fed to the model).
    2. Building the model inputs: one-hot line number, one-hot total line count,
       the sentence text and its character-level split.
    3. Predicting a class index for every sentence with the Keras model stored
       in '20k_5_epochs.keras'.
    4. Grouping the original sentences by predicted class and formatting them
       with `format_non_empty_lists`.

    Parameters:
        abstract (str): The abstract text to process. The UI asks for one
            sentence per line.

    Returns:
        tuple: A tuple containing two elements:
            - str: The formatted outline produced by `format_non_empty_lists`
              (only non-empty categories appear). Empty string when the
              abstract is empty or whitespace-only.
            - float: The time taken to process the abstract (in seconds). The
              first call also includes the one-time model load.

    Example:
        ```python
        abstract_text = "This study aims to evaluate the effectiveness of..."
        fragmented_abstract, processing_time = fragment_single_abstract(abstract_text)
        print("Fragmented Abstract:", fragmented_abstract)
        print("Processing Time:", processing_time)
        ```

    Note:
        - This function assumes that the Keras model file '20k_5_epochs.keras' and the
          custom layer 'EmbeddingLayer' are available and can be loaded.
        - The loaded model is cached at module level and reused across calls.
    """
    start_time = timer()

    # Nothing to classify: skip the helpers and the model entirely.
    if not abstract or not abstract.strip():
        return "", timer() - start_time

    # Original sentences are what gets shown to the user.
    original_abstract = split_abstract_original(abstract)
    sentences_original = pd.DataFrame(original_abstract)["text"].tolist()

    # Split used as model input.
    df = pd.DataFrame(split_abstract(abstract))
    sentences = df["text"].tolist()
    labels = encode_labels(df["target"])

    model = _get_fragment_model()

    data_by_character = split_sentences_by_characters(sentences)
    line_numbers = tf.one_hot(df["line_number"].to_numpy(), depth=15)
    total_line_numbers = tf.one_hot(df["total_lines"].to_numpy(), depth=20)

    sentences_dataset = tf.data.Dataset.from_tensor_slices(
        (line_numbers, total_line_numbers, sentences, data_by_character)
    )
    labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
    dataset = (
        tf.data.Dataset.zip((sentences_dataset, labels_dataset))
        .batch(32)
        .prefetch(tf.data.AUTOTUNE)
    )

    predictions = tf.argmax(model.predict(dataset), axis=1).numpy()

    objective = []
    background = []
    methods = []
    results = []
    conclusion = []
    # Class index -> section, same mapping as the original if/elif chain.
    # NOTE(review): the index order presumably mirrors `encode_labels`; verify
    # against the training label encoding.
    section_by_class = {
        0: objective,
        1: methods,
        2: results,
        3: conclusion,
        4: background,
    }
    for i, prediction in enumerate(predictions):
        section = section_by_class.get(int(prediction))
        if section is not None:
            section.append(sentences_original[i])

    end_time = timer()
    return format_non_empty_lists(objective, background, methods, results, conclusion), end_time - start_time
# Heading shown at the top of the Gradio page.
title = "Paper Abstract Fragmentation With TensorFlow by Ryan Tietjen"

# Markdown blurb shown under the title. Link targets must not be wrapped in
# quotes, and list items need a space after the dash to render as a list.
description = """
This app will take the abstract of a paper and break it down into five categories: objective, background, methods, results, and conclusion.
The dataset used can be found in the [PubMed 200k RCT](https://arxiv.org/abs/1710.06071) and in [this repo](https://github.com/Franck-Dernoncourt/pubmed-rct). The model architecture
was based off of ["Neural Networks for Joint Sentence Classification in Medical Paper Abstracts."](https://arxiv.org/pdf/1612.05251)
This project achieved a testing accuracy of 88.12% and a F1 score of 87.92%. For the whole project, please visit [my GitHub](https://github.com/RyanTietjen/Paper-Fragmentation).
How to use:
- Paste the given abstract into the box below.
- Make sure to separate each sentence by a new line (this helps avoid ambiguity).
- Click submit, and allow the model to run!
"""

# One text input; two outputs matching the (text, seconds) tuple returned by
# fragment_single_abstract.
demo = gr.Interface(
    fn=fragment_single_abstract,
    inputs=gr.Textbox(lines=10, placeholder="Enter abstract here..."),
    outputs=[
        gr.Textbox(label="Fragmented Abstract"),
        gr.Number(label="Time to process (s)"),
    ],
    examples=sample_list,
    title=title,
    description=description,
)

# Start the web app; no public share link is requested.
demo.launch(share=False)