Spaces:
Build error
Build error
| """ | |
| Ryan Tietjen | |
| Sep 2024 | |
| Demo application demonstrating paper abstract fragmentation | |
| """ | |
| import gradio as gr | |
| import tensorflow as tf | |
| from tensorflow import keras | |
| from keras import layers | |
| from timeit import default_timer as timer | |
| from process_input import split_abstract | |
| from process_input import split_abstract_original | |
| from process_input import split_sentences_by_characters | |
| import pandas as pd | |
| import tensorflow_hub as hub | |
| from model import EmbeddingLayer | |
| from process_input import encode_labels | |
# Example abstracts offered in the Gradio UI (passed as ``examples=sample_list``).
# Each one is written with one sentence per line, which is the input format the
# app's description asks users to follow. They are plain (non-f) strings because
# nothing is interpolated into them.
example1 = """The aim of this study was to verify in bruxism patients the possible efficacy of auricular stimulation in reducing the hypertonicity of some masticatory muscles.
Forty-three bruxism patients were randomly allocated to 3 groups : acupuncture , needle contact for 10 seconds , no treatment ( control ).
Helkimo 's clinical dysfunction index ( CDI ) and anamnestic dysfunction index ( ADI ) were used to assess the functional state of the masticatory system.
The resting electrical activity of the anterior temporalis ( AT ) , masseter ( MM ) , digastric ( DA ) and sternocleidomastoid ( SCM ) muscles was measured , according to Jankelson , with surface electrodes at baseline , after stimulation and continually for 30 minutes ( 120 measurements in total ).
The electromyographical variations in the 3 groups were studied with t test for independent samples.
Acupuncture and needle contact were superior to control in reducing the muscle hypertonicity of all muscles except SCM.
In the comparison between acupuncture and needle contact the former showed better results only for the right TA and left DA ( p = 0.000 ).
In this study it was possible to measure the efficacy of the stimulation of only one point or area , which is an ideal model for research in acupuncture.
The auricular area we chose for stimulation was never used before for the purpose of relaxing masticatory muscles.
Acupuncture and needle contact for 10 seconds showed similar effects."""

example2 = """To investigate the efficacy of 6 weeks of daily low-dose oral prednisolone in improving pain , mobility , and systemic low-grade inflammation in the short term and whether the effect would be sustained at 12 weeks in older adults with moderate to severe knee osteoarthritis ( OA ) .
A total of 125 patients with primary knee OA were randomized 1:1 ; 63 received 7.5 mg/day of prednisolone and 62 received placebo for 6 weeks .
Outcome measures included pain reduction and improvement in function scores and systemic inflammation markers .
Pain was assessed using the visual analog pain scale ( 0-100 mm ) .
Secondary outcome measures included the Western Ontario and McMaster Universities Osteoarthritis Index scores , patient global assessment ( PGA ) of the severity of knee OA , and 6-min walk distance ( 6MWD ) .
Serum levels of interleukin 1 ( IL-1 ) , IL-6 , tumor necrosis factor ( TNF ) - , and high-sensitivity C-reactive protein ( hsCRP ) were measured .
There was a clinically relevant reduction in the intervention group compared to the placebo group for knee pain , physical function , PGA , and 6MWD at 6 weeks .
The mean difference between treatment arms ( 95 % CI ) was 10.9 ( 4.8-18 .0 ) , p < 0.001 ; 9.5 ( 3.7-15 .4 ) , p < 0.05 ; 15.7 ( 5.3-26 .1 ) , p < 0.001 ; and 86.9 ( 29.8-144 .1 ) , p < 0.05 , respectively .
Further , there was a clinically relevant reduction in the serum levels of IL-1 , IL-6 , TNF - , and hsCRP at 6 weeks in the intervention group when compared to the placebo group .
These differences remained significant at 12 weeks .
The Outcome Measures in Rheumatology Clinical Trials-Osteoarthritis Research Society International responder rate was 65 % in the intervention group and 34 % in the placebo group ( p < 0.05 ) .
Low-dose oral prednisolone had both a short-term and a longer sustained effect resulting in less knee pain , better physical function , and attenuation of systemic inflammation in older patients with knee OA ( ClinicalTrials.gov identifier NCT01619163 ) ."""

sample_list = [example1, example2]
def format_non_empty_lists(objective, background, methods, results, conclusion) -> str:
    """
    Render the non-empty sentence lists as a single human-readable string.

    Each non-empty list becomes a section made of a header line ("<Name>:")
    followed by one " - <sentence>" line per item. Sections are separated by a
    blank line and always appear in the order Objective, Background, Methods,
    Results, Conclusion. Empty lists are omitted entirely.

    Parameters:
    - objective (list): Sentences classified as 'Objective'.
    - background (list): Sentences classified as 'Background'.
    - methods (list): Sentences classified as 'Methods'.
    - results (list): Sentences classified as 'Results'.
    - conclusion (list): Sentences classified as 'Conclusion'.

    Returns:
    - str: The formatted sections with surrounding whitespace stripped, or an
      empty string when every list is empty.
    """
    sections = {
        'Objective': objective,
        'Background': background,
        'Methods': methods,
        'Results': results,
        'Conclusion': conclusion,
    }

    rendered = []
    for name, content in sections.items():
        if not content:  # skip categories with no sentences
            continue
        lines = [f"{name}:"]
        lines.extend(f" - {item}" for item in content)
        # Every line of a section, including the last one, ends with a newline.
        rendered.append("\n".join(lines) + "\n")

    # Joining with "\n" leaves exactly one blank line between sections; the final
    # strip() removes the trailing newline (and any whitespace at the very end).
    return "\n".join(rendered).strip()
# Process-wide cache for the classifier so it is read from disk only once
# instead of on every request.
_cached_model = None


def _get_model():
    """Return the sentence-classification Keras model, loading it on first use."""
    global _cached_model
    if _cached_model is None:
        embed_layer = EmbeddingLayer()
        _cached_model = tf.keras.models.load_model(
            "200k_10_epochs.keras", custom_objects={'EmbeddingLayer': embed_layer}
        )
    return _cached_model


def fragment_single_abstract(abstract):
    """
    Classify every sentence of an abstract and return the sentences grouped by section.

    The process involves several steps:
    1. Splitting the abstract twice: with `split_abstract_original` (the text that is
       shown in the output) and with `split_abstract` (the rows fed to the model, which
       provide the "text", "target", "line_number" and "total_lines" columns).
    2. Building the model inputs: one-hot line number (depth 15), one-hot total line
       count (depth 20), the sentences, and their character-level form from
       `split_sentences_by_characters`.
    3. Predicting a class id per sentence with the Keras model stored in
       "200k_10_epochs.keras" (loaded once and reused across calls).
    4. Grouping the original sentences by predicted class id:
       0 = Background, 1 = Conclusion, 2 = Methods, 3 = Objective, 4 = Results.

    Parameters:
    abstract (str): The abstract text that needs to be processed and categorized.

    Returns:
    tuple: A tuple containing two elements:
    - str: The grouped sentences formatted by `format_non_empty_lists`; only non-empty
      categories appear. An empty string is returned for empty or whitespace-only input.
    - float: The time taken to process the abstract (in seconds).

    Example:
    ```python
    abstract_text = "This study aims to evaluate the effectiveness of..."
    formatted_abstract, processing_time = fragment_single_abstract(abstract_text)
    print("Fragmented Abstract:", formatted_abstract)
    print("Processing Time:", processing_time)
    ```

    Note:
    - This function assumes that the Keras model file "200k_10_epochs.keras" and the custom
      layer 'EmbeddingLayer' are available and correctly configured to be loaded.
    - The reported time of the first call includes loading the model; later calls reuse it.
    """
    start_time = timer()

    # Nothing to classify: return early instead of failing on the column lookups below.
    if not abstract or not abstract.strip():
        return "", timer() - start_time

    # Sentences as they should be displayed to the user.
    sentences_original = pd.DataFrame(split_abstract_original(abstract))["text"].tolist()

    # Rows in the form the model consumes.
    df = pd.DataFrame(split_abstract(abstract))
    sentences = df["text"].tolist()
    labels = encode_labels(df["target"])

    data_by_character = split_sentences_by_characters(sentences)
    line_numbers = tf.one_hot(df["line_number"].to_numpy(), depth=15)
    total_line_numbers = tf.one_hot(df["total_lines"].to_numpy(), depth=20)

    sentences_dataset = tf.data.Dataset.from_tensor_slices(
        (line_numbers, total_line_numbers, sentences, data_by_character)
    )
    labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
    dataset = (
        tf.data.Dataset.zip((sentences_dataset, labels_dataset))
        .batch(32)
        .prefetch(tf.data.AUTOTUNE)
    )

    # One predicted class id per sentence, converted to plain NumPy integers.
    predictions = tf.argmax(_get_model().predict(dataset), axis=1).numpy()

    objective = []
    background = []
    methods = []
    results = []
    conclusion = []
    # Predicted class id -> the list collecting that category's sentences.
    bucket_by_class = {
        0: background,
        1: conclusion,
        2: methods,
        3: objective,
        4: results,
    }
    for i, prediction in enumerate(predictions):
        bucket = bucket_by_class.get(int(prediction))
        if bucket is not None:  # ids outside 0-4 are ignored
            bucket.append(sentences_original[i])

    formatted = format_non_empty_lists(objective, background, methods, results, conclusion)
    return formatted, timer() - start_time
title = "Paper Abstract Fragmentation With TensorFlow by Ryan Tietjen"

# Markdown shown under the title. Link targets must not be quoted, and list
# items need a space after the dash (plus a blank line before the list) to
# render as a list.
description = """
This app will take the abstract of a paper and break it down into five categories: objective, background, methods, results, and conclusion.
The dataset used can be found in the [PubMed 200k RCT](https://arxiv.org/pdf/1710.06071) and in [this repo](https://github.com/Franck-Dernoncourt/pubmed-rct). The model architecture
was based off of ["Neural Networks for Joint Sentence Classification in Medical Paper Abstracts."](https://arxiv.org/pdf/1612.05251)
This model achieved a testing accuracy of 88.2% and a F1 score of 88%. For the whole project, please visit [my GitHub](https://github.com/RyanTietjen/Paper-Fragmentation).

How to use:

- Paste the given abstract into the box below.
- Make sure to separate each sentence by a new line (this helps avoid ambiguity).
- Click submit, and allow the model to run!
"""

# The two output components receive, in order, the formatted text and the
# elapsed seconds returned by fragment_single_abstract.
demo = gr.Interface(
    fn=fragment_single_abstract,
    inputs=gr.Textbox(lines=10, placeholder="Enter abstract here..."),
    outputs=[
        gr.Textbox(label="Fragmented Abstract"),
        gr.Number(label="Time to process (s)"),
    ],
    examples=sample_list,
    title=title,
    description=description,
)

demo.launch(share=False)