Spaces:

sashtech
/

aihumanifierandgrmoform

Sleeping

App Files Files Community

sashtech committed on Sep 25, 2024

Commit

c7c1d09

verified ·

1 Parent(s): fdbab88

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -39

app.py CHANGED Viewed

@@ -17,12 +17,10 @@ nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('averaged_perceptron_tagger')
 nltk.download('averaged_perceptron_tagger_eng')
 nltk.download('wordnet')
 nltk.download('omw-1.4')
 nltk.download('punkt_tab')
 # Initialize stopwords
 stop_words = set(stopwords.words("english"))
@@ -45,7 +43,6 @@ except OSError:
 def plagiarism_removal(text):
     def plagiarism_remover(word):
-        # Handle stopwords, punctuation, and excluded words
         if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
             return word
@@ -53,60 +50,48 @@ def plagiarism_removal(text):
         synonyms = set()
         for syn in wordnet.synsets(word):
             for lemma in syn.lemmas():
-                # Exclude overly technical synonyms or words with underscores
                 if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
                     synonyms.add(lemma.name())
-        # Get part of speech for word and filter synonyms with the same POS
         pos_tag_word = nltk.pos_tag([word])[0]
-        # Avoid replacing certain parts of speech
         if pos_tag_word[1] in exclude_tags:
             return word
         filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
-        # Return original word if no appropriate synonyms found
         if not filtered_synonyms:
             return word
-        # Select a random synonym from the filtered list
         synonym_choice = random.choice(filtered_synonyms)
-        # Retain original capitalization
         if word.istitle():
             return synonym_choice.title()
         return synonym_choice
-    # Tokenize, replace words, and join them back
     para_split = word_tokenize(text)
     final_text = [plagiarism_remover(word) for word in para_split]
-    # Handle spacing around punctuation correctly
     corrected_text = []
     for i in range(len(final_text)):
         if final_text[i] in string.punctuation and i > 0:
-            corrected_text[-1] += final_text[i]  # Append punctuation to previous word
         else:
             corrected_text.append(final_text[i])
     return " ".join(corrected_text)
-# Function to predict the label and score for English text (AI Detection)
 def predict_en(text):
     res = pipeline_en(text)[0]
     return res['label'], res['score']
-# Function to remove redundant and meaningless words
 def remove_redundant_words(text):
     doc = nlp(text)
     meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
     filtered_text = [token.text for token in doc if token.text.lower() not in meaningless_words]
     return ' '.join(filtered_text)
-# Function to fix spacing before punctuation
 def fix_punctuation_spacing(text):
-    # Split the text into words and punctuation
     words = text.split(' ')
     cleaned_words = []
     punctuation_marks = {',', '.', "'", '!', '?', ':'}
@@ -120,12 +105,10 @@ def fix_punctuation_spacing(text):
     return ' '.join(cleaned_words).replace(' ,', ',').replace(' .', '.').replace(" '", "'") \
                                     .replace(' !', '!').replace(' ?', '?').replace(' :', ':')
-# Function to fix possessives like "Earth's"
 def fix_possessives(text):
     text = re.sub(r'(\w)\s\'\s?s', r"\1's", text)
     return text
-# Function to capitalize the first letter of sentences and proper nouns
 def capitalize_sentences_and_nouns(text):
     doc = nlp(text)
     corrected_text = []
@@ -143,7 +126,6 @@ def capitalize_sentences_and_nouns(text):
     return ' '.join(corrected_text)
-# Function to force capitalization of the first letter of every sentence and ensure full stops
 def force_first_letter_capital(text):
     sentences = re.split(r'(?<=\w[.!?])\s+', text)
     capitalized_sentences = []
@@ -157,7 +139,6 @@ def force_first_letter_capital(text):
     return " ".join(capitalized_sentences)
-# Function to correct tense errors in a sentence
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
@@ -169,7 +150,6 @@ def correct_tense_errors(text):
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
-# Function to check and correct article errors
 def correct_article_errors(text):
     doc = nlp(text)
     corrected_text = []
@@ -186,7 +166,6 @@ def correct_article_errors(text):
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
-# Function to ensure subject-verb agreement
 def ensure_subject_verb_agreement(text):
     doc = nlp(text)
     corrected_text = []
@@ -199,7 +178,6 @@ def ensure_subject_verb_agreement(text):
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
-# Function to correct spelling errors
 def correct_spelling(text):
     words = text.split()
     corrected_words = []
@@ -211,21 +189,25 @@ def correct_spelling(text):
             corrected_words.append(word)
     return ' '.join(corrected_words)
-# Main function for paraphrasing and grammar correction
 def paraphrase_and_correct(text):
-    # Add synonym replacement here
-    cleaned_text = remove_redundant_words(text)
-    plag_removed = plagiarism_removal(cleaned_text)
-    paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
-    paraphrased_text = force_first_letter_capital(paraphrased_text)
-    paraphrased_text = correct_article_errors(paraphrased_text)
-    paraphrased_text = correct_tense_errors(paraphrased_text)
-    paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
-    paraphrased_text = fix_possessives(paraphrased_text)
-    paraphrased_text = correct_spelling(paraphrased_text)
-    paraphrased_text = fix_punctuation_spacing(paraphrased_text)
-    return paraphrased_text
 # Gradio app setup
 with gr.Blocks() as demo:
@@ -244,4 +226,4 @@ with gr.Blocks() as demo:
         button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
-demo.launch(share=True)

 # Download the NLTK data packages listed below, one after another.
 for _resource in (
     'stopwords',
     'averaged_perceptron_tagger',
     'averaged_perceptron_tagger_eng',
     'wordnet',
     'omw-1.4',
     'punkt_tab',
 ):
     nltk.download(_resource)

 # English stopwords as a set, so membership checks are O(1).
 stop_words = {*stopwords.words("english")}
 def plagiarism_removal(text):
     """Replace eligible words in ``text`` with random WordNet synonyms.

     The text is tokenized with ``word_tokenize``. Each token is either kept
     or swapped for a synonym that receives the same POS tag from
     ``nltk.pos_tag``, and the tokens are re-joined with single spaces.
     Punctuation tokens are attached to the preceding word; opening brackets
     are attached to the following token instead, so ``( word )`` becomes
     ``(word)`` rather than ``prev( word)``.

     Relies on the module-level ``stop_words``, ``exclude_words`` and
     ``exclude_tags`` collections (the latter two are defined outside this
     view).

     Args:
         text: Input string to rewrite.

     Returns:
         The rewritten string. The result is non-deterministic because the
         synonym is chosen with ``random.choice``.
     """
     opening_marks = set("([{")

     def plagiarism_remover(word):
         """Return a same-POS synonym for ``word``, or ``word`` unchanged."""
         lowered = word.lower()
         if lowered in stop_words or lowered in exclude_words or word in string.punctuation:
             return word

         # Tag first: a word with an excluded POS is never replaced, so the
         # WordNet lookup below can be skipped entirely for it.
         target_tag = nltk.pos_tag([word])[0][1]
         if target_tag in exclude_tags:
             return word

         synonyms = set()
         for syn in wordnet.synsets(word):
             for lemma in syn.lemmas():
                 name = lemma.name()
                 # Skip multi-word lemmas, non-alphabetic names and the word itself.
                 if "_" not in name and name.isalpha() and name.lower() != lowered:
                     synonyms.add(name)

         # Keep only candidates that get the same tag as the original word.
         filtered_synonyms = [
             candidate for candidate in synonyms
             if nltk.pos_tag([candidate])[0][1] == target_tag
         ]
         if not filtered_synonyms:
             return word

         synonym_choice = random.choice(filtered_synonyms)
         # Preserve title-casing of the original word.
         return synonym_choice.title() if word.istitle() else synonym_choice

     final_text = [plagiarism_remover(token) for token in word_tokenize(text)]

     corrected_text = []
     pending_open = ""  # opening brackets waiting for the token they precede
     for token in final_text:
         if token in opening_marks:
             pending_open += token
             continue
         if pending_open:
             corrected_text.append(pending_open + token)
             pending_open = ""
         elif token in string.punctuation and corrected_text:
             # Trailing punctuation hugs the previous word.
             corrected_text[-1] += token
         else:
             corrected_text.append(token)
     if pending_open:
         # Text ended on an opening bracket; keep it rather than drop it.
         corrected_text.append(pending_open)
     return " ".join(corrected_text)
 def predict_en(text):
     """Run ``pipeline_en`` on ``text`` and return its first result.

     ``pipeline_en`` is a module-level callable defined outside this view.

     Returns:
         A ``(label, score)`` tuple read from the first element of the
         pipeline output.
     """
     top_prediction = pipeline_en(text)[0]
     label = top_prediction['label']
     score = top_prediction['score']
     return label, score
 def remove_redundant_words(text):
     """Drop a fixed set of filler words from ``text``.

     The text is tokenized by the module-level ``nlp`` callable (defined
     outside this view). Tokens whose lowercase text is a filler word are
     discarded and the remaining token texts are joined with single spaces.
     """
     fillers = {"actually", "basically", "literally", "really", "very", "just"}
     kept = []
     for token in nlp(text):
         token_text = token.text
         if token_text.lower() in fillers:
             continue
         kept.append(token_text)
     return ' '.join(kept)
 def fix_punctuation_spacing(text):
     words = text.split(' ')
     cleaned_words = []
     punctuation_marks = {',', '.', "'", '!', '?', ':'}
     return ' '.join(cleaned_words).replace(' ,', ',').replace(' .', '.').replace(" '", "'") \
                                     .replace(' !', '!').replace(' ?', '?').replace(' :', ':')
 def fix_possessives(text):
     """Re-attach a detached possessive ``'s`` to the word before it.

     Turns ``Earth 's`` and ``Earth ' s`` into ``Earth's``.

     Args:
         text: String that may contain split possessives.

     Returns:
         ``text`` with every detached ``'s`` joined to the preceding word.
     """
     # The trailing \b requires the "s" to be a whole token, so an opening
     # quote before a word starting with "s" (e.g. "said 'stop'") is left alone.
     return re.sub(r"(\w)\s'\s?s\b", r"\1's", text)
 def capitalize_sentences_and_nouns(text):
     doc = nlp(text)
     corrected_text = []
     return ' '.join(corrected_text)
 def force_first_letter_capital(text):
     sentences = re.split(r'(?<=\w[.!?])\s+', text)
     capitalized_sentences = []
     return " ".join(capitalized_sentences)
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 def correct_article_errors(text):
     doc = nlp(text)
     corrected_text = []
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 def ensure_subject_verb_agreement(text):
     doc = nlp(text)
     corrected_text = []
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
 def correct_spelling(text):
     words = text.split()
     corrected_words = []
             corrected_words.append(word)
     return ' '.join(corrected_words)
 def paraphrase_and_correct(text):
     """Paraphrase and grammar-correct ``text`` one paragraph at a time.

     The input is split on blank lines (``"\\n\\n"``). Each paragraph is passed
     through the correction stages in a fixed order, and the results are
     joined back together with blank lines.
     """
     # Order matters: each stage receives the previous stage's output.
     stages = (
         remove_redundant_words,
         plagiarism_removal,
         capitalize_sentences_and_nouns,
         force_first_letter_capital,
         correct_article_errors,
         correct_tense_errors,
         ensure_subject_verb_agreement,
         fix_possessives,
         correct_spelling,
         fix_punctuation_spacing,
     )

     processed_paragraphs = []
     for current in text.split("\n\n"):
         for stage in stages:
             current = stage(current)
         processed_paragraphs.append(current)
     return "\n\n".join(processed_paragraphs)
 # Gradio app setup
 with gr.Blocks() as demo:
         button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
+demo.launch(share=True)