Spaces:

sashtech
/

aihumanifierandgrmoform

Sleeping

App Files Community

sashtech committed on Sep 25, 2024

Commit

94cbde8

verified ·

1 Parent(s): d45f7be

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -64

app.py CHANGED Viewed

@@ -4,49 +4,32 @@ from transformers import pipeline
 import spacy
 import subprocess
 import nltk
-from nltk.corpus import wordnet
-from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
 from spellchecker import SpellChecker
 import re
-import string
 import random
-# Download necessary NLTK data
-nltk.download('punkt')
-nltk.download('stopwords')
-nltk.download('averaged_perceptron_tagger')
-nltk.download('averaged_perceptron_tagger_eng')
-nltk.download('wordnet')
-nltk.download('omw-1.4')
-nltk.download('punkt_tab')
-# Initialize stopwords
-stop_words = set(stopwords.words("english"))
-# Words we don't want to replace
-exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
-exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
-# Initialize the English text classification pipeline for AI detection
-pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
-# Initialize the spell checker
-spell = SpellChecker()
-# Ensure the SpaCy model is installed
-try:
-    nlp = spacy.load("en_core_web_sm")
-except OSError:
-    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
-    nlp = spacy.load("en_core_web_sm")
 def plagiarism_removal(text):
     def plagiarism_remover(word):
         # Handle stopwords, punctuation, and excluded words
-        if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
             return word
         # Find synonyms
@@ -79,7 +62,7 @@ def plagiarism_removal(text):
         return synonym_choice
     # Tokenize, replace words, and join them back
-    para_split = word_tokenize(text)
     final_text = [plagiarism_remover(word) for word in para_split]
     # Handle spacing around punctuation correctly
@@ -92,6 +75,23 @@ def plagiarism_removal(text):
     return " ".join(corrected_text)
 # Function to predict the label and score for English text (AI Detection)
 def predict_en(text):
     res = pipeline_en(text)[0]
@@ -205,43 +205,37 @@ def correct_spelling(text):
     corrected_words = []
     for word in words:
         corrected_word = spell.correction(word)
-        if corrected_word is not None:
-            corrected_words.append(corrected_word)
-        else:
-            corrected_words.append(word)
     return ' '.join(corrected_words)
-# Main function for paraphrasing and grammar correction
 def paraphrase_and_correct(text):
-    # Add synonym replacement here
     cleaned_text = remove_redundant_words(text)
     plag_removed = plagiarism_removal(cleaned_text)
-    paraphrased_text = capitalize_sentences_and_nouns(plag_removed)
-    paraphrased_text = force_first_letter_capital(paraphrased_text)
-    paraphrased_text = correct_article_errors(paraphrased_text)
-    paraphrased_text = correct_tense_errors(paraphrased_text)
-    paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
-    paraphrased_text = fix_possessives(paraphrased_text)
-    paraphrased_text = correct_spelling(paraphrased_text)
-    paraphrased_text = fix_punctuation_spacing(paraphrased_text)
-    return paraphrased_text
-# Gradio app setup
 with gr.Blocks() as demo:
     with gr.Tab("AI Detection"):
-        t1 = gr.Textbox(lines=5, label='Text')
-        button1 = gr.Button("🤖 Predict!")
-        label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
-        score1 = gr.Textbox(lines=1, label='Prob')
-        button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
-    with gr.Tab("Paraphrasing & Grammar Correction"):
-        t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
-        button2 = gr.Button("🔄 Paraphrase and Correct")
-        result2 = gr.Textbox(lines=5, label='Corrected Text')
-        button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
-demo.launch(share=True)

 import spacy
 import subprocess
 import nltk
+from nltk.corpus import wordnet, stopwords  # Import stopwords here
 from spellchecker import SpellChecker
 import re
 import random
+import string
+# Ensure necessary NLTK data is downloaded
+def download_nltk_resources():
+    try:
+        nltk.download('punkt')  # Tokenizer for English text
+        nltk.download('stopwords')  # Stop words
+        nltk.download('averaged_perceptron_tagger')  # POS tagger
+        nltk.download('wordnet')  # WordNet
+        nltk.download('omw-1.4')  # Open Multilingual Wordnet
+    except Exception as e:
+        print(f"Error downloading NLTK resources: {e}")
+# Call the download function
+download_nltk_resources()
+top_words = set(stopwords.words("english"))  # More efficient as a set
 def plagiarism_removal(text):
     def plagiarism_remover(word):
         # Handle stopwords, punctuation, and excluded words
+        if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
             return word
         # Find synonyms
         return synonym_choice
     # Tokenize, replace words, and join them back
+    para_split = nltk.word_tokenize(text)
     final_text = [plagiarism_remover(word) for word in para_split]
     # Handle spacing around punctuation correctly
     return " ".join(corrected_text)
+# Words we don't want to replace
+exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
+exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
+# Initialize the English text classification pipeline for AI detection
+pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
+# Initialize the spell checker
+spell = SpellChecker()
+# Ensure the SpaCy model is installed
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:
+    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
+    nlp = spacy.load("en_core_web_sm")
 # Function to predict the label and score for English text (AI Detection)
 def predict_en(text):
     res = pipeline_en(text)[0]
     corrected_words = []
     for word in words:
         corrected_word = spell.correction(word)
+        corrected_words.append(corrected_word)
     return ' '.join(corrected_words)
+# Main processing function for paraphrasing and grammar correction
 def paraphrase_and_correct(text):
     cleaned_text = remove_redundant_words(text)
+    cleaned_text = fix_punctuation_spacing(cleaned_text)
+    cleaned_text = fix_possessives(cleaned_text)
+    cleaned_text = capitalize_sentences_and_nouns(cleaned_text)
+    cleaned_text = force_first_letter_capital(cleaned_text)
+    cleaned_text = correct_tense_errors(cleaned_text)
+    cleaned_text = correct_article_errors(cleaned_text)
+    cleaned_text = ensure_subject_verb_agreement(cleaned_text)
+    cleaned_text = correct_spelling(cleaned_text)
     plag_removed = plagiarism_removal(cleaned_text)
+    return plag_removed
+# Create the Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("# AI Text Processor")
     with gr.Tab("AI Detection"):
+        t1 = gr.Textbox(lines=5, label='Input Text')
+        output1 = gr.Label()
+        button1 = gr.Button("🚀 Process!")
+        button1.click(fn=predict_en, inputs=t1, outputs=output1)
+    with gr.Tab("Paraphrasing and Grammar Correction"):
+        t2 = gr.Textbox(lines=5, label='Input Text')
+        button2 = gr.Button("🚀 Process!")
+        output2 = gr.Textbox(lines=5, label='Processed Text')
+        button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=output2)
+demo.launch()