Spaces:

sashtech
/

aihumanifierandgrmoform

Sleeping

App Files Files Community

sashtech committed on Sep 25, 2024

Commit

af4412c

verified ·

1 Parent(s): 2bc5696

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -47

app.py CHANGED Viewed

@@ -27,9 +27,6 @@ download_nltk_resources()
 top_words = set(stopwords.words("english"))  # More efficient as a set
-import os
-import json
 # Path to the thesaurus file
 thesaurus_file_path = 'en_thesaurus.jsonl'  # Ensure the file path is correct
@@ -53,9 +50,33 @@ def load_thesaurus(file_path):
 # Load the thesaurus
 synonym_dict = load_thesaurus(thesaurus_file_path)
 # Modified plagiarism_remover function to use the loaded thesaurus
 def plagiarism_remover(word):
-    # Handle stopwords, punctuation, and excluded words
     if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
         return word
@@ -66,54 +87,25 @@ def plagiarism_remover(word):
     if not synonyms:
         for syn in wordnet.synsets(word):
             for lemma in syn.lemmas():
-                # Exclude overly technical synonyms or words with underscores
                 if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
                     synonyms.add(lemma.name())
-    # Get part of speech for word and filter synonyms with the same POS
     pos_tag_word = nltk.pos_tag([word])[0]
-    # Avoid replacing certain parts of speech
     if pos_tag_word[1] in exclude_tags:
         return word
     filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
-    # Return original word if no appropriate synonyms found
     if not filtered_synonyms:
         return word
-    # Select a random synonym from the filtered list
     synonym_choice = random.choice(filtered_synonyms)
-    # Retain original capitalization
     if word.istitle():
         return synonym_choice.title()
     return synonym_choice
-# Words we don't want to replace
-exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
-exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
-# Initialize the English text classification pipeline for AI detection
-pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
-# Initialize the spell checker
-spell = SpellChecker()
-# Ensure the SpaCy model is installed
-try:
-    nlp = spacy.load("en_core_web_sm")
-except OSError:
-    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
-    nlp = spacy.load("en_core_web_sm")
-# Function to predict the label and score for English text (AI Detection)
-def predict_en(text):
-    res = pipeline_en(text)[0]
-    return res['label'], res['score']
 # Function to remove redundant and meaningless words
 def remove_redundant_words(text):
     doc = nlp(text)
@@ -123,7 +115,6 @@ def remove_redundant_words(text):
 # Function to fix spacing before punctuation
 def fix_punctuation_spacing(text):
-    # Split the text into words and punctuation
     words = text.split(' ')
     cleaned_words = []
     punctuation_marks = {',', '.', "'", '!', '?', ':'}
@@ -139,8 +130,7 @@ def fix_punctuation_spacing(text):
 # Function to fix possessives like "Earth's"
 def fix_possessives(text):
-    text = re.sub(r'(\w)\s\'\s?s', r"\1's", text)
-    return text
 # Function to capitalize the first letter of sentences and proper nouns
 def capitalize_sentences_and_nouns(text):
@@ -216,18 +206,15 @@ def ensure_subject_verb_agreement(text):
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
-# Function to correct spelling errors
 # Function to correct spelling errors
 def correct_spelling(text):
     words = text.split()
     corrected_words = []
     for word in words:
         corrected_word = spell.correction(word)
-        # If correction returns None, keep the original word
         corrected_words.append(corrected_word if corrected_word is not None else word)
     return ' '.join(corrected_words)
 # Main processing function for paraphrasing and grammar correction
 def paraphrase_and_correct(text):
     cleaned_text = remove_redundant_words(text)
@@ -239,7 +226,7 @@ def paraphrase_and_correct(text):
     cleaned_text = correct_article_errors(cleaned_text)
     cleaned_text = ensure_subject_verb_agreement(cleaned_text)
     cleaned_text = correct_spelling(cleaned_text)
-    plag_removed = plagiarism_removal(cleaned_text)
     return plag_removed
 # Create the Gradio interface
@@ -247,15 +234,15 @@ with gr.Blocks() as demo:
     gr.Markdown("# AI Text Processor")
     with gr.Tab("AI Detection"):
         t1 = gr.Textbox(lines=5, label='Input Text')
-        output1 = gr.Label()
-        button1 = gr.Button("🚀 Process!")
-        button1.click(fn=predict_en, inputs=t1, outputs=output1)
     with gr.Tab("Paraphrasing and Grammar Correction"):
         t2 = gr.Textbox(lines=5, label='Input Text')
-        button2 = gr.Button("🚀 Process!")
-        output2 = gr.Textbox(lines=5, label='Processed Text')
-        button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=output2)
 demo.launch()

 top_words = set(stopwords.words("english"))  # More efficient as a set
 # Path to the thesaurus file
 thesaurus_file_path = 'en_thesaurus.jsonl'  # Ensure the file path is correct
 # Load the thesaurus
 synonym_dict = load_thesaurus(thesaurus_file_path)
+# Words and POS tags we don't want to replace
+exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
+exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
+# Initialize the English text classification pipeline for AI detection
+pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
+# Initialize the spell checker
+spell = SpellChecker()
+# Ensure the SpaCy model is installed
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:
+    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
+    nlp = spacy.load("en_core_web_sm")
+# Function to predict the label and score for English text (AI Detection)
+def predict_en(text):
+    try:
+        res = pipeline_en(text)[0]
+        return res['label'], res['score']
+    except Exception as e:
+        return f"Error during AI detection: {e}"
 # Modified plagiarism_remover function to use the loaded thesaurus
 def plagiarism_remover(word):
     if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
         return word
     if not synonyms:
         for syn in wordnet.synsets(word):
             for lemma in syn.lemmas():
                 if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
                     synonyms.add(lemma.name())
     pos_tag_word = nltk.pos_tag([word])[0]
     if pos_tag_word[1] in exclude_tags:
         return word
     filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
     if not filtered_synonyms:
         return word
     synonym_choice = random.choice(filtered_synonyms)
     if word.istitle():
         return synonym_choice.title()
     return synonym_choice
 # Function to remove redundant and meaningless words
 def remove_redundant_words(text):
     doc = nlp(text)
 # Function to fix spacing before punctuation
 def fix_punctuation_spacing(text):
     words = text.split(' ')
     cleaned_words = []
     punctuation_marks = {',', '.', "'", '!', '?', ':'}
 # Function to fix possessives like "Earth's"
 def fix_possessives(text):
     """Rejoin possessive suffixes that were split from their word.

     Turns ``"Earth 's"`` and ``"Earth ' s"`` into ``"Earth's"``.

     Args:
         text: Text that may contain detached ``'s`` suffixes.

     Returns:
         The text with each detached possessive reattached to its word.
     """
     # The trailing \b requires the "s" to be a complete token. Without it,
     # an opening quote before any s-word (e.g. "the 'sun'") would be glued
     # onto the preceding word.
     return re.sub(r"(\w)\s'\s?s\b", r"\1's", text)
 # Function to capitalize the first letter of sentences and proper nouns
 def capitalize_sentences_and_nouns(text):
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
 # Function to correct spelling errors
 def correct_spelling(text):
     """Spell-check ``text`` one whitespace-separated token at a time.

     Each token is passed to ``spell.correction``; when that returns
     ``None`` the original token is kept. Tokens are rejoined with single
     spaces, so runs of whitespace in the input are collapsed.

     Args:
         text: The text to spell-check.

     Returns:
         The corrected tokens joined by single spaces.
     """
     suggestions = ((token, spell.correction(token)) for token in text.split())
     return ' '.join(
         token if fixed is None else fixed
         for token, fixed in suggestions
     )
 # Main processing function for paraphrasing and grammar correction
 def paraphrase_and_correct(text):
     cleaned_text = remove_redundant_words(text)
     cleaned_text = correct_article_errors(cleaned_text)
     cleaned_text = ensure_subject_verb_agreement(cleaned_text)
     cleaned_text = correct_spelling(cleaned_text)
+    plag_removed = plagiarism_remover(cleaned_text)
     return plag_removed
 # Create the Gradio interface
     gr.Markdown("# AI Text Processor")
     with gr.Tab("AI Detection"):
         t1 = gr.Textbox(lines=5, label='Input Text')
+        btn1 = gr.Button("Detect AI")
+        out1 = gr.Textbox(label='Prediction', interactive=False)
+        out2 = gr.Textbox(label='Confidence', interactive=False)
+        btn1.click(fn=predict_en, inputs=t1, outputs=[out1, out2])
     with gr.Tab("Paraphrasing and Grammar Correction"):
         t2 = gr.Textbox(lines=5, label='Input Text')
+        btn2 = gr.Button("Process Text")
+        out3 = gr.Textbox(label='Processed Text', interactive=False)
+        btn2.click(fn=paraphrase_and_correct, inputs=t2, outputs=out3)
 demo.launch()