Spaces:

sashtech
/

aihumanifierandgrmoform

Sleeping

App Files Files Community

sashtech committed on Sep 26, 2024

Commit

f294823

verified ·

1 Parent(s): ffbdb95

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -11

app.py CHANGED Viewed

@@ -19,7 +19,6 @@ nltk.download('averaged_perceptron_tagger')
 nltk.download('averaged_perceptron_tagger_eng')
 nltk.download('wordnet')
 nltk.download('omw-1.4')
-nltk.download('punkt_tab')
 # Initialize stopwords
 stop_words = set(stopwords.words("english"))
@@ -41,11 +40,12 @@ except OSError:
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
     nlp = spacy.load("en_core_web_sm")
 def plagiarism_removal(text):
     def plagiarism_remover(word):
         if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
             return word
         # Find synonyms
         synonyms = set()
         for syn in wordnet.synsets(word):
@@ -57,7 +57,7 @@ def plagiarism_removal(text):
         if pos_tag_word[1] in exclude_tags:
             return word
         filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
         if not filtered_synonyms:
@@ -71,26 +71,29 @@ def plagiarism_removal(text):
     para_split = word_tokenize(text)
     final_text = [plagiarism_remover(word) for word in para_split]
     corrected_text = []
     for i in range(len(final_text)):
         if final_text[i] in string.punctuation and i > 0:
-            corrected_text[-1] += final_text[i]
         else:
             corrected_text.append(final_text[i])
     return " ".join(corrected_text)
 def predict_en(text):
     """Classify ``text`` with ``pipeline_en`` and return its first result.

     Returns:
         A ``(label, score)`` tuple taken from the first entry that
         ``pipeline_en`` yields for ``text``.
     """
     top_result = pipeline_en(text)[0]
     label = top_result['label']
     score = top_result['score']
     return label, score
 def remove_redundant_words(text):
     """Strip a fixed set of filler words from ``text``.

     The text is tokenized with the ``nlp`` pipeline defined elsewhere in
     this file; every token whose lowercased text is one of the filler
     words is dropped, and the remaining token texts are joined with single
     spaces.
     """
     meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
     kept_tokens = []
     for token in nlp(text):
         if token.text.lower() in meaningless_words:
             continue
         kept_tokens.append(token.text)
     return ' '.join(kept_tokens)
 def fix_punctuation_spacing(text):
     words = text.split(' ')
     cleaned_words = []
@@ -103,12 +106,14 @@ def fix_punctuation_spacing(text):
             cleaned_words.append(word)
     return ' '.join(cleaned_words).replace(' ,', ',').replace(' .', '.').replace(" '", "'") \
-                                    .replace(' !', '!').replace(' ?', '?').replace(' :', ':')
 def fix_possessives(text):
     """Re-attach a possessive ``'s`` that was separated from its word.

     Rewrites ``"John 's"`` and ``"John ' s"`` as ``"John's"``.

     The pattern requires a word boundary after the ``s`` so that only a
     standalone ``s`` is treated as a possessive. Without it, an opening
     single quote in front of any word starting with "s" (for example
     ``said 'so what'``) would be glued onto the preceding word.

     Args:
         text: The string to clean up.

     Returns:
         ``text`` with split possessives rejoined.
     """
     return re.sub(r"(\w)\s'\s?s\b", r"\1's", text)
 def capitalize_sentences_and_nouns(text):
     doc = nlp(text)
     corrected_text = []
@@ -126,30 +131,41 @@ def capitalize_sentences_and_nouns(text):
     return ' '.join(corrected_text)
 def force_first_letter_capital(text):
     """Capitalize the first character of each sentence and terminate it.

     ``text`` is split on whitespace that follows a word character plus one
     of ``.``, ``!`` or ``?``. Each non-empty piece gets its first character
     capitalized and a trailing ``.`` appended when it does not already end
     in ``.``, ``!`` or ``?``. The pieces are rejoined with single spaces.
     """
     def _finish(piece):
         # Capitalize only the leading character; the rest is kept as-is.
         piece = piece[0].capitalize() + piece[1:]
         if re.search(r'[.!?]$', piece):
             return piece
         return piece + '.'

     pieces = re.split(r'(?<=\w[.!?])\s+', text)
     return " ".join(_finish(piece) for piece in pieces if piece)
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
-        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
-            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
-            corrected_text.append(lemma)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 def correct_article_errors(text):
     doc = nlp(text)
     corrected_text = []
@@ -166,6 +182,7 @@ def correct_article_errors(text):
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 def ensure_subject_verb_agreement(text):
     doc = nlp(text)
     corrected_text = []
@@ -178,6 +195,7 @@ def ensure_subject_verb_agreement(text):
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
 def correct_spelling(text):
     words = text.split()
     corrected_words = []
@@ -189,6 +207,7 @@ def correct_spelling(text):
             corrected_words.append(word)
     return ' '.join(corrected_words)
 def paraphrase_and_correct(text):
     paragraphs = text.split("\n\n")  # Split by paragraphs
@@ -209,6 +228,7 @@ def paraphrase_and_correct(text):
     return "\n\n".join(processed_paragraphs)  # Reassemble the text with paragraphs
 # Gradio app setup
 with gr.Blocks() as demo:
     with gr.Tab("AI Detection"):

 nltk.download('averaged_perceptron_tagger_eng')
 nltk.download('wordnet')
 nltk.download('omw-1.4')
 # Initialize stopwords
 stop_words = set(stopwords.words("english"))
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
     nlp = spacy.load("en_core_web_sm")
 def plagiarism_removal(text):
     def plagiarism_remover(word):
         if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
             return word
         # Find synonyms
         synonyms = set()
         for syn in wordnet.synsets(word):
         if pos_tag_word[1] in exclude_tags:
             return word
         filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
         if not filtered_synonyms:
     para_split = word_tokenize(text)
     final_text = [plagiarism_remover(word) for word in para_split]
     corrected_text = []
     for i in range(len(final_text)):
         if final_text[i] in string.punctuation and i > 0:
+            corrected_text[-1] += final_text[i]
         else:
             corrected_text.append(final_text[i])
     return " ".join(corrected_text)
 def predict_en(text):
     """Classify ``text`` with ``pipeline_en`` and return its first result.

     Returns:
         A ``(label, score)`` tuple taken from the first entry that
         ``pipeline_en`` yields for ``text``.
     """
     top_result = pipeline_en(text)[0]
     label = top_result['label']
     score = top_result['score']
     return label, score
 def remove_redundant_words(text):
     """Strip a fixed set of filler words from ``text``.

     The text is tokenized with the ``nlp`` pipeline defined elsewhere in
     this file; every token whose lowercased text is one of the filler
     words is dropped, and the remaining token texts are joined with single
     spaces.
     """
     meaningless_words = {"actually", "basically", "literally", "really", "very", "just"}
     kept_tokens = []
     for token in nlp(text):
         if token.text.lower() in meaningless_words:
             continue
         kept_tokens.append(token.text)
     return ' '.join(kept_tokens)
 def fix_punctuation_spacing(text):
     words = text.split(' ')
     cleaned_words = []
             cleaned_words.append(word)
     return ' '.join(cleaned_words).replace(' ,', ',').replace(' .', '.').replace(" '", "'") \
+        .replace(' !', '!').replace(' ?', '?').replace(' :', ':')
 def fix_possessives(text):
     """Re-attach a possessive ``'s`` that was separated from its word.

     Rewrites ``"John 's"`` and ``"John ' s"`` as ``"John's"``.

     The pattern requires a word boundary after the ``s`` so that only a
     standalone ``s`` is treated as a possessive. Without it, an opening
     single quote in front of any word starting with "s" (for example
     ``said 'so what'``) would be glued onto the preceding word.

     Args:
         text: The string to clean up.

     Returns:
         ``text`` with split possessives rejoined.
     """
     return re.sub(r"(\w)\s'\s?s\b", r"\1's", text)
 def capitalize_sentences_and_nouns(text):
     doc = nlp(text)
     corrected_text = []
     return ' '.join(corrected_text)
 def force_first_letter_capital(text):
     """Capitalize the first character of each sentence and terminate it.

     ``text`` is split on whitespace that follows a word character plus one
     of ``.``, ``!`` or ``?``. Each non-empty piece gets its first character
     capitalized and a trailing ``.`` appended when it does not already end
     in ``.``, ``!`` or ``?``. The pieces are rejoined with single spaces.
     """
     def _finish(piece):
         # Capitalize only the leading character; the rest is kept as-is.
         piece = piece[0].capitalize() + piece[1:]
         if re.search(r'[.!?]$', piece):
             return piece
         return piece + '.'

     pieces = re.split(r'(?<=\w[.!?])\s+', text)
     return " ".join(_finish(piece) for piece in pieces if piece)
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
+        if token.pos_ == "VERB":
+            tense = token.morph.get("Tense")
+            if tense:
+                if 'Past' in tense:
+                    corrected_text.append(token.lemma_ + "ed")
+                elif 'Present' in tense and token.tag_ == 'VBZ':
+                    corrected_text.append(token.lemma_ + "s")
+                else:
+                    corrected_text.append(token.lemma_)
+            else:
+                corrected_text.append(token.text)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 def correct_article_errors(text):
     doc = nlp(text)
     corrected_text = []
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 def ensure_subject_verb_agreement(text):
     doc = nlp(text)
     corrected_text = []
         corrected_text.append(token.text)
     return ' '.join(corrected_text)
 def correct_spelling(text):
     words = text.split()
     corrected_words = []
             corrected_words.append(word)
     return ' '.join(corrected_words)
 def paraphrase_and_correct(text):
     paragraphs = text.split("\n\n")  # Split by paragraphs
     return "\n\n".join(processed_paragraphs)  # Reassemble the text with paragraphs
 # Gradio app setup
 with gr.Blocks() as demo:
     with gr.Tab("AI Detection"):