Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ import spacy
 import subprocess
 import json
 import nltk
-from nltk.corpus import wordnet, stopwords
+from nltk.corpus import wordnet, stopwords
 from spellchecker import SpellChecker
 import re
 import random
@@ -13,24 +13,22 @@ import string
 
 # Ensure necessary NLTK data is downloaded
 def download_nltk_resources():
-    try:
+    try:
+        nltk.download('punkt')
         nltk.download('stopwords')
         nltk.download('averaged_perceptron_tagger')
-        nltk.download('averaged_perceptron_tagger_eng')
         nltk.download('wordnet')
         nltk.download('omw-1.4')
-        nltk.download('punkt_tab')
-
     except Exception as e:
         print(f"Error downloading NLTK resources: {e}")
 
 # Call the download function
 download_nltk_resources()
 
-top_words = set(stopwords.words("english"))
+top_words = set(stopwords.words("english"))
 
 # Path to the thesaurus file
-thesaurus_file_path = 'en_thesaurus.jsonl
+thesaurus_file_path = 'en_thesaurus.jsonl'  # Ensure the file path is correct
 
 # Function to load the thesaurus into a dictionary
 def load_thesaurus(file_path):
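For reference, the corrected helper calls nltk.download() on every resource at each startup. A slightly more economical pattern is to probe for each resource first and download only what is missing; below is a minimal sketch using the standard NLTK resource paths, with a hypothetical ensure_nltk_resource helper that is not part of this commit:

import nltk

def ensure_nltk_resource(path, name):
    # nltk.data.find raises LookupError when the resource is absent.
    try:
        nltk.data.find(path)
    except LookupError:
        nltk.download(name)

ensure_nltk_resource('tokenizers/punkt', 'punkt')
ensure_nltk_resource('corpora/stopwords', 'stopwords')
ensure_nltk_resource('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger')
ensure_nltk_resource('corpora/wordnet', 'wordnet')
ensure_nltk_resource('corpora/omw-1.4', 'omw-1.4')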
@@ -38,7 +36,6 @@ def load_thesaurus(file_path):
     try:
         with open(file_path, 'r', encoding='utf-8') as file:
             for line in file:
-                # Parse each line as a JSON object
                 entry = json.loads(line.strip())
                 word = entry.get("word")
                 synonyms = entry.get("synonyms", [])
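The hunk shows only the middle of load_thesaurus. Assuming the function accumulates word-to-synonyms pairs into a dict (the merging and error-handling details below are an assumption, not code from this file), a self-contained version consistent with the visible lines would look like:

import json

def load_thesaurus(file_path):
    # Map each word to the set of its synonyms; the file holds one JSON object per line.
    thesaurus = {}
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                entry = json.loads(line.strip())
                word = entry.get("word")
                synonyms = entry.get("synonyms", [])
                if word:
                    thesaurus.setdefault(word, set()).update(synonyms)
    except FileNotFoundError:
        print(f"Thesaurus file not found: {file_path}")
    return thesaurus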
@@ -77,7 +74,7 @@ def predict_en(text):
     except Exception as e:
         return f"Error during AI detection: {e}"
 
-#
+# Function to remove plagiarism
 def plagiarism_remover(word):
     if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
         return word
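The rest of plagiarism_remover falls outside this hunk. One common continuation for this kind of guard (skip stopwords, excluded words, and punctuation, then swap the remaining words for WordNet synonyms) is sketched below; the synonym-selection logic and the exclude_words set are assumptions for illustration, not the file's actual code:

import random
import string
from nltk.corpus import wordnet, stopwords

top_words = set(stopwords.words("english"))
exclude_words = {"ai"}  # hypothetical exclusion list; the real one is defined elsewhere in app.py

def plagiarism_remover(word):
    # Leave stopwords, excluded words, and punctuation untouched.
    if word.lower() in top_words or word.lower() in exclude_words or word in string.punctuation:
        return word
    # Collect WordNet synonyms and pick one at random, if any exist.
    synonyms = {lemma.name().replace("_", " ")
                for syn in wordnet.synsets(word)
                for lemma in syn.lemmas()}
    synonyms.discard(word)
    return random.choice(sorted(synonyms)) if synonyms else word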
@@ -234,6 +231,7 @@ def paraphrase_and_correct(text):
 # Create the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# AI Text Processor")
+
     with gr.Tab("AI Detection"):
         t1 = gr.Textbox(lines=5, label='Input Text')
         btn1 = gr.Button("Detect AI")
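The hunk ends before the button is wired to a handler. A minimal sketch of how such a tab is typically completed in gr.Blocks follows; predict_en comes from a hunk header above, but the stub body and the output textbox are assumptions:

import gradio as gr

def predict_en(text):
    # Stub standing in for the app's real detector (defined earlier in app.py).
    return f"(detection result for {len(text)} characters)"

with gr.Blocks() as demo:
    gr.Markdown("# AI Text Processor")

    with gr.Tab("AI Detection"):
        t1 = gr.Textbox(lines=5, label='Input Text')
        btn1 = gr.Button("Detect AI")
        out1 = gr.Textbox(label='Result')  # assumed output component, not shown in the hunk
        btn1.click(fn=predict_en, inputs=t1, outputs=out1)

demo.launch()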