huamnifierWithSimpleGrammer

Running

App Files

sashtech committed on Aug 31, 2024

Commit

4d1390a

verified ·

1 Parent(s): b3aee5e

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -72

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
 import torch
 import nltk
-import spacy
 from nltk.corpus import wordnet
 import subprocess
@@ -12,13 +11,6 @@ nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('wordnet')  # Download WordNet
-# Download spaCy model if not already installed
-try:
-    nlp = spacy.load("en_core_web_sm")
-except OSError:
-    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
-    nlp = spacy.load("en_core_web_sm")
 # Check for GPU and set the device accordingly
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -26,32 +18,9 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
-# Load SRDdev Paraphrase model and tokenizer for humanizing text
-paraphrase_tokenizer = T5Tokenizer.from_pretrained("SRDdev/Paraphrase")
-paraphrase_model = T5ForConditionalGeneration.from_pretrained("SRDdev/Paraphrase").to(device)
-# Function to find synonyms using WordNet via NLTK
-def get_synonyms(word):
-    synonyms = set()
-    for syn in wordnet.synsets(word):
-        for lemma in syn.lemmas():
-            synonyms.add(lemma.name())
-    return list(synonyms)
-# Replace words with synonyms using spaCy and WordNet
-def replace_with_synonyms(text):
-    doc = nlp(text)
-    processed_text = []
-    for token in doc:
-        synonyms = get_synonyms(token.text.lower())
-        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}:  # Only replace certain types of words
-            replacement = synonyms[0]  # Replace with the first synonym
-            if token.is_title:
-                replacement = replacement.capitalize()
-            processed_text.append(replacement)
-        else:
-            processed_text.append(token.text)
-    return " ".join(processed_text)
 # AI detection function using DistilBERT
 def detect_ai_generated(text):
@@ -59,49 +28,46 @@ def detect_ai_generated(text):
     with torch.no_grad():
         outputs = model(**inputs)
         probabilities = torch.softmax(outputs.logits, dim=1)
-    return probabilities[0][1].item()  # Probability of being AI-generated
-# Humanize the AI-detected text using the SRDdev Paraphrase model
 def humanize_text(AI_text):
-    paragraphs = AI_text.split("\n")
-    paraphrased_paragraphs = []
-    for paragraph in paragraphs:
-        if paragraph.strip():
-            inputs = paraphrase_tokenizer(paragraph, return_tensors="pt", max_length=512, truncation=True).to(device)
-            with torch.no_grad():  # Avoid gradient calculations for faster inference
-                paraphrased_ids = paraphrase_model.generate(
-                    inputs['input_ids'],
-                    max_length=inputs['input_ids'].shape[-1] + 20,  # Slightly more than the original input length
-                    num_beams=4,
-                    early_stopping=True,
-                    length_penalty=1.0,
-                    no_repeat_ngram_size=3,
-                )
-            paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
-            paraphrased_paragraphs.append(paraphrased_text)
-    return "\n\n".join(paraphrased_paragraphs)
-# Main function to handle the overall process
-def main_function(AI_text):
-    # Replace words with synonyms
-    text_with_synonyms = replace_with_synonyms(AI_text)
-    # Detect AI-generated content
-    ai_probability = detect_ai_generated(text_with_synonyms)
-    # Humanize AI text
-    humanized_text = humanize_text(text_with_synonyms)
-    return f"AI-Generated Content: {ai_probability:.2f}%\n\nHumanized Text:\n{humanized_text}"
 # Gradio interface definition
-interface = gr.Interface(
-    fn=main_function,
     inputs="textbox",
-    outputs="textbox",
-    title="AI Text Humanizer with Synonym Replacement",
-    description="Enter AI-generated text and get a human-written version, with synonyms replaced for more natural output. This space uses models from Hugging Face directly."
 )
 # Launch the Gradio app
-interface.launch(debug=False)  # Turn off debug mode for production

 from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
 import torch
 import nltk
 from nltk.corpus import wordnet
 import subprocess
 nltk.download('stopwords')
 nltk.download('wordnet')  # Download WordNet
 # Check for GPU and set the device accordingly
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
+# Load Parrot Paraphraser model and tokenizer for humanizing text
+paraphrase_tokenizer = T5Tokenizer.from_pretrained("prithivida/parrot_paraphraser_on_T5")
+paraphrase_model = T5ForConditionalGeneration.from_pretrained("prithivida/parrot_paraphraser_on_T5").to(device)
 # AI detection function using DistilBERT
 def detect_ai_generated(text):
     with torch.no_grad():
         outputs = model(**inputs)
         probabilities = torch.softmax(outputs.logits, dim=1)
+    ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
+    return f"AI-Generated Content Probability: {ai_probability:.2f}%"
+# Humanize the AI-detected text using the Parrot Paraphraser model
 def humanize_text(AI_text):
+    inputs = paraphrase_tokenizer(AI_text, return_tensors="pt", max_length=512, truncation=True).to(device)
+    with torch.no_grad():  # Avoid gradient calculations for faster inference
+        paraphrased_ids = paraphrase_model.generate(
+            inputs['input_ids'],
+            max_length=inputs['input_ids'].shape[-1] + 20,  # Slightly more than the original input length
+            num_beams=4,
+            early_stopping=True,
+            length_penalty=1.0,
+            no_repeat_ngram_size=3,
+        )
+    paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
+    return f"Humanized Text:\n{paraphrased_text}"
 # Gradio interface definition
+ai_detection_interface = gr.Interface(
+    fn=detect_ai_generated,
     inputs="textbox",
+    outputs="text",
+    title="AI Text Detection",
+    description="Enter text to determine the probability of it being AI-generated."
+)
+humanization_interface = gr.Interface(
+    fn=humanize_text,
+    inputs="textbox",
+    outputs="text",
+    title="Text Humanizer",
+    description="Enter text to get a human-written version, paraphrased for natural output."
+)
+# Combine both interfaces into a single Gradio app with tabs
+interface = gr.TabbedInterface(
+    [ai_detection_interface, humanization_interface],
+    ["AI Detection", "Humanization"]
 )
 # Launch the Gradio app
+interface.launch(debug=False)