huamnifierWithSimpleGrammer

Running

App Files Files

sashtech committed on Aug 31, 2024

Commit

7feda08

verified ·

1 Parent(s): 6b18ba5

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -40

app.py CHANGED Viewed

@@ -1,16 +1,25 @@
 # Import dependencies
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
 import torch
 import nltk
 from nltk.corpus import wordnet
-import subprocess
 # Download NLTK data (if not already downloaded)
 nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('wordnet')  # Download WordNet
 # Check for GPU and set the device accordingly
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -18,10 +27,6 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
-# Load Parrot Paraphraser model and tokenizer for humanizing text
-paraphrase_tokenizer = T5Tokenizer.from_pretrained("prithivida/parrot_paraphraser_on_T5")
-paraphrase_model = T5ForConditionalGeneration.from_pretrained("prithivida/parrot_paraphraser_on_T5").to(device)
 # AI detection function using DistilBERT
 def detect_ai_generated(text):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
@@ -31,43 +36,52 @@ def detect_ai_generated(text):
     ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
     return f"AI-Generated Content Probability: {ai_probability:.2f}%"
-# Humanize the AI-detected text using the Parrot Paraphraser model
-def humanize_text(AI_text):
-    inputs = paraphrase_tokenizer(AI_text, return_tensors="pt", max_length=512, truncation=True).to(device)
-    with torch.no_grad():  # Avoid gradient calculations for faster inference
-        paraphrased_ids = paraphrase_model.generate(
-            inputs['input_ids'],
-            max_length=inputs['input_ids'].shape[-1] + 20,  # Slightly more than the original input length
-            num_beams=4,
-            early_stopping=True,
-            length_penalty=1.0,
-            no_repeat_ngram_size=3,
-        )
-    paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
-    return f"Humanized Text:\n{paraphrased_text}"
-# Gradio interface definition
-ai_detection_interface = gr.Interface(
-    fn=detect_ai_generated,
-    inputs="textbox",
-    outputs="text",
-    title="AI Text Detection",
-    description="Enter text to determine the probability of it being AI-generated."
-)
-humanization_interface = gr.Interface(
-    fn=humanize_text,
-    inputs="textbox",
-    outputs="text",
-    title="Text Humanizer",
-    description="Enter text to get a human-written version, paraphrased for natural output."
-)
-# Combine both interfaces into a single Gradio app with tabs
-interface = gr.TabbedInterface(
-    [ai_detection_interface, humanization_interface],
-    ["AI Detection", "Humanization"]
-)
 # Launch the Gradio app
 interface.launch(debug=False)

 # Import dependencies
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import nltk
 from nltk.corpus import wordnet
+import spacy
+from gensim.models import KeyedVectors
+from gensim import downloader as api
+from nltk.tokenize import word_tokenize
 # Download NLTK data (if not already downloaded)
 nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('wordnet')  # Download WordNet
+# Load spaCy model
+nlp = spacy.load("en_core_web_sm")
+# Load a small set of pre-trained GloVe word vectors (glove-wiki-gigaword-50) through Gensim's downloader
+word_vectors = api.load("glove-wiki-gigaword-50")
 # Check for GPU and set the device accordingly
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
 model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
 # AI detection function using DistilBERT
 def detect_ai_generated(text):
     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
     ai_probability = probabilities[0][1].item()  # Probability of being AI-generated
     return f"AI-Generated Content Probability: {ai_probability:.2f}%"
+# Function to get candidate synonyms (nearest neighbours) from the Gensim word vectors
+def get_synonyms_gensim(word):
+    try:
+        synonyms = word_vectors.most_similar(positive=[word], topn=5)
+        return [synonym[0] for synonym in synonyms]
+    except KeyError:
+        return []
+# Paraphrasing function using Gensim for synonym replacement
+def paraphrase_with_gensim(text):
+    words = word_tokenize(text)
+    paraphrased_words = []
+    for word in words:
+        synonyms = get_synonyms_gensim(word.lower())
+        if synonyms:
+            paraphrased_words.append(synonyms[0])
+        else:
+            paraphrased_words.append(word)
+    return ' '.join(paraphrased_words)
+# Paraphrasing function using spaCy POS tags to pick which words to replace (synonyms still come from Gensim)
+def paraphrase_with_spacy(text):
+    doc = nlp(text)
+    paraphrased_words = []
+    for token in doc:
+        synonyms = get_synonyms_gensim(token.text.lower())
+        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}:  # Only replace certain types of words
+            paraphrased_words.append(synonyms[0])
+        else:
+            paraphrased_words.append(token.text)
+    return ' '.join(paraphrased_words)
+# Gradio interface definition
+# One page: a row with two columns, input and action buttons in the first,
+# a single shared output box in the second.
+with gr.Blocks() as interface:
+    with gr.Row():
+        with gr.Column():
+            # Text to analyse or paraphrase, plus one button per action.
+            text_input = gr.Textbox(lines=5, label="Input Text")
+            detect_button = gr.Button("AI Detection")
+            paraphrase_gensim_button = gr.Button("Paraphrase with Gensim")
+            paraphrase_spacy_button = gr.Button("Paraphrase with spaCy")
+        with gr.Column():
+            # All three actions write their result into this one textbox.
+            output_text = gr.Textbox(label="Output")
+    # Each button passes the input text to its function and shows the returned
+    # string in the output box.
+    detect_button.click(detect_ai_generated, inputs=text_input, outputs=output_text)
+    paraphrase_gensim_button.click(paraphrase_with_gensim, inputs=text_input, outputs=output_text)
+    paraphrase_spacy_button.click(paraphrase_with_spacy, inputs=text_input, outputs=output_text)
 # Launch the Gradio app
 interface.launch(debug=False)