# DivScore / app.py
# RichardChenZH's picture
# Update app.py
# 4afc6ee verified
import gradio as gr
from divscore import DivScore
import torch
import os
# Hugging Face environment setting: force TOKENIZERS_PARALLELISM to "false"
# for this process.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})
# Factory for the DivScore detector; hands back None when construction fails.
def load_model():
    """Build the DivScore detector, or return ``None`` if that fails.

    The detector is placed on ``cuda:0`` when ``torch`` reports CUDA as
    available and on the CPU otherwise. Any exception raised while building
    it is printed to stdout and swallowed, so callers must check for ``None``.

    Returns:
        The constructed ``DivScore`` instance, or ``None`` on failure.
    """
    try:
        target_device = "cuda:0" if torch.cuda.is_available() else "cpu"
        settings = {
            "generalLM_name_or_path": "mistral-community/Mistral-7B-v0.2",
            "enhancedLM_name_or_path": "RichardChenZH/DivScore_combined",
            "device": target_device,
            # bfloat16 is requested for memory efficiency.
            "use_bfloat16": True,
        }
        return DivScore(**settings)
    except Exception as err:
        print(f"Error loading model: {err}")
        return None
# Lazily created detector shared by all requests; stays None until the first
# successful load_model() call inside detect_ai_text().
detector = None


def detect_ai_text(text, *, threshold=0.19):
    """Score ``text`` with DivScore and render the outcome as Markdown.

    Args:
        text: The text to analyze. ``None`` and whitespace-only input are
            rejected before the model is touched, so a blank submission
            never triggers the (expensive) lazy model load.
        threshold: DivScore cut-off. Scores strictly below it are reported
            as AI-generated. The 0.19 default follows the original author's
            note that it is based on the paper's findings.

    Returns:
        A ``(details, verdict)`` tuple. Both elements are always ``str`` so
        they can be fed to the two Markdown outputs this function is wired
        to: ``details`` holds the scores (or an error / prompt message) and
        ``verdict`` holds the AI-vs-human headline (empty when there was
        nothing to analyze, ``"Error in analysis"`` on failure).
    """
    global detector

    # Validate first: no point loading the model for an empty request.
    if text is None or not text.strip():
        return "Please enter some text to analyze.", ""

    # Load the detector on first use and cache it in the module global.
    if detector is None:
        detector = load_model()
        if detector is None:
            return (
                "Error: Failed to load the model. Please try again later.",
                "Error in analysis",
            )

    try:
        score, entropy_score, ce_score = detector.compute_score(text)
        is_ai_generated = score < threshold

        result = (
            "\n"
            "### Analysis Results\n"
            f"- **DivScore**: {score:.4f}\n"
            f"- **Entropy Score**: {entropy_score:.4f}\n"
            f"- **CE Score**: {ce_score:.4f}\n"
            "### Interpretation\n"
            f"- **Threshold**: {threshold} "
            "(scores below this are likely AI-generated)\n"
            f"- **Current Score**: {score:.4f}\n"
        )

        if is_ai_generated:
            detection_result = (
                "## 🔴 AI-Generated Text Detected\n"
                "This text is likely to have been generated by an AI model."
            )
        else:
            detection_result = (
                "## 🟢 Human-Written Text Detected\n"
                "This text appears to be written by a human."
            )
        return result, detection_result
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the request handler.
        return f"Error occurred: {e}", "Error in analysis"
# Build the Gradio UI: intro text, a text box with an "Analyze Text" button,
# two Markdown result panes, two clickable example inputs, and the click wiring.
# NOTE(review): the Row/Column nesting below was reconstructed from a copy of
# this file whose indentation had been flattened — confirm the layout.
with gr.Blocks(title="DivScore AI Text Detector") as demo:
    # Introductory Markdown (title, purpose, and a note about run time).
    gr.Markdown("""
# DivScore AI Text Detector
This demo uses the DivScore model to detect if specialized domain text (i.e. medical and legal text) was generated by an AI model.
Enter your text below to analyze it.
**Note:** The demo may take quite a few moments to run as it runs on Huggingface free CPUs. Running on local GPUs provides much faster speed.
""")
    # Input area: the text to analyze and the button that submits it.
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Input Text",
                placeholder="Enter text to analyze...",
                lines=5
            )
            submit_btn = gr.Button("Analyze Text", variant="primary")
    # Output area: one pane for the score details, one for the verdict.
    with gr.Row():
        with gr.Column():
            result_output = gr.Markdown(label="Analysis Results")
        with gr.Column():
            ai_generated = gr.Markdown(label="AI Generation Detection")
    # Sample inputs that fill text_input: one medical (sleep-disorders
    # research) passage and one legal (parking-violation) passage.
    gr.Examples(
        examples=[
            ["""Adjustment for reporting heterogeneity can be necessary in sleep disorders research to ensure accurate and reliable results.
Reporting heterogeneity refers to variations in how information is documented or reported, which can stem from differences
in assessment tools, study design, participant demographics, cultural differences, and subjective interpretations of symptoms.
In sleep disorders, symptoms and their severity can be perceived differently by individuals due to personal, cultural, or
situational factors. Without adjusting for these differences, there can be inconsistencies in data interpretation, potentially
leading to skewed or misleading findings. Therefore, employing methods to adjust for reporting heterogeneity can enhance
the validity and comparability of research outcomes across different studies or populations."""],
            ["""In Pennsylvania, as in many other jurisdictions, parking violations, including those for street sweeper offenses, are generally
considered civil infractions rather than criminal offenses. This means that the violation is typically against the vehicle and
not directly against the individual who parked it. As such, the burden of proof is generally on the city to show that the
vehicle was parked in violation of the law, rather than proving who specifically parked the car. The registered letter you
received offering options to plead not guilty or guilty and pay a bond amount is a common procedural step in handling such
civil infractions. While parking tickets are civil matters, failure to address them can lead to additional consequences, such
as fines, vehicle registration holds, or even a bench warrant for failing to respond to the citation, which can create legal
complications. Regarding jurisdiction, if the owner of the vehicle resides out of state, such as in Texas, Pennsylvania cannot
extradite the individual solely for an unpaid parking ticket. Extradition typically applies to criminal offenses. However,
unresolved parking tickets can lead to complications like increased fines or affecting the owner's ability to register their
vehicle, depending on interstate compacts or agreements. If you believe that the ticket was issued in error or you have
compelling reasons to challenge it, you may wish to contest the ticket through the legal process outlined in the letter. It can
also be helpful to seek legal advice to understand the specific options and implications based on the circumstances of the
case."""]
        ],
        inputs=text_input
    )
    # Clicking the button sends the text box content to detect_ai_text and
    # routes its two return values to the two Markdown panes, in order.
    submit_btn.click(
        fn=detect_ai_text,
        inputs=text_input,
        outputs=[result_output, ai_generated]
    )
# Script entry point: switch on Gradio's request queue (so multiple requests
# are handled in turn), then start serving the app.
if __name__ == "__main__":
    demo.queue().launch()