Spaces:

RichardChenZH
/

DivScore

Sleeping

App Files Files Community

DivScore / app.py

RichardChenZH

Update app.py

4afc6ee verified 5 months ago

raw

history blame contribute delete

6.05 kB

	import gradio as gr
	from divscore import DivScore
	import torch
	import os

	# Set TOKENIZERS_PARALLELISM to "false" for the Hugging Face stack.
	# NOTE(review): presumably this disables the tokenizers library's internal
	# parallelism to avoid its fork-related warning — confirm.
	os.environ["TOKENIZERS_PARALLELISM"] = "false"

	# Initialize the DivScore detector with loading state
	def load_model():
	    """Construct the DivScore detector used by the demo.

	    The detector pairs the general Mistral-7B language model with the
	    DivScore enhanced model and is placed on the first CUDA device when one
	    is available, otherwise on the CPU.

	    Returns:
	        The constructed ``DivScore`` instance, or ``None`` when anything
	        raised during construction (the error is printed to stdout).
	    """
	    try:
	        # The CUDA probe stays inside the try so that a failure there is
	        # reported in the same way as a failed model load.
	        if torch.cuda.is_available():
	            target_device = "cuda:0"
	        else:
	            target_device = "cpu"

	        return DivScore(
	            generalLM_name_or_path="mistral-community/Mistral-7B-v0.2",
	            enhancedLM_name_or_path="RichardChenZH/DivScore_combined",
	            device=target_device,
	            # Use bfloat16 for better memory efficiency
	            use_bfloat16=True,
	        )
	    except Exception as err:
	        print(f"Error loading model: {err}")
	        return None

	# Module-level cache for the DivScore detector. It starts as None and is
	# filled in by detect_ai_text() on first use via load_model(), so importing
	# this module does not by itself construct the detector.
	detector = None

	def detect_ai_text(text):
	    """Score ``text`` with DivScore and report whether it looks AI-generated.

	    The shared module-level ``detector`` is created on the first call that
	    actually needs it; blank input is rejected before any model is loaded.

	    Args:
	        text: The text to analyse. ``None``, empty and whitespace-only input
	            are answered with a prompt instead of being scored.

	    Returns:
	        A ``(report, verdict)`` tuple of Markdown strings. ``report`` holds
	        the scores (or a prompt / error message) and ``verdict`` holds the
	        detection headline, ``"Error in analysis"`` on failure, or ``""``
	        when there was nothing to analyse.
	    """
	    global detector

	    # Based on the paper's findings, we use 0.19 as the threshold
	    threshold = 0.19

	    # Both return values are rendered by gr.Markdown components, so every
	    # branch returns strings (never booleans). Checking the input first
	    # avoids loading the models just to reject blank text.
	    if not text or not text.strip():
	        return "Please enter some text to analyze.", ""

	    # Initialize detector if not already done
	    if detector is None:
	        detector = load_model()
	        if detector is None:
	            return (
	                "Error: Failed to load the model. Please try again later.",
	                "Error in analysis",
	            )

	    try:
	        score, entropy_score, ce_score = detector.compute_score(text)

	        is_ai_generated = score < threshold

	        # Format the result with more detailed information
	        result = f"""
	### Analysis Results
	- DivScore: {score:.4f}
	- Entropy Score: {entropy_score:.4f}
	- CE Score: {ce_score:.4f}

	### Interpretation
	- Threshold: {threshold} (scores below this are likely AI-generated)
	- Current Score: {score:.4f}
	"""
	        if is_ai_generated:
	            detection_result = "## 🔴 AI-Generated Text Detected\nThis text is likely to have been generated by an AI model."
	        else:
	            detection_result = "## 🟢 Human-Written Text Detected\nThis text appears to be written by a human."

	        return result, detection_result

	    except Exception as e:
	        # Surface scoring failures in the UI rather than crashing the request.
	        return f"Error occurred: {e}", "Error in analysis"

	# Build the Gradio interface. The long text literals are named first so the
	# layout code below reads as a plain outline of the page.
	_INTRO_MARKDOWN = """
	# DivScore AI Text Detector

	This demo uses the DivScore model to detect if specialized domain text (i.e. medical and legal text) was generated by an AI model.
	Enter your text below to analyze it.

	Note: The demo may take quite a few moments to run as it runs on Huggingface free CPUs. Running on local GPUs provides much faster speed.
	"""

	# Sample medical-domain passage offered as a clickable example.
	_MEDICAL_EXAMPLE = """Adjustment for reporting heterogeneity can be necessary in sleep disorders research to ensure accurate and reliable results.
	Reporting heterogeneity refers to variations in how information is documented or reported, which can stem from differences
	in assessment tools, study design, participant demographics, cultural differences, and subjective interpretations of symptoms.
	In sleep disorders, symptoms and their severity can be perceived differently by individuals due to personal, cultural, or
	situational factors. Without adjusting for these differences, there can be inconsistencies in data interpretation, potentially
	leading to skewed or misleading findings. Therefore, employing methods to adjust for reporting heterogeneity can enhance
	the validity and comparability of research outcomes across different studies or populations."""

	# Sample legal-domain passage offered as a clickable example.
	_LEGAL_EXAMPLE = """In Pennsylvania, as in many other jurisdictions, parking violations, including those for street sweeper offenses, are generally
	considered civil infractions rather than criminal offenses. This means that the violation is typically against the vehicle and
	not directly against the individual who parked it. As such, the burden of proof is generally on the city to show that the
	vehicle was parked in violation of the law, rather than proving who specifically parked the car. The registered letter you
	received offering options to plead not guilty or guilty and pay a bond amount is a common procedural step in handling such
	civil infractions. While parking tickets are civil matters, failure to address them can lead to additional consequences, such
	as fines, vehicle registration holds, or even a bench warrant for failing to respond to the citation, which can create legal
	complications. Regarding jurisdiction, if the owner of the vehicle resides out of state, such as in Texas, Pennsylvania cannot
	extradite the individual solely for an unpaid parking ticket. Extradition typically applies to criminal offenses. However,
	unresolved parking tickets can lead to complications like increased fines or affecting the owner's ability to register their
	vehicle, depending on interstate compacts or agreements. If you believe that the ticket was issued in error or you have
	compelling reasons to challenge it, you may wish to contest the ticket through the legal process outlined in the letter. It can
	also be helpful to seek legal advice to understand the specific options and implications based on the circumstances of the
	case."""

	with gr.Blocks(title="DivScore AI Text Detector") as demo:
	    # Page header and usage notes.
	    gr.Markdown(_INTRO_MARKDOWN)

	    # Input area: the text box with its submit button underneath.
	    with gr.Row():
	        with gr.Column():
	            text_input = gr.Textbox(
	                label="Input Text",
	                placeholder="Enter text to analyze...",
	                lines=5,
	            )
	            submit_btn = gr.Button("Analyze Text", variant="primary")

	    # Output area: score report on the left, verdict on the right.
	    with gr.Row():
	        with gr.Column():
	            result_output = gr.Markdown(label="Analysis Results")
	        with gr.Column():
	            ai_generated = gr.Markdown(label="AI Generation Detection")

	    # Each example is a one-element row because there is a single input.
	    gr.Examples(
	        examples=[[_MEDICAL_EXAMPLE], [_LEGAL_EXAMPLE]],
	        inputs=text_input,
	    )

	    # Wire the button to the detector; the two return values of
	    # detect_ai_text map onto the two Markdown panels in order.
	    submit_btn.click(
	        fn=detect_ai_text,
	        inputs=text_input,
	        outputs=[result_output, ai_generated],
	    )

	if __name__ == "__main__":
	    # Enable queuing for better handling of multiple requests, then start
	    # the server; queue() hands back the same Blocks object, so the two
	    # calls can be chained.
	    demo.queue().launch()