import time

import gradio as gr
from transformers import pipeline

# Load a small, free text-generation model.
generator = pipeline("text-generation", model="bigscience/bloomz-1b1")

# Load a moderation (toxicity classification) pipeline.
moderator = pipeline("text-classification", model="unitary/toxic-bert")

# Simple in-memory rate limiter: ip -> list of recent request timestamps.
# NOTE(review): grows unboundedly across distinct IPs (entries for an IP are
# pruned only when that IP makes another request) — fine for a demo, but a
# long-running public deployment should periodically evict stale keys.
request_log: dict[str, list[float]] = {}
MAX_REQUESTS = 10     # max requests ...
WINDOW_SECONDS = 60   # ... per 60-second sliding window


def is_rate_limited(ip: str) -> bool:
    """Return True if *ip* has exceeded MAX_REQUESTS in the sliding window.

    Side effect: records the current request timestamp for *ip* when the
    request is allowed. Not thread-safe; adequate for a single-process demo.
    """
    now = time.time()
    window_start = now - WINDOW_SECONDS
    history = request_log.get(ip, [])
    # Keep only requests that fall inside the current window.
    history = [t for t in history if t > window_start]
    if len(history) >= MAX_REQUESTS:
        return True
    history.append(now)
    request_log[ip] = history
    return False


def contains_profanity(text: str, threshold: float = 0.5) -> bool:
    """Use the toxic-bert moderation model to detect toxicity.

    Returns True if the top predicted label is 'toxic' with a score at or
    above *threshold*. Results look like:
    [{'label': 'toxic', 'score': 0.87}]
    """
    results = moderator(text)
    label = results[0]["label"]
    score = results[0]["score"]
    return label.lower() == "toxic" and score >= threshold


def generate_conclusion(user_input: str, request: gr.Request) -> str:
    """Generate a short conclusion for *user_input*, with abuse safeguards.

    Guard order is cheapest-first: rate limit, empty input, length cap, then
    the (expensive) toxicity-model check, so oversized input is rejected
    before spending a moderation-model call on it.
    """
    ip = request.client.host if request else "unknown"

    if is_rate_limited(ip):
        return "⚠️ Too many requests. Please wait a bit before trying again."

    if not user_input.strip():
        return "⚠️ Please enter some text."

    # Limit input length before running the moderation model.
    if len(user_input) > 300:
        return "⚠️ Input too long. Please keep it under 300 characters."

    if contains_profanity(user_input):
        return "⚠️ Your input contains inappropriate or toxic language."

    # Instruction prompt prepended to the user's text.
    prompt = (
        "Generate only the final conclusion with minimal explanation from the following input. "
        "Do not include elaboration or supporting details.\n\n"
        f"{user_input}"
    )

    # Generate with limits to prevent abuse (short, mildly sampled output).
    try:
        output = generator(
            prompt,
            max_new_tokens=200,   # keep responses short
            do_sample=True,
            temperature=0.5,
            return_full_text=False,
        )
        return output[0]["generated_text"]
    except Exception as e:
        # Top-level boundary for the demo: surface the error to the UI
        # instead of crashing the request handler.
        return f"⚠️ An error occurred: {str(e)}"


# Use Blocks instead of Interface.
with gr.Blocks() as demo:
    gr.Markdown("## Simple AI Response Demo")
    gr.Markdown("Generates a short conclusion with minimal explanation. Older AI engine that may contain inaccuracies. Input limit 300 characters.")
    input_box = gr.Textbox(lines=4, placeholder="Enter your query here...")
    output_box = gr.Textbox(placeholder="AI response...")
    submit_btn = gr.Button("Generate Response")
    submit_btn.click(
        fn=generate_conclusion,
        inputs=input_box,
        outputs=output_box,
        concurrency_limit=2,
    )

if __name__ == "__main__":
    demo.queue().launch()