"""Gradio demo: GPT-2 text generation behind a simple prompt guardrail.

Each prompt is screened against a keyword list and a list of regex patterns
before it is passed to the model. Rejected prompts are recorded in an
in-memory violation log whose most recent entries are shown in the UI.
"""

import re

import gradio as gr
import torch  # noqa: F401  # ✅ Explicitly import torch (not referenced directly below)
from transformers import pipeline

# Load model
llm = pipeline("text-generation", model="gpt2")

# Guardrails
blocked_keywords = ["kill", "bomb", "hack", "suicide", "drugs"]
blocked_patterns = [
    r"\bhow to (make|build|create) (a )?(bomb|weapon)\b",
    r"\b(hack|bypass) (password|system)\b",
]

# Process-wide log of guardrail hits. NOTE: this is shared by every user of
# the running app, because it is plain module-level state.
violation_log = []

# Upper bound on retained log entries so a long-running server does not grow
# the list without limit (the UI only ever shows the last few).
MAX_LOG_ENTRIES = 100


def _record_violation(message):
    """Append *message* to the violation log, discarding the oldest entries."""
    violation_log.append(message)
    # Trim in place so the module-level list object itself is preserved.
    del violation_log[:-MAX_LOG_ENTRIES]


def guardrails_check(prompt):
    """Return True if *prompt* passes the guardrails, False if it is blocked.

    The check is case-insensitive. A prompt is blocked when it contains a word
    that starts with one of ``blocked_keywords`` or when it matches one of
    ``blocked_patterns``. The first hit is recorded in ``violation_log``.
    """
    text = prompt.lower()

    for keyword in blocked_keywords:
        # Anchor at the start of a word: "killing" and "bombs" are still
        # caught, but unrelated words that merely contain the keyword
        # ("skill", "shack") no longer trigger a false positive.
        if re.search(r"(?<!\w)" + re.escape(keyword), text):
            _record_violation(f"Keyword blocked: {keyword}")
            return False

    for pattern in blocked_patterns:
        if re.search(pattern, text):
            _record_violation(f"Pattern blocked: {pattern}")
            return False

    return True


def generate_response(prompt):
    """Return generated text for *prompt*, or a notice if it is blocked."""
    if not guardrails_check(prompt):
        return "🚫 Prompt blocked by guardrails."

    # max_new_tokens bounds only the continuation. The previous max_length=100
    # also counted the prompt's tokens, so long prompts left no room to
    # generate anything.
    results = llm(prompt, max_new_tokens=100, do_sample=True)
    return results[0]["generated_text"]


def show_log():
    """Return the five most recent violations, one per line."""
    if not violation_log:
        return "No violations yet."
    return "\n".join(violation_log[-5:])


def _respond(prompt):
    """Run generation, then return ``(response, refreshed log text)``.

    Both outputs come from one handler so the log is always read *after* the
    guardrail check for this prompt has run. Two separate click listeners
    give no such ordering guarantee.
    """
    response = generate_response(prompt)
    return response, show_log()


with gr.Blocks() as demo:
    gr.Markdown("## 🛡️ LLM Guardrails Demo")
    with gr.Row():
        prompt = gr.Textbox(label="Enter your prompt")
        output = gr.Textbox(label="LLM Response")
    with gr.Row():
        log = gr.Textbox(label="Violation Log", interactive=False)
    submit = gr.Button("Generate")
    submit.click(fn=_respond, inputs=prompt, outputs=[output, log])

# Only start the server when executed as a script; importing the module
# still exposes `demo` without launching it.
if __name__ == "__main__":
    demo.launch()