import gradio as gr
import json
import random
import os
from typing import List, Dict, Any

# -----------------------------
# Available JSON files (persona datasets)
# -----------------------------
available_files = [
    "persona_annotator_sample.json"
]

data = []
index = 0
current_file = None

ICONS = {
    "header": "πŸ‘€",
    "categories": "🏷️",
    "presenting": "🚩",
    "clinical": "🩺",
    "history": "πŸ“œ",
    "functioning": "πŸ”§",
    "summary": "🧾",
    "context": "🧩",
    "metadata": "πŸ”–",
    "other": "πŸ—‚οΈ",
}

SECTION_FIELDS = {
    "header": [
        "name", "archetype", "age", "sex", "location", "education_level",
        "bachelors_field", "ethnic_background", "marital_status", "version"
    ],
    "categories": ["appearance_category", "behavior_category"],
    "presenting": ["presenting_problems"],
    "clinical": ["appearance", "behavior", "mood_affect", "speech",
                 "thought_content", "insight_judgment", "cognition"],
    "history": ["medical_developmental_history", "family_history",
                "educational_vocational_history"],
    "functioning": ["emotional_behavioral_functioning", "social_functioning"],
    "summary": ["summary_of_psychological_profile"],
    "context": ["archetype_description", "memoir", "memoir_summary", "memoir_narrative"],
    "metadata": ["uid"],
}

# -----------------------------
# Persistent storage path
# -----------------------------
PERSISTENT_DIR = "/home/user/app/storage"
STORAGE_DIR = PERSISTENT_DIR if os.path.exists(PERSISTENT_DIR) else "."
os.makedirs(STORAGE_DIR, exist_ok=True)
ANNOTATION_FILE = os.path.join(STORAGE_DIR, "persona_annotations.jsonl")

# -----------------------------
# Core functions
# -----------------------------
def _get(entry: Dict[str, Any], key: str, default: str = "β€”") -> str:
    v = entry.get(key, default)
    if v is None:
        return default
    if isinstance(v, (list, dict)):
        try:
            return json.dumps(v, ensure_ascii=False)
        except Exception:
            return str(v)
    return str(v).strip()


def _truncate(s: str, limit: int = 2000) -> str:
    s = s or ""
    return (s[:limit] + " …") if len(s) > limit else s


def load_file(file_name):
    """Load the selected JSON file and show a random entry."""
    global data, index, current_file
    current_file = file_name
    with open(file_name, "r", encoding="utf-8") as f:
        data = json.load(f)
    index = random.randint(0, len(data) - 1) if data else 0
    return show_entry()


def save_annotation(p_uuid, *scores_and_comments):
    """Append one annotation record (with the source file name) to persistent JSONL storage."""
    ann = {
        "file_name": current_file,
        "persona_uuid": p_uuid,
        "annotations": {}
    }
    rubric_fields = [
        "clarity", "originality", "coherence", "diversity", "realism",
        "psychological_depth", "consistency", "informativeness",
        "ethical_considerations", "demographic_fidelity", "overall_score"
    ]
    for field, value in zip(rubric_fields, scores_and_comments):
        ann["annotations"][field] = value
    with open(ANNOTATION_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(ann, ensure_ascii=False) + "\n")
    return f"βœ… Saved annotation for {p_uuid} (from {current_file}) β†’ {ANNOTATION_FILE}"

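# Example of a single line appended to persona_annotations.jsonl by save_annotation.
# Values are illustrative; rubric scores arrive from the dropdowns as strings:
# {"file_name": "persona_annotator_sample.json", "persona_uuid": "persona_12",
#  "annotations": {"clarity": "4", "originality": "3", ..., "overall_score": "4"}}
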
def export_annotations():
    """Return the path to the annotations file for download (create an empty file if missing)."""
    if not os.path.exists(ANNOTATION_FILE):
        with open(ANNOTATION_FILE, "w", encoding="utf-8"):
            pass
    return ANNOTATION_FILE


def md_header(entry: Dict[str, Any]) -> str:
    name = _get(entry, "name")
    archetype = _get(entry, "archetype")
    age = _get(entry, "age")
    sex = _get(entry, "sex")
    location = _get(entry, "location")
    education_level = _get(entry, "education_level")
    bachelors_field = _get(entry, "bachelors_field")
    ethnic_background = _get(entry, "ethnic_background")
    marital_status = _get(entry, "marital_status")
    version = _get(entry, "version")
    return (
        f"## {ICONS['header']} Persona\n"
        f"**Name:** {name}  \n"
        f"**Archetype:** {archetype}  \n"
        f"**Age:** {age}  \n"
        f"**Sex:** {sex}  \n"
        f"**Location:** {location}  \n"
        f"**Education Level:** {education_level}  \n"
        f"**Bachelor’s Field:** {bachelors_field}  \n"
        f"**Ethnic Background:** {ethnic_background}  \n"
        f"**Marital Status:** {marital_status}  \n"
        f"**Version:** {version}"
    )


def md_categories(entry: Dict[str, Any]) -> str:
    app_cat = _get(entry, "appearance_category")
    beh_cat = _get(entry, "behavior_category")
    return (
        f"## {ICONS['categories']} Categories\n"
        f"**Appearance Category:** {app_cat}  \n"
        f"**Behavior Category:** {beh_cat}"
    )


def md_presenting(entry: Dict[str, Any]) -> str:
    raw = entry.get("presenting_problems")
    items: List[str] = []
    if isinstance(raw, list):
        items = [str(x).strip() for x in raw if str(x).strip()]
    elif isinstance(raw, str) and raw.strip():
        try:
            parsed = json.loads(raw)
            if isinstance(parsed, list):
                items = [str(x).strip() for x in parsed if str(x).strip()]
            else:
                items = [x.strip() for x in raw.split(";") if x.strip()]
        except Exception:
            items = [x.strip() for x in raw.split(";") if x.strip()]
    bullets = "\n".join(f"- {x}" for x in items) if items else "β€”"
    return f"## {ICONS['presenting']} Presenting Problems\n{bullets}"


def md_clinical(entry: Dict[str, Any]) -> str:
    blocks = []
    mapping = [
        ("appearance", "Appearance"),
        ("behavior", "Behavior"),
        ("mood_affect", "Mood / Affect"),
        ("speech", "Speech"),
        ("thought_content", "Thought Content"),
        ("insight_judgment", "Insight & Judgment"),
        ("cognition", "Cognition"),
    ]
    for k, label in mapping:
        v = entry.get(k)
        if isinstance(v, str) and v.strip():
            blocks.append(f"**{label}**\n{_truncate(v)}")
    return f"## {ICONS['clinical']} Clinical Observations\n" + ("\n\n".join(blocks) if blocks else "β€”")


def md_history(entry: Dict[str, Any]) -> str:
    blocks = []
    mapping = [
        ("medical_developmental_history", "Medical / Developmental History"),
        ("family_history", "Family History"),
        ("educational_vocational_history", "Educational / Vocational History"),
    ]
    for k, label in mapping:
        v = entry.get(k)
        if isinstance(v, str) and v.strip():
            blocks.append(f"**{label}**\n{_truncate(v)}")
    return f"## {ICONS['history']} Life History\n" + ("\n\n".join(blocks) if blocks else "β€”")


def md_functioning(entry: Dict[str, Any]) -> str:
    blocks = []
    mapping = [
        ("emotional_behavioral_functioning", "Emotional / Behavioral Functioning"),
        ("social_functioning", "Social Functioning"),
    ]
    for k, label in mapping:
        v = entry.get(k)
        if isinstance(v, str) and v.strip():
            blocks.append(f"**{label}**\n{_truncate(v)}")
    return f"## {ICONS['functioning']} Functioning\n" + ("\n\n".join(blocks) if blocks else "β€”")


def md_summary(entry: Dict[str, Any]) -> str:
    v = entry.get("summary_of_psychological_profile")
    body = _truncate(v) if isinstance(v, str) and v.strip() else "β€”"
    return f"## {ICONS['summary']} Summary\n{body}"


def md_context(entry: Dict[str, Any]) -> str:
    arch_desc = entry.get("archetype_description") or entry.get("archetype_summary") or "β€”"
    memoir_title = entry.get("memoir")
    memoir_summary = entry.get("memoir_summary")
    memoir_narr = entry.get("memoir_narrative")
    title_line = (
        f"**Memoir:** {memoir_title}\n\n"
        if isinstance(memoir_title, str) and memoir_title.strip() else ""
    )
    sum_line = (
        f"**Memoir Summary**\n{_truncate(memoir_summary)}\n\n"
        if isinstance(memoir_summary, str) and memoir_summary.strip() else ""
    )
    narr_line = (
        f"**Memoir Narrative**\n{_truncate(memoir_narr)}"
        if isinstance(memoir_narr, str) and memoir_narr.strip() else "β€”"
    )
    return (
        f"## {ICONS['context']} Context\n"
        f"**Archetype Description**\n{_truncate(arch_desc) if isinstance(arch_desc, str) else 'β€”'}\n\n"
        f"{title_line}{sum_line}{narr_line}"
    )


def md_metadata(entry: Dict[str, Any]) -> str:
    uid = _get(entry, "uid")
    return f"## {ICONS['metadata']} Metadata\n**UID:** {uid}"


def md_other_fields(entry: Dict[str, Any]) -> str:
    # Show any extra keys (e.g., concat_field, concat_embedding) not covered elsewhere
    known = set().union(*SECTION_FIELDS.values())
    other_keys = [k for k in entry.keys() if k not in known]
    if not other_keys:
        return f"## {ICONS['other']} Other Fields\nβ€”"
    pairs = []
    for k in sorted(other_keys):
        v = entry.get(k)
        if isinstance(v, (dict, list)):
            try:
                s = json.dumps(v, ensure_ascii=False)
            except Exception:
                s = str(v)
        else:
            s = str(v) if v is not None else ""
        pairs.append(f"- **{k}:** {_truncate(s)}")
    return f"## {ICONS['other']} Other Fields\n" + "\n".join(pairs)


def show_entry(step=None):
    """Navigate entries and render the current persona entry."""
    global index, data
    # Outputs: 1 hash ID + 10 markdown sections, followed by 11 rubric dropdown resets.
    if not data:
        return [""] * 11 + [None] * 11
    if step == "Next":
        index = (index + 1) % len(data)
    elif step == "Previous":
        index = (index - 1) % len(data)
    elif step == "Random Shuffle":
        index = random.randint(0, len(data) - 1)
    entry = data[index]
    p_uuid = entry.get("uuid", f"persona_{index}")
    if not entry:
        empty = "_No data_"
        return [p_uuid] + [empty] * 10 + [None] * 11
    persona_out = [
        p_uuid,
        md_header(entry),
        md_categories(entry),
        md_presenting(entry),
        md_clinical(entry),
        md_history(entry),
        md_functioning(entry),
        md_summary(entry),
        md_context(entry),
        md_metadata(entry),
        md_other_fields(entry),
    ]
    # Reset rubric dropdowns to None
    resets = [None] * 11
    return persona_out + resets

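# Minimal sketch (not wired into the UI): reading the saved annotations back for
# offline analysis. The helper name load_annotations is illustrative and not part
# of the original app; it only assumes the one-JSON-object-per-line format written
# by save_annotation above.
def load_annotations(path: str = ANNOTATION_FILE) -> List[Dict[str, Any]]:
    records: List[Dict[str, Any]] = []
    if not os.path.exists(path):
        return records
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    return records
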
# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## Persona Annotation Tool")

    # File selection dropdown
    file_dropdown = gr.Dropdown(
        choices=available_files,
        value=available_files[0],
        label="Select Persona JSON File"
    )

    with gr.Row():
        prev_btn = gr.Button("Previous")
        next_btn = gr.Button("Next")
        shuffle_btn = gr.Button("Random Shuffle")

    phash_out = gr.Textbox(label="Persona Hash ID", interactive=False)

    # persona_out = gr.Markdown(label="Persona Description")
    md_header_out = gr.Markdown()
    md_cats_out = gr.Markdown()
    md_present_out = gr.Markdown()
    md_clinical_out = gr.Markdown()
    md_history_out = gr.Markdown()
    md_function_out = gr.Markdown()
    md_summary_out = gr.Markdown()
    md_context_out = gr.Markdown()
    md_meta_out = gr.Markdown()
    md_other_out = gr.Markdown()

    gr.Markdown("### Evaluation Rubric (0 = Worst, 5 = Best)")
    choices = [str(i) for i in range(6)]
    clarity = gr.Dropdown(choices=choices, label="Clarity", value=None)
    originality = gr.Dropdown(choices=choices, label="Originality", value=None)
    coherence = gr.Dropdown(choices=choices, label="Coherence", value=None)
    diversity = gr.Dropdown(choices=choices, label="Diversity", value=None)
    realism = gr.Dropdown(choices=choices, label="Realism", value=None)
    psychological_depth = gr.Dropdown(choices=choices, label="Psychological Depth (focus metric)", value=None)
    consistency = gr.Dropdown(choices=choices, label="Consistency", value=None)
    informativeness = gr.Dropdown(choices=choices, label="Informativeness", value=None)
    ethical_considerations = gr.Dropdown(choices=choices, label="Ethical Considerations", value=None)
    demographic_fidelity = gr.Dropdown(choices=choices, label="Demographic Fidelity", value=None)
    overall_score = gr.Dropdown(choices=choices, label="Overall Score", value=None)

    save_btn = gr.Button("Save Annotation")
    save_status = gr.Textbox(label="Status", interactive=False)

    all_outputs = [
        phash_out,
        md_header_out, md_cats_out, md_present_out, md_clinical_out, md_history_out,
        md_function_out, md_summary_out, md_context_out, md_meta_out, md_other_out,
        clarity, originality, coherence, diversity, realism, psychological_depth,
        consistency, informativeness, ethical_considerations, demographic_fidelity,
        overall_score
    ]

    with gr.Row():
        export_btn = gr.Button("Download All Annotations")
        export_file = gr.File(label="Exported Annotations", type="filepath")

    # Wiring
    file_dropdown.change(load_file, inputs=file_dropdown, outputs=all_outputs)
    prev_btn.click(show_entry, inputs=gr.State("Previous"), outputs=all_outputs)
    next_btn.click(show_entry, inputs=gr.State("Next"), outputs=all_outputs)
    shuffle_btn.click(show_entry, inputs=gr.State("Random Shuffle"), outputs=all_outputs)
    save_btn.click(
        save_annotation,
        inputs=[phash_out, clarity, originality, coherence, diversity, realism,
                psychological_depth, consistency, informativeness,
                ethical_considerations, demographic_fidelity, overall_score],
        outputs=save_status
    )
    export_btn.click(export_annotations, inputs=None, outputs=export_file)

    demo.load(load_file, inputs=gr.State(available_files[0]), outputs=all_outputs)

demo.launch()