# Ally AI: GBV help finder and email assistant (Gradio app).

import os
import time
import re
import requests
import phonenumbers
import pandas as pd
import urllib.parse
from bs4 import BeautifulSoup

import torch
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    AutoModelForSeq2SeqLM,
    pipeline
)

import gradio as gr
from concurrent.futures import ThreadPoolExecutor, as_completed
from email.message import EmailMessage
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# ============================
# CONFIG 
# ============================
# Google Custom Search credentials; the placeholder defaults mean "not configured".
API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")
CX = os.environ.get("GOOGLE_CSE_ID", "YOUR_CSE_ID")

# Country assumed when the caller does not pass one.
DEFAULT_COUNTRY = "Ghana"

# Tunables that can be overridden through environment variables.
RESULTS_PER_QUERY = int(os.environ.get("RESULTS_PER_QUERY", 4))
MAX_SCRAPE_WORKERS = int(os.environ.get("MAX_SCRAPE_WORKERS", 6))

# Branding defaults for outgoing email.
ALLY_AI_NAME = os.environ.get("ALLY_AI_NAME", "Ally AI")
ALLY_AI_LOGO_URL_DEFAULT = os.environ.get(
    "ALLY_AI_LOGO_URL",
    "https://imgur.com/a/lVxnQke",
)

# Country name -> domain suffix used in the `site:` search filter.
COUNTRY_TLD_MAP = {
    "Ghana": "gh",
    "Nigeria": "ng",
    "Kenya": "ke",
    "South Africa": "za",
    "USA": "us",
    "United Kingdom": "uk",
}

# Country name -> region code passed to the phone-number matcher.
COUNTRY_REGION_MAP = {
    "Ghana": "GH",
    "Nigeria": "NG",
    "Kenya": "KE",
    "South Africa": "ZA",
    "USA": "US",
    "United Kingdom": "GB",
}

# Request headers sent when scraping result pages.
HEADERS = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64)"}

# Pattern used both to harvest addresses from page text and to validate manual input.
EMAIL_REGEX = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")

# ============================
# MODELS (lightweight & CPU-friendly)
# ============================
# Use the GPU when torch can see one; otherwise everything runs on CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Device set to use", DEVICE)

# NER model (people/orgs/locs)
ner_model_id = "dslim/bert-base-NER"
ner_tokenizer = AutoTokenizer.from_pretrained(ner_model_id)
ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_id)
ner_pipe = pipeline(
    "ner",
    model=ner_model,
    tokenizer=ner_tokenizer,
    aggregation_strategy="simple",
    # The pipeline takes a device index: 0 for the first GPU, -1 for CPU.
    device=-1 if DEVICE != "cuda" else 0,
)

# Summarizer / anonymizer
text_model_id = "google/flan-t5-large"
text_tokenizer = AutoTokenizer.from_pretrained(text_model_id)
text_model = AutoModelForSeq2SeqLM.from_pretrained(text_model_id)
text_model = text_model.to(DEVICE)

# ============================
# TAXONOMY & HELPERS
# ============================
# Profession words looked for verbatim (as substrings) in a story.
PROFESSION_KEYWORDS = [
    "lawyer",
    "therapist",
    "doctor",
    "counselor",
    "social worker",
    "advocate",
    "psychologist",
    "psychiatrist",
    "consultant",
    "nurse",
    "hotline",
    "gbv",
]

# Problem phrase -> professions suggested when that phrase appears in a story.
PROBLEM_PROFESSION_MAP = {
    "rape": ["lawyer", "therapist", "counselor", "doctor"],
    "sexual assault": ["lawyer", "therapist", "counselor"],
    "domestic violence": ["lawyer", "social worker", "therapist"],
    "abuse": ["counselor", "social worker", "therapist", "lawyer"],
    "trauma": ["therapist", "psychologist", "psychiatrist"],
    "depression": ["therapist", "psychologist", "doctor"],
    "violence": ["lawyer", "counselor", "social worker"],
}

def get_region_for_country(country: str) -> str:
    """Return the region code mapped to *country*, or ``"GH"`` when it is not in COUNTRY_REGION_MAP."""
    try:
        return COUNTRY_REGION_MAP[country]
    except KeyError:
        return "GH"

def get_tld_for_country(country: str) -> str:
    """Return the domain suffix mapped to *country*, or ``""`` when it is not in COUNTRY_TLD_MAP."""
    if country in COUNTRY_TLD_MAP:
        return COUNTRY_TLD_MAP[country]
    return ""

def build_country_biased_query(core: str, country: str) -> str:
    """Append the country to *core*, plus a ``site:.<tld> OR <country>`` clause when a TLD is known."""
    base = f"{core} in {country}"
    tld = get_tld_for_country(country)
    if not tld:
        return base
    return f"{base} site:.{tld} OR {country}"

def dedup_by_url(items):
    """Keep the first item seen for each URL, preserving input order.

    The URL is read from the ``"link"`` key, falling back to ``"url"``.
    Items with no URL under either key are dropped.
    """
    first_by_url = {}
    for entry in items:
        key = entry.get("link") or entry.get("url")
        if not key:
            continue
        # setdefault keeps the earliest entry; dicts remember insertion order.
        first_by_url.setdefault(key, entry)
    return list(first_by_url.values())

# ============================
# SEARCH & SCRAPING
# ============================
def google_search(query, num_results=5):
    """Run *query* against the Google Custom Search JSON API.

    Args:
        query: Search string.
        num_results: Desired number of results. Clamped to 1..10 because the
            API rejects a ``num`` outside that range.

    Returns:
        A list of dicts with ``"title"``, ``"link"`` and ``"snippet"`` keys
        (empty strings when the API omits a field).

    Raises:
        RuntimeError: API key or search-engine id is missing or still a placeholder.
        requests.HTTPError: the API answered with an error status.
    """
    if not API_KEY or not CX or "YOUR_GOOGLE_API_KEY" in API_KEY or "YOUR_CSE_ID" in CX:
        raise RuntimeError("Google API key and CSE ID must be set as environment variables.")

    # An out-of-range value (e.g. RESULTS_PER_QUERY=20) would otherwise fail
    # every request with HTTP 400.
    num = max(1, min(int(num_results), 10))

    response = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"q": query, "key": API_KEY, "cx": CX, "num": num},
        timeout=20,
    )
    response.raise_for_status()
    items = response.json().get("items") or []
    return [
        {
            "title": item.get("title", ""),
            "link": item.get("link", ""),
            "snippet": item.get("snippet", ""),
        }
        for item in items
    ]

def extract_phones(text, region="GH"):
    """Find phone numbers in *text* and return them in international format.

    Args:
        text: Free text to scan.
        region: Default region code given to the matcher.

    Returns:
        De-duplicated list of formatted numbers in order of first appearance,
        so callers that take the first entry get a stable answer.
    """
    if not text:
        return []
    # A dict is used as an ordered set: keys keep insertion order.
    found = {}
    for match in phonenumbers.PhoneNumberMatcher(text, region):
        try:
            formatted = phonenumbers.format_number(
                match.number, phonenumbers.PhoneNumberFormat.INTERNATIONAL
            )
        except Exception as e:
            # Best effort: one unformattable match must not lose the others.
            print("[phone format error]", e)
            continue
        found.setdefault(formatted, None)
    return list(found)

def _domain_from_url(url):
    try:
        return urllib.parse.urlparse(url).netloc
    except Exception:
        return url

def scrape_contacts(url, region="GH"):
    """
    Download *url* and extract contact details from the page.

    Args:
        url: Page address to fetch.
        region: Default region code passed on to ``extract_phones``.

    Returns:
        dict with keys ``"emails"`` (list of str, in order of first
        appearance), ``"phones"`` (list of str) and ``"org"`` (best-guess
        organisation name). ``"org"`` is the URL's domain when nothing better
        is found, when the response is not OK or empty, or when any step
        raises. This function does not raise; errors are printed.
    """
    # Only a prefix of the page goes to NER. The page text can be up to 300k
    # characters, and token-classification models accept a bounded input, so
    # the full text is slow at best and likely to fail.
    ner_max_chars = 1000
    domain = _domain_from_url(url)

    try:
        res = requests.get(url, headers=HEADERS, timeout=12)
        if not res.ok or not res.text:
            return {"emails": [], "phones": [], "org": domain}
        soup = BeautifulSoup(res.text, "html.parser")

        # Whitespace-normalised page text, capped to keep regex/matcher work bounded.
        text = " ".join(soup.get_text(separator=" ").split())[:300000]

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # "first email" a caller picks does not vary between runs.
        emails = list(dict.fromkeys(EMAIL_REGEX.findall(text)))
        phones = extract_phones(text, region)

        # Markup-derived guess: og:site_name, then <title>, then the first <h1>.
        org_name = None
        meta_og = (
            soup.find("meta", property="og:site_name")
            or soup.find("meta", attrs={"name": "og:site_name"})
        )
        if meta_og and meta_og.get("content"):
            org_name = meta_og.get("content").strip()
        if not org_name:
            title_tag = soup.find("title")
            if title_tag and title_tag.get_text(strip=True):
                org_name = title_tag.get_text(strip=True)
        if not org_name:
            h1 = soup.find("h1")
            if h1 and h1.get_text(strip=True):
                org_name = h1.get_text(strip=True)

        # An ORG entity found by NER wins over the markup guess when one is
        # short enough to be a plausible name.
        try:
            _, orgs, _ = extract_entities(text[:ner_max_chars])
            org_name = next((o for o in orgs if 1 < len(o) < 80), org_name)
        except Exception as e:
            print(f"[scrape ner error] {url} -> {e}")

        return {"emails": emails, "phones": phones, "org": org_name or domain}
    except Exception as e:
        print(f"[scrape error] {url} -> {e}")
        return {"emails": [], "phones": [], "org": domain}

# ============================
# NER + STORY → PROFESSIONS
# ============================
def extract_entities(text):
    """Run the NER pipeline on *text* and group the entity words by type.

    Returns:
        A ``(people, orgs, locs)`` tuple of lists holding the ``"word"`` of
        every result whose ``"entity_group"`` is ``PER``, ``ORG`` or ``LOC``.
        Each list is de-duplicated and keeps order of first mention, so
        "first organisation" is stable between runs. Three empty lists are
        returned for empty input or when the pipeline raises (the error is
        printed).
    """
    if not text:
        return [], [], []
    try:
        ner_results = ner_pipe(text)
    except Exception as e:
        print("[ner error]", e)
        return [], [], []

    # Dicts act as ordered sets here (keys keep insertion order).
    grouped = {"PER": {}, "ORG": {}, "LOC": {}}
    for entity in ner_results:
        bucket = grouped.get(entity.get("entity_group"))
        word = entity.get("word")
        if bucket is not None and word:
            bucket.setdefault(word, None)
    return list(grouped["PER"]), list(grouped["ORG"]), list(grouped["LOC"])

def professions_from_story(story: str):
    """Infer which professions are relevant to *story*.

    A profession is selected when its keyword appears in the lower-cased
    story, or when a problem phrase from PROBLEM_PROFESSION_MAP appears.

    Returns:
        A non-empty list of profession tags in priority order. When nothing
        matches, ``["gbv", "counselor"]`` is returned.
    """
    s = (story or "").lower()
    found = {p for p in PROFESSION_KEYWORDS if p in s}
    for prob, profs in PROBLEM_PROFESSION_MAP.items():
        if prob in s:
            found.update(profs)
    if not found:
        return ["gbv", "counselor"]

    order = ["lawyer", "therapist", "counselor", "social worker", "psychologist",
             "psychiatrist", "doctor", "advocate", "nurse", "hotline", "gbv"]
    ranked = [p for p in order if p in found]
    # Matches missing from the priority list (e.g. "consultant") used to be
    # dropped, which could leave the result empty. They are appended instead.
    ranked += sorted(found.difference(ranked))
    return ranked

def build_queries(story: str, country: str):
    """Turn *story* into country-biased search queries.

    Returns:
        ``(queries, professions)``: the de-duplicated query strings in
        generation order, and the profession tags they were built from.
    """
    profs = professions_from_story(story)

    candidates = []
    for prof in profs:
        if prof == "gbv":
            candidates.extend(("GBV support organizations", "gender based violence help"))
            continue
        candidates.extend((
            f"{prof} for GBV",
            f"{prof} for sexual assault",
            f"{prof} near me {prof} {country}",
        ))

    # dict.fromkeys drops repeats while keeping first-seen order.
    ordered_unique = list(dict.fromkeys(candidates))
    queries = [build_country_biased_query(core, country) for core in ordered_unique]
    return queries, profs

# ============================
# TEXT GEN: anonymize + result summary
# ============================
def anonymize_story(story: str, max_sentences: int = 2):
    """Ask the text model for a short, anonymized summary of *story*.

    Args:
        story: The user's free-text story.
        max_sentences: Sentence budget stated in the prompt. It is an
            instruction to the model, not an enforced limit.

    Returns:
        The decoded model output, stripped; ``""`` when *story* is empty or
        only whitespace. The output is not checked for leftover identifying
        details.
    """
    if not story or not story.strip():
        return ""
    prompt = (
        "Anonymize and shorten the following personal story for contacting professionals. "
        "Remove names, exact ages, dates, locations and any identifying details. "
        f"Keep only the essential problem and the type of help requested. Output <= {max_sentences} sentences.\n\n"
        f"Story: {story}\n\nSummary:"
    )
    inputs = text_tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        # Greedy decoding, stated explicitly. The previous `temperature=0.2`
        # was ignored because sampling was never enabled.
        outputs = text_model.generate(**inputs, max_new_tokens=120, do_sample=False)
    return text_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

def generate_summary(query, people, orgs, locs):
    """Ask the text model for a short summary of the search results.

    Args:
        query: Query text shown to the model.
        people: Person names found in the results.
        orgs: Organisation names found in the results.
        locs: Location names found in the results.

    Returns:
        The decoded model output, stripped.
    """
    def _listing(names):
        # An empty list is shown as a dash so the prompt line is never blank.
        return ", ".join(names) or "—"

    prompt = (
        "Write a short, empathetic summary of these search results for a person seeking GBV help.\n"
        f"Query: {query}\nPeople: {_listing(people)}\nOrgs: {_listing(orgs)}\nLocations: {_listing(locs)}\n\n"
        "Explain how the organizations/professionals can help in 3-4 sentences."
    )
    inputs = text_tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        # Greedy decoding, stated explicitly. The previous `temperature=0.7`
        # was ignored because sampling was never enabled.
        outputs = text_model.generate(**inputs, max_new_tokens=150, do_sample=False)
    return text_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

# ============================
# MAIN PIPELINE
# ============================
def find_professionals_from_story(story, country=DEFAULT_COUNTRY, results_per_query=RESULTS_PER_QUERY):
    """Search, scrape and summarise contacts relevant to *story*.

    Args:
        story: The user's free-text story.
        country: Country used to bias queries and phone parsing.
        results_per_query: Number of search results requested per query.

    Returns:
        dict with ``"summary"`` (str), ``"professionals"`` (list of dicts with
        keys ``org``, ``url``, ``email``, ``phone``, ``profession``,
        ``source_query``) and ``"queries_used"`` (list of str). Entries with
        an email come first, then those with a phone; ties keep search order.
        Missing contacts are the string ``"Not found"``.
    """
    region = get_region_for_country(country)
    queries, profs = build_queries(story, country)

    # Search: a failing query is logged and skipped so the others still count.
    search_results = []
    for q in queries:
        try:
            items = google_search(q, num_results=results_per_query)
        except Exception as e:
            print("[search error]", q, e)
            continue
        for it in items:
            it["query"] = q
        search_results.extend(items)

    search_results = dedup_by_url(search_results)
    if not search_results:
        return {"summary": "No results found. Try a different country or wording.",
                "professionals": [], "queries_used": queries}

    # NER on titles/snippets for context
    all_people, all_orgs, all_locs = [], [], []
    for r in search_results:
        ctx = f"{r.get('title', '')}. {r.get('snippet', '')}"
        people, orgs, locs = extract_entities(ctx)
        all_people += people
        all_orgs += orgs
        all_locs += locs

    # Scrape contacts concurrently. Leaving the `with` block waits for every
    # future, so results can be read back in search order, not completion order.
    with ThreadPoolExecutor(max_workers=MAX_SCRAPE_WORKERS) as ex:
        futures = [ex.submit(scrape_contacts, r["link"], region) for r in search_results]

    # Tag used when no profession name appears in the originating query.
    default_tag = profs[0] if profs else "gbv"

    professionals = []
    for r, fut in zip(search_results, futures):
        try:
            contacts = fut.result()
        except Exception as e:
            print("[scrape future error]", r["link"], e)
            contacts = {"emails": [], "phones": [], "org": None}

        # choose a single email/phone if available
        email = contacts["emails"][0] if contacts.get("emails") else None
        phone = contacts["phones"][0] if contacts.get("phones") else None

        # profession tag = first profession named in the query that found this result
        qlower = (r.get("query") or "").lower()
        prof_tag = next((p for p in profs if p in qlower), default_tag)

        professionals.append({
            "org": contacts.get("org") or "",
            "url": r.get("link", ""),
            "email": email if email else "Not found",
            "phone": phone if phone else "Not found",
            "profession": prof_tag,
            "source_query": r.get("query", ""),
        })

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # summary prompt is the same for the same search results.
    summary = generate_summary(
        "; ".join(queries[:3]) + (" ..." if len(queries) > 3 else ""),
        list(dict.fromkeys(all_people)),
        list(dict.fromkeys(all_orgs)),
        list(dict.fromkeys(all_locs)),
    )

    # Sort by availability of email/phone (stable, so ties keep search order).
    professionals.sort(key=lambda it: (0 if it["email"] != "Not found" else 1,
                                       0 if it["phone"] != "Not found" else 1))
    return {"summary": summary, "professionals": professionals, "queries_used": queries}

# ============================
# DRAFT (mailto + .eml)
# ============================
def build_mailto_and_eml(to_addr, subject, body, default_from="noreply@ally.ai"):
    """
    Create an email draft on disk and a matching ``mailto:`` link.

    Args:
        to_addr: Recipient address (may be empty for an unaddressed draft).
        subject: Subject line.
        body: Plain-text message body.
        default_from: Address written to the ``From`` header of the draft.

    Returns:
        ``(mailto_link, path)``. ``path`` is the absolute path of a ``.eml``
        file under ``./tmp``. If the ``.eml`` cannot be built or written it is
        the path of a ``.txt`` fallback with the same information. It is
        ``None`` when nothing could be written at all. The mailto link is
        always returned.
    """
    import uuid

    # sanitize inputs
    to_addr = (to_addr or "").strip()
    subject = subject or ""
    body = body or ""

    # RFC 6068: the "@" of the address stays literal and line breaks in the
    # body are encoded as %0D%0A.
    quote = urllib.parse.quote
    crlf_body = body.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "\r\n")
    mailto = (
        f"mailto:{quote(to_addr, safe='@')}"
        f"?subject={quote(subject)}&body={quote(crlf_body)}"
    )

    # Absolute path is more robust for HF Spaces / Colab. The random suffix
    # stops two drafts made within the same second from overwriting each other.
    out_dir = os.path.abspath("tmp")
    stem = os.path.join(out_dir, f"email_draft_{int(time.time())}_{uuid.uuid4().hex[:8]}")
    eml_path = stem + ".eml"

    error_note = ""
    try:
        os.makedirs(out_dir, exist_ok=True)

        # Header assignment can raise (e.g. a line break in the subject), so
        # the message is built inside the try to reach the text fallback.
        msg = EmailMessage()
        msg["From"] = default_from
        msg["To"] = to_addr
        msg["Subject"] = subject
        msg.set_content(body)

        with open(eml_path, "wb") as f:
            f.write(msg.as_bytes())
        if os.path.getsize(eml_path) > 0:
            return mailto, eml_path
    except Exception as e:
        error_note = f"Error writing .eml: {e}\n\n"

    # Fallback: a plain-text copy (useful for debugging).
    txt_path = stem + ".txt"
    try:
        with open(txt_path, "w", encoding="utf-8") as f:
            f.write(f"{error_note}To: {to_addr}\nSubject: {subject}\n\n{body}")
    except (OSError, ValueError):
        # Nothing could be written; the mailto link is still usable.
        return mailto, None
    return mailto, txt_path

# ============================
# SENDER (SMTP) — Ally AI branding
# ============================
def send_ally_ai_email(to_email, subject, body, user_email,
                       sender_email, sender_password,
                       ai_name=ALLY_AI_NAME,
                       logo_url=ALLY_AI_LOGO_URL_DEFAULT):
    """Send a branded email through Gmail's SMTP server.

    Args:
        to_email: Recipient address; empty or ``"Not found"`` aborts the send.
        subject: Subject line (``"Request for support"`` when empty).
        body: Plain-text message written by the user.
        user_email: Address the recipient should reply to; shown in the message.
        sender_email: Gmail account used to log in and as envelope sender.
        sender_password: Password for *sender_email*.
        ai_name: Display name used in the ``From`` header and footer.
        logo_url: Image URL placed at the top of the HTML part.

    Returns:
        A human-readable status string. Failures are reported in the string
        and never raised.
    """
    from html import escape

    if not to_email or to_email == "Not found":
        return "❌ No recipient email found — choose a contact or provide a manual email."

    body = body or ""
    user_email = (user_email or "").strip()

    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject or "Request for support"
    msg["From"] = f"{ai_name} <{sender_email}>"
    msg["To"] = to_email

    # Plain-text alternative for clients that do not render HTML. It is
    # attached first because the last part of "alternative" is preferred.
    text_content = (
        f"{body}\n\n"
        f"Contact the survivor back at: {user_email}\n\n"
        f"This message was prepared with the help of {ai_name} — connecting survivors with help safely."
    )
    msg.attach(MIMEText(text_content, "plain"))

    # Every interpolated value is user-controlled, so it is escaped before
    # going into markup. Line breaks become <br/> so paragraphs survive.
    safe_name = escape(str(ai_name))
    safe_reply = escape(user_email)
    safe_body = "<br/>".join(escape(body).splitlines())
    html_content = f"""
    <html>
    <body style="font-family: Arial, sans-serif; color: #333;">
        <div style="padding: 20px; border: 1px solid #eee; border-radius: 10px; max-width: 640px; margin: auto;">
            <div style="text-align: center;">
                <img src="{escape(str(logo_url))}" alt="{safe_name} Logo" width="120" style="margin-bottom: 20px;" />
            </div>
            <p>{safe_body}</p>
            <p style="margin-top:20px;">
                <b>Contact the survivor back at:</b> <a href="mailto:{safe_reply}">{safe_reply}</a>
            </p>
            <hr style="border:none;border-top:1px solid #eee;margin:24px 0;">
            <p style="font-size: 12px; color: gray; text-align: center;">
                This message was prepared with the help of <b>{safe_name}</b> — connecting survivors with help safely.
            </p>
        </div>
    </body>
    </html>
    """
    msg.attach(MIMEText(html_content, "html"))

    try:
        # The context manager closes the connection even when login or send
        # fails; the timeout keeps a dead network from hanging the UI.
        with smtplib.SMTP("smtp.gmail.com", 587, timeout=30) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.sendmail(sender_email, [to_email], msg.as_string())
    except Exception as e:
        return f"❌ Failed to send email: {e}"
    return f"✅ Email sent successfully to {to_email}"

# ============================
# GRADIO UI
# ============================
def run_search(story, country):
    """Run the search pipeline and shape its output for the UI.

    Returns:
        ``(summary, records, options, anon)``: the summary text, the result
        rows as a list of dicts, the dropdown labels (each starting with the
        row index), and the anonymized story. On search failure the summary
        carries the error, rows are empty, a single placeholder option is
        returned and the anonymized text is ``""``.
    """
    default_anon = "I am seeking confidential support regarding gender-based violence."

    try:
        out = find_professionals_from_story(story, country=country, results_per_query=RESULTS_PER_QUERY)
    except Exception as e:
        return f"Search failed: {e}", [], ["0 β€” No results (search failed)"], ""

    pros = out.get("professionals", []) or []

    # Table rows: org name instead of article title.
    records = []
    if pros:
        try:
            records = pd.DataFrame(pros).to_dict(orient="records")
        except Exception:
            records = []

    # Dropdown labels: "<index> β€” <org> (<email or phone>)".
    options = []
    for i, r in enumerate(pros):
        email = r.get("email")
        if email and email != "Not found":
            label_contact = email
        else:
            label_contact = r.get("phone", "No contact")
        org_label = r.get("org") or r.get("url") or "(no org)"
        options.append(f"{i} β€” {org_label} ({label_contact})")
    options = options or ["0 β€” No results (try a different country/query)"]

    # Anonymize safely: any model failure falls back to a generic sentence.
    try:
        anon = anonymize_story(story) or default_anon
    except Exception as e:
        print("[anonymize error]", e)
        anon = default_anon

    return out.get("summary", "No results found."), records, options, anon

def make_body(anon_text, full_story, use_anon, user_email):
    """Compose the message body: chosen story text, a reply-contact line and a sign-off.

    *use_anon* selects *anon_text* over *full_story*; the chosen text is
    stripped and a missing value is treated as empty.
    """
    chosen = anon_text if use_anon else full_story
    core = (chosen or "").strip()
    return f"{core}\n\nReply contact: {user_email}\n\nThank you."

def preview_contact(dropdown_value, df_json, subject, message_text, manual_email):
    """Build a text and an HTML preview of the message for the selected contact.

    Args:
        dropdown_value: Selected dropdown label; its leading digits are the
            row index into *df_json*.
        df_json: Result rows (anything ``pandas.DataFrame`` accepts).
        subject: Email subject.
        message_text: Email body.
        manual_email: Optional recipient override; used when it contains
            something that looks like an email address.

    Returns:
        ``(text_preview, html_preview)``. With no selection the text is
        ``"No contact selected."``; on any error it is
        ``"Preview error: ..."``. The HTML is ``""`` in both cases.
    """
    from html import escape

    if not dropdown_value:
        return "No contact selected.", ""
    try:
        # Read the leading row number without depending on the label separator.
        index_match = re.match(r"\s*(\d+)", str(dropdown_value))
        if index_match is None:
            raise ValueError(f"cannot read a row number from {dropdown_value!r}")
        contact = pd.DataFrame(df_json).iloc[int(index_match.group(1))].to_dict()

        # choose recipient from manual_email if provided & valid, else scraped email
        scraped = contact.get("email")
        if manual_email and EMAIL_REGEX.search(manual_email):
            recipient = manual_email.strip()
        elif scraped and scraped != "Not found":
            recipient = scraped
        else:
            recipient = "[no email]"

        subject = subject or ""
        message_text = message_text or ""
        org_display = contact.get("org") or contact.get("url") or "(no org)"

        # Only plain web links become clickable; anything else points nowhere.
        url = str(contact.get("url") or "")
        href = url if url.startswith(("http://", "https://")) else "#"

        # Org names and URLs come from scraped pages and the rest from user
        # input, so everything is escaped before being rendered as HTML.
        preview_html = f"""
        <h3>Preview</h3>
        <b>To:</b> {escape(str(recipient))}<br/>
        <b>Organization:</b> <a href="{escape(href)}" target="_blank" rel="noopener">{escape(str(org_display))}</a><br/>
        <b>Profession tag:</b> {escape(str(contact.get('profession')))}<br/>
        <b>Subject:</b> {escape(str(subject))}<br/>
        <hr/>
        <pre style="white-space:pre-wrap;">{escape(str(message_text))}</pre>
        """
        text = f"To: {recipient}\nOrganization: {org_display}\nSubject: {subject}\n\n{message_text[:600]}{'...' if len(message_text) > 600 else ''}"
        return text, preview_html
    except Exception as e:
        return f"Preview error: {e}", ""

def confirm_action(mode, dropdown_value, df_json, subject, message_text,
                   user_email, sender_email, sender_password, logo_url, manual_email):
    """
    Create a draft or send the email for the selected contact.

    Args:
        mode: Delivery mode label; a value starting with ``"Send"`` sends via
            SMTP, anything else (including ``None``) only creates a draft.
        dropdown_value: Selected dropdown label; its leading digits are the
            row index into *df_json*.
        df_json: Result rows (anything ``pandas.DataFrame`` accepts).
        subject: Email subject.
        message_text: Email body.
        user_email: Address the organisation should reply to (required to send).
        sender_email: SMTP account (required to send).
        sender_password: SMTP password (required to send).
        logo_url: Logo for the branded email; the default is used when empty.
        manual_email: Optional override used when it contains something that
            looks like an email address; otherwise the scraped email is used.

    Returns:
        ``(status, mailto_html, file_path)``: a status message, an HTML link
        for the mailto draft (``""`` when not applicable) and the path of the
        draft file or ``None``.
    """
    from html import escape

    if not dropdown_value:
        return "❌ No contact selected.", "", None

    # Locate the contact. The leading row number is read without depending
    # on the label separator.
    try:
        index_match = re.match(r"\s*(\d+)", str(dropdown_value))
        if index_match is None:
            raise ValueError(f"cannot read a row number from {dropdown_value!r}")
        contact = pd.DataFrame(df_json).iloc[int(index_match.group(1))].to_dict()
    except Exception as e:
        return f"❌ Selection error: {e}", "", None

    # use manual if valid, else the scraped address
    scraped_recipient = contact.get("email")
    recipient = None
    if manual_email and EMAIL_REGEX.search(manual_email):
        recipient = manual_email.strip()
    elif scraped_recipient and scraped_recipient != "Not found":
        recipient = scraped_recipient

    if (mode or "").startswith("Send"):
        # Validate required fields before touching the network.
        if not recipient:
            return "❌ No recipient email found — either pick a contact with an email or provide a manual email.", "", None
        if not user_email or "@" not in user_email:
            return "❌ Please enter your email (so the organisation can contact you).", "", None
        if not sender_email or not sender_password:
            return "❌ Sender email and app password are required for SMTP sending.", "", None

        status = send_ally_ai_email(
            to_email=recipient,
            subject=subject,
            body=message_text,
            user_email=user_email,
            sender_email=sender_email,
            sender_password=sender_password,
            ai_name=ALLY_AI_NAME,
            logo_url=logo_url or ALLY_AI_LOGO_URL_DEFAULT,
        )
        # A copy of the message is offered for download whatever the send status.
        _, eml_path = build_mailto_and_eml(recipient, subject, message_text, default_from=sender_email)
        file_out = eml_path if eml_path and os.path.exists(eml_path) else None
        return status, "", file_out

    # Draft-only path (mailto + .eml)
    mailto, eml_path = build_mailto_and_eml(recipient or "", subject, message_text,
                                            default_from="noreply@ally.ai")
    if not (eml_path and os.path.exists(eml_path)):
        return "❌ Failed to create draft file.", "", None
    if os.path.getsize(eml_path) == 0:
        # file exists but is empty
        return "⚠️ Draft file created but it's empty. Check the message body or try manual email.", "", eml_path

    # The mailto contains "&" between parameters; escape it for the attribute.
    html_link = f'<a href="{escape(mailto)}" target="_blank" rel="noopener">Open draft in email client</a>'
    return "✅ Draft created (no email sent).", html_link, eml_path


# UI layout and event wiring. Components render in creation order.
with gr.Blocks() as demo:
    # The intro literals are joined explicitly so the sentences do not run
    # into each other when rendered.
    gr.Markdown(
        "## Ally AI — GBV Help Finder & Email Assistant\n"
        "This tool searches for local professionals/organizations, lets you select a contact "
        "or enter an email manually, and creates an email draft or sends a branded email via Gmail.\n\n"
        "**Privacy tip:** Prefer anonymized summaries unless you’re comfortable sharing details."
    )

    with gr.Row():
        story_in = gr.Textbox(label="Your story (free text)", lines=6,
                              placeholder="Describe your situation and the help you want...")
        country_in = gr.Textbox(value=DEFAULT_COUNTRY, label="Country (to bias search)")

    search_btn = gr.Button("Search for professionals")
    summary_out = gr.Textbox(label="Search summary (AI)", interactive=False)
    # Headers use org (organization name) instead of article title.
    results_table = gr.Dataframe(
        headers=["org", "url", "email", "phone", "profession", "source_query"],
        label="Search results",
    )

    dropdown_sel = gr.Dropdown(label="Select organization (from results)", choices=[])

    with gr.Row():
        use_anon = gr.Checkbox(value=True, label="Use anonymized summary (recommended)")
        anon_out = gr.Textbox(label="Anonymized summary", lines=3)
    user_email_in = gr.Textbox(label="Your email (for the organisation to reply to you)")

    gr.Markdown("### Compose message")
    subject_in = gr.Textbox(value="Request for GBV support", label="Email subject")
    message_in = gr.Textbox(label="Message body", lines=10)

    # Manual override for the organization email.
    manual_email_in = gr.Textbox(label="Manual org email (optional)")

    with gr.Accordion("Sending options (for automatic sending via Ally AI SMTP)", open=False):
        mode = gr.Radio(
            choices=["Draft only (mailto + .eml)", "Send via SMTP (Gmail)"],
            value="Draft only (mailto + .eml)",
            label="Delivery mode",
        )
        sender_email_in = gr.Textbox(label="Ally AI sender email (Gmail account)")
        sender_pass_in = gr.Textbox(label="Ally AI sender app password", type="password")
        logo_url_in = gr.Textbox(value=ALLY_AI_LOGO_URL_DEFAULT, label="Ally AI logo URL")

    with gr.Row():
        preview_btn = gr.Button("Preview")
        confirm_btn = gr.Button("Confirm (Create Draft or Send)")

    preview_text_out = gr.Textbox(label="Preview (text)", interactive=False)
    preview_html_out = gr.HTML()
    status_out = gr.Textbox(label="Status", interactive=False)
    mailto_html_out = gr.HTML()
    eml_file_out = gr.File(label="Download .eml")

    # Wire: Search
    def _on_search(story, country):
        """Run the search and fill summary, table, dropdown, anonymized text and message body."""
        s, records, options, anon = run_search(story, country)
        prefill = make_body(anon, story, True, "")  # user email unknown yet
        # The dropdown gets the new choices and pre-selects the first one.
        first_choice = options[0] if options else None
        return s, records, gr.update(choices=options, value=first_choice), anon, prefill

    search_btn.click(_on_search,
                     inputs=[story_in, country_in],
                     outputs=[summary_out, results_table, dropdown_sel, anon_out, message_in])

    # Rebuild the message body whenever one of its ingredients changes.
    def _refresh_body(use_anon_flag, anon_text, story, user_email):
        """Recompose the message body from the current form values."""
        return make_body(anon_text, story, use_anon_flag, user_email)

    body_inputs = [use_anon, anon_out, story_in, user_email_in]
    for trigger in (use_anon, user_email_in, anon_out, story_in):
        trigger.change(_refresh_body, inputs=body_inputs, outputs=message_in)

    # Preview
    preview_btn.click(preview_contact,
                      inputs=[dropdown_sel, results_table, subject_in, message_in, manual_email_in],
                      outputs=[preview_text_out, preview_html_out])

    # Confirm (create draft or send); manual_email_in is passed as the last argument.
    confirm_btn.click(confirm_action,
                      inputs=[mode, dropdown_sel, results_table, subject_in, message_in,
                              user_email_in, sender_email_in, sender_pass_in, logo_url_in, manual_email_in],
                      outputs=[status_out, mailto_html_out, eml_file_out])

demo.launch(share=False)