File size: 50,532 Bytes
c5f39f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
import os
import sys
import re
import json
import time
import logging
import gc
import asyncio
import aiohttp
import random
from typing import Optional, Dict, Any
from fastapi import FastAPI, APIRouter, HTTPException
from pydantic import BaseModel
from google import genai
from google.genai import types
from newspaper import Article
import trafilatura

# Supabase configuration. The project URL is fixed; both API keys are read
# from the environment and the module refuses to load if either is missing.
SUPABASE_URL = "https://iiwbixdrrhejkthxygak.supabase.co"
SUPABASE_KEY = os.environ.get("SUPA_KEY")
SUPABASE_ROLE_KEY = os.environ.get("SUPA_SERVICE_KEY")
if not (SUPABASE_KEY and SUPABASE_ROLE_KEY):
    raise ValueError("❌ SUPA_KEY or SUPA_SERVICE_KEY not set in environment!")

# Both header sets share one shape and differ only in the key they carry:
# SUPABASE_HEADERS is built from SUPA_KEY, SUPABASE_ROLE_HEADERS from
# SUPA_SERVICE_KEY.
SUPABASE_HEADERS, SUPABASE_ROLE_HEADERS = (
    {
        "apikey": api_key,
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    for api_key in (SUPABASE_KEY, SUPABASE_ROLE_KEY)
)

# Module-wide logging: timestamped, level-tagged records at INFO and above.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("news-filter-api")

# Slot for the shared HTTP session; empty until one is created on demand.
http_session = None

# Realistic browser User-Agent strings to rotate between requests.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59",
]

async def get_http_session():
    """Return the shared aiohttp session, creating it when needed.

    One ``aiohttp.ClientSession`` is cached in the module-level
    ``http_session`` so its connection pool is reused across calls. If the
    cached session has been closed, a fresh one is built instead of handing
    back an object that can no longer issue requests.

    Returns:
        The shared ``aiohttp.ClientSession``.
    """
    global http_session
    if http_session is None or http_session.closed:
        connector = aiohttp.TCPConnector(
            limit=20,
            limit_per_host=10,
            ttl_dns_cache=300,
            use_dns_cache=True,
            keepalive_timeout=30,
            enable_cleanup_closed=True,
        )
        timeout = aiohttp.ClientTimeout(total=30, connect=5)
        http_session = aiohttp.ClientSession(
            connector=connector,
            timeout=timeout,
        )
    return http_session

def get_realistic_headers():
    """Build browser-like request headers to reduce the chance of being blocked.

    The User-Agent is drawn at random from USER_AGENTS on every call; all
    other header values are fixed.
    """
    headers = {'User-Agent': random.choice(USER_AGENTS)}
    headers.update({
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'max-age=0',
    })
    return headers

def _extract_with_newspaper(url: str) -> str:
    """Download and parse *url* with newspaper3k (blocking); return its text or ""."""
    article = Article(url)
    article.config.browser_user_agent = random.choice(USER_AGENTS)
    article.config.request_timeout = 10
    article.config.number_threads = 1

    article.download()
    article.parse()

    text = (article.text or "").strip()
    # Very short results are treated as a failed extraction.
    return text if len(text) > 100 else ""


async def extract_article_text(url: str) -> str:
    """Extract the full text of a news article, newspaper3k first, trafilatura second.

    Args:
        url: Address of the article page.

    Returns:
        The stripped article text when either extractor yields more than 100
        characters; otherwise an empty string. Errors are logged, never raised.
    """
    try:
        # Method 1: newspaper3k. Its download/parse calls are synchronous, so
        # they run in the default executor to keep the event loop responsive.
        try:
            loop = asyncio.get_running_loop()
            text = await loop.run_in_executor(None, _extract_with_newspaper, url)
            if text:
                return text
        except Exception as exc:
            # Best effort: any newspaper3k failure simply triggers the fallback.
            log.debug("newspaper3k falhou para %s (%s); tentando trafilatura", url, exc)

        # Method 2: fetch the HTML ourselves and let trafilatura extract it.
        session = await get_http_session()
        headers = get_realistic_headers()

        # A short random delay makes the request pattern look more human.
        await asyncio.sleep(random.uniform(1, 3))

        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                html = await response.text()
                extracted_text = trafilatura.extract(html)

                if extracted_text and len(extracted_text.strip()) > 100:
                    return extracted_text.strip()

        return ""

    except Exception as e:
        log.error(f"Erro ao extrair texto da URL {url}: {str(e)}")
        return ""

async def fetch_unused_news():
    """Fetch one unused news item from Supabase.

    Queries ``news_extraction`` for a single row with ``used = false``,
    ordered by ``created_at`` ascending.

    Returns:
        The first element of the decoded JSON response.

    Raises:
        HTTPException: 404 when the response is empty; 500 when Supabase
            answers with a non-200 status or any other error occurs.
    """
    try:
        session = await get_http_session()
        url = f"{SUPABASE_URL}/rest/v1/news_extraction"
        params = {
            "used": "eq.false",
            "limit": "1",
            "order": "created_at.asc"
        }

        async with session.get(url, headers=SUPABASE_HEADERS, params=params) as response:
            if response.status != 200:
                raise HTTPException(status_code=500, detail="Erro ao buscar notícia")

            data = await response.json()
            if not data:
                raise HTTPException(status_code=404, detail="Nenhuma notícia disponível")

            return data[0]
    except HTTPException:
        # Keep the status chosen above (notably the 404); the generic handler
        # below would otherwise turn it into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro Supabase: {str(e)}") from e

async def fetch_last_50_titles():
    """Fetch the ``title_pt`` values of the 50 newest rows in the ``news`` table.

    Rows are ordered by ``created_at`` descending and empty titles are
    dropped. Best effort: a non-200 response or any exception is logged as a
    warning and an empty list is returned.
    """
    try:
        session = await get_http_session()
        endpoint = f"{SUPABASE_URL}/rest/v1/news"
        query = {"select": "title_pt", "limit": "50", "order": "created_at.desc"}

        async with session.get(endpoint, headers=SUPABASE_HEADERS, params=query) as response:
            if response.status == 200:
                rows = await response.json()
                collected = []
                for row in rows:
                    if row.get("title_pt"):
                        collected.append(row.get("title_pt", ""))
                return collected

            log.warning("Erro ao buscar títulos anteriores")
            return []
    except Exception as e:
        log.warning(f"Erro ao buscar últimos títulos: {str(e)}")
        return []

async def insert_news_to_db(title: str, text: str, news_id: str, url: str, image_url: str, filters: dict):
    """Insert a news item, with its original data and filter values, into the ``news`` table.

    Args:
        title: Stored as ``title_en``.
        text: Stored as ``text_en``.
        news_id: Stored as ``news_id``.
        url: Stored as ``url``.
        image_url: Stored as ``image``.
        filters: Filter values; any missing key falls back to its default.

    Raises:
        HTTPException: 500 when Supabase answers with a status other than
            200/201, or when any other error occurs.
    """
    # Default used for each filter column when `filters` lacks the key.
    filter_defaults = {
        "death_related": False,
        "political_related": False,
        "woke_related": False,
        "spoilers": False,
        "sensitive_theme": False,
        "contains_video": False,
        "is_news_content": True,
        "relevance": "",
        "brazil_interest": False,
        "breaking_news": False,
        "audience_age_rating": "",
        "regional_focus": "",
        "country_focus": "",
        "ideological_alignment": "",
        "entity_type": "",
        "entity_name": "",
        "duplication": False,
    }

    try:
        session = await get_http_session()
        supabase_url = f"{SUPABASE_URL}/rest/v1/news"

        payload = {
            "title_en": title,
            "text_en": text,
            "news_id": news_id,
            "url": url,
            "image": image_url,
        }
        for key, default in filter_defaults.items():
            payload[key] = filters.get(key, default)

        async with session.post(supabase_url, headers=SUPABASE_ROLE_HEADERS, json=payload) as response:
            if response.status not in (200, 201):
                response_text = await response.text()
                raise HTTPException(status_code=500, detail=f"Erro ao inserir notícia: {response.status} - {response_text}")

    except HTTPException:
        # Already carries the intended detail; do not wrap it a second time.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Erro ao inserir: {str(e)}") from e

async def mark_news_as_used(news_id: str):
    """Mark a ``news_extraction`` row as used.

    This must always complete so the same item is not picked up again in an
    endless loop: an unexpected status or any exception is only logged as a
    warning, never raised.
    """
    try:
        session = await get_http_session()

        async with session.patch(
            f"{SUPABASE_URL}/rest/v1/news_extraction",
            headers=SUPABASE_ROLE_HEADERS,
            json={"used": True},
            params={"news_id": f"eq.{news_id}"},
        ) as response:
            succeeded = response.status in [200, 201, 204]
            if not succeeded:
                log.warning(f"Erro ao marcar {news_id} como usada, mas continuando...")

    except Exception as e:
        log.warning(f"Erro ao atualizar notícia {news_id}: {str(e)}")

def extract_json(text):
    """Return the span from the first '{' to the last '}' in *text*.

    The match is greedy and crosses newlines. When no such span exists,
    *text* is returned unchanged.
    """
    found = re.compile(r'\{.*\}', re.DOTALL).search(text)
    if found is None:
        return text
    return found.group(0)

def ensure_filter_order(filter_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Return a new dict holding the known filter keys in their canonical order.

    Keys absent from *filter_dict* are skipped; keys outside the canonical
    list are dropped.
    """
    canonical_order = (
        "death_related",
        "political_related",
        "woke_related",
        "spoilers",
        "sensitive_theme",
        "contains_video",
        "is_news_content",
        "relevance",
        "brazil_interest",
        "breaking_news",
        "audience_age_rating",
        "regional_focus",
        "country_focus",
        "ideological_alignment",
        "entity_type",
        "entity_name",
        "duplication",
    )

    ordered = {}
    for name in canonical_order:
        if name in filter_dict:
            ordered[name] = filter_dict[name]
    return ordered

async def filter_news(title: str, content: str, last_titles: list) -> dict:
    try:
        client = genai.Client(
            api_key=os.environ.get("GEMINI_API_KEY"),
        )

        model = "gemini-2.5-flash-lite"

        # Instruções do sistema
        SYSTEM_INSTRUCTIONS = """
        Analyze the news title and content, and return the filters in JSON format with the defined fields.
Please respond ONLY with the JSON filter, do NOT add any explanations, system messages, or extra text.

death_related (true | false): Whether the news involves the real-life death of a person. Does not include fictional character deaths or deaths within stories.
political_related (true | false): Related to real-world politics (governments, elections, politicians, or official decisions). Not about political storylines in fiction.
woke_related (true | false): Involves social issues like inclusion, diversity, racism, gender, LGBTQIA+, etc.
spoilers (true | false): Reveals important plot points (e.g., character deaths, endings, major twists).
sensitive_theme (true | false): Covers sensitive or disturbing topics like suicide, abuse, violence, or tragedy.
contains_video (true | false): The news includes an embedded video (e.g., trailer, teaser, interview, video report).
is_news_content (true | false): Whether the content is actual news reporting. True for breaking news, announcements, factual reports. False for reviews, opinion pieces, lists, rankings, recommendations, critiques, analysis, or editorial content.
relevance ("low" | "medium" | "high" | "viral"): The expected public interest or impact of the news.
brazil_interest (true | false): True only if the news topic has a clear and direct impact, relevance, or interest for the Brazilian audience. This includes:

Events, releases, or announcements happening in Brazil or significant international announcements.
Content (movies, series, sports, games, music) officially available in Brazil.
People, teams, companies, brands, or productions that are relevant and recognized by the Brazilian audience.
International celebrities, athletes, or artists with significant fan bases in Brazil.

Do not mark as true if the content is unknown to most of the Brazilian population or if the actors, artists, or productions do not have notable recognition in the country.

Examples:

"Couple on 'House Hunters' with a 30-year age difference shocks viewers" — TRUE (In Brazil, House Hunters is Em Busca da Casa Perfeita, so it is available)
"Wild Bill Wichrowski from 'Deadliest Catch' will miss the 21st season after battling prostate cancer" — TRUE (Because Deadliest Catch is known in Brazil as Pesca Mortal)
"Loni Anderson, star of 'WKRP in Cincinnati,' dies at 79" — FALSE (Few people know her in Brazil, and WKRP in Cincinnati is not available there)
"The 'forgotten' film in the 'Conjuring' universe: why 'The Curse of La Llorona' is considered the worst of the franchise" — TRUE
"Rose Byrne collapses: new A24 film described as a 'test of endurance'" — TRUE (Rose Byrne is well-known in Brazil)
"Star Trek: how to understand the timeline of one of the greatest sci-fi sagas" — TRUE
"Crisis at Mubi: top filmmakers, including Israelis, demand boycott over ties to military investor" — TRUE (Mubi operates in Brazil)
"Liam Neeson and Joe Keery face biological terror in the trailer for Cold Storage" — TRUE (Joe Keery is well-known in Brazil for Stranger Things)
"TIFF 2025: from John Candy to Lucrecia Martel, meet the documentaries of the year" — TRUE (Toronto International Film Festival is one of the most famous independent festivals, so it is considered relevant to Brazil)
"TIFF 2025: festival announces documentaries with Lucrecia Martel and a production by Barack and Michelle Obama" — TRUE (Toronto International Film Festival is well-known, relevant to Brazil)
"'Stranger Things' universe expands: animated series and stage play confirmed" — TRUE (Stranger Things is well-known in Brazil)
"New Park Chan-wook film with stars from 'Squid Game' and 'Landing on Love' will open a film festival" — TRUE (No Other Choice features a famous actor from Squid Game)
"Francis Ford Coppola hospitalized in Rome, but reassures fans: 'I'm fine'" — TRUE (Francis Coppola is internationally known)
"Ken Jennings used 'Who Wants to Be a Millionaire?' to provoke a rival, but the scene was cut" — FALSE (This program is not Brazilian; Brazil has its own more popular version)
"Canelo vs. Crawford: Netflix confirms fight of the century without pay-per-view cost" — TRUE (Even though they are not Brazilian, fights usually attract worldwide interest)

breaking_news (true | false): The content is urgent or part of a recent and unfolding event.
audience_age_rating ("L" | 10 | 12 | 14 | 16 | 18): Content rating based on Brazilian standards.
regional_focus ("global" | "americas" | "europe" | "asia" | "africa" | "middle_east" | "oceania"): The main geographic region the news relates to.
country_focus (ISO 3166-1 alpha-2 code like "br", "us", "fr", "jp" or null): The specific country the news is about, if applicable.
ideological_alignment ("left" | "center-left" | "center" | "center-right" | "right" | "apolitical"): The perceived political bias of the article.
entity_type ("movie" | "series" | "event" | "person" | "place" | "other"): The type of main subject mentioned in the news.
entity_name (string): The name of the person, title, event, or topic the article is primarily about.
duplication (true | false): Whether the current news is a duplicate or highly similar to any of the previously published news titles (Last titles).
        """

        # Formata os últimos títulos para incluir no prompt - aumentado para 25 títulos
        last_titles_formatted = "\n- ".join(last_titles[:25]) if last_titles else "No previous titles available"

        # Primeiro exemplo - SÉRIE HBO RENOVADA
        EXAMPLE_INPUT_1 = f"""Title: 'The Gilded Age' Renewed for Season 4 at HBO — Everything We Know So Far
Content: The Gilded Age will return. HBO announced on Monday, July 28, that the series has been renewed for Season 4. This comes after the release of Season 3 Episode 6 on Sunday, July 27. There are two episodes left to go in the third season. The Season 3 finale will air on Sunday, August 10, on HBO. According to HBO, total premiere-night viewing for the third season has grown for five consecutive weeks, culminating in a 20 percent growth compared to last season. Fan engagement has also climbed, with social chatter rising nearly 60 percent week over week. The show has also received its most critical acclaim to date with Season 3, its highest-stakes season so far. In the July 27 episode, the series that's known for its low stakes but high-camp drama, a character was seemingly killed off in violent (for The Gilded Age) fashion. The show is already Emmy-winning. Production designer Bob Shaw took home an Emmy for
Last titles:
- 'Quarteto Fantástico: Primeiros Passos' dispara para arrecadar US$ 118 milhões nas bilheterias dos EUA e US$ 218 milhões globalmente
- Bilheteria: 'Quarteto Fantástico: Primeiros Passos' sobe para US$ 218 milhões globalmente, 'Superman' e 'F1' ultrapassam US$ 500 milhões
- Reboot de 'Quarteto Fantástico' da Marvel ultrapassa US$ 200 milhões globalmente"""

        EXAMPLE_OUTPUT_1 = """{
   "death_related":false,
   "political_related":false,
   "woke_related":false,
   "spoilers":false,
   "sensitive_theme":false,
   "contains_video":false,
   "is_news_content":true,
   "relevance":"low",
   "brazil_interest":true,
   "breaking_news":true,
   "audience_age_rating":14,
   "regional_focus":"americas",
   "country_focus":"us",
   "ideological_alignment":"apolitical",
   "entity_type":"series",
   "entity_name":"The Gilded Age",
   "duplication":false
}"""

        # Segundo exemplo - SEQUÊNCIA DE FILME
        EXAMPLE_INPUT_2 = f"""Title: 'My Best Friend's Wedding' Sequel in the Works: 'Materialists,' 'Past Lives' Director Celine Song to Write Screenplay
Content: A sequel to the Julia Roberts romantic comedy "My Best Friend's Wedding" is in early development at Sony Pictures. The studio has tapped "Materialists" and "Past Lives" writer-director Celine Song to pen a screenplay for the project, though she is not in talks to helm the feature. 
Last titles:
- Sequência de "The Batman" ganha data de lançamento oficial da Warner Bros
- Sequência de "The Batman" de Robert Pattinson tem data oficial de lançamento para 2026
- Warner Bros. define data de lançamento da sequência de "The Batman" para 2026
- Sequência de 'O Casamento do Meu Melhor Amigo' terá roteiro da diretora de 'Vidas Passadas'"""

        EXAMPLE_OUTPUT_2 = """{
   "death_related":false,
   "political_related":false,
   "woke_related":false,
   "spoilers":false,
   "sensitive_theme":false,
   "contains_video":false,
   "is_news_content":true,
   "relevance":"medium",
   "brazil_interest":true,
   "breaking_news":false,
   "audience_age_rating":10,
   "regional_focus":"americas",
   "country_focus":"us",
   "ideological_alignment":"apolitical",
   "entity_type":"movie",
   "entity_name":"My Best Friend's Wedding",
   "duplication":true
}"""

        # Terceiro exemplo - SÉRIE COM SPOILERS E MORTE DE PERSONAGEM
        EXAMPLE_INPUT_3 = f"""Title: 9-1-1: Death of main character shakes series, which gets new date for the 9th season
Content: The 9-1-1 universe was permanently redefined after one of the most shocking events in its history. The show's eighth season bid farewell to one of its pillars with the death of Captain Bobby Nash, played by Peter Krause, in episode 15. Now, with the renewal for a ninth season confirmed, ABC has announced a schedule change: the premiere has been moved up to Thursday, October 9, 2025. Bobby Nash's death, the first of a main cast member, leaves a leadership vacuum in Battalion 118 and sets the main narrative arc for the new episodes. Peter Krause's departure had already been signaled, but the impact of his absence will be the driving force behind the next season, which will have 18 episodes. Showrunner Tim Minear had previously stated that, despite the death, the character would still appear in specific moments in the eighth season finale, fulfilling his promise.
Last titles:
- The Batman 2 ganha data oficial de lançamento para 2026 na Warner Bros
- Datas de estreia da ABC no outono de 2025: '9-1-1', 'Nashville' e 'Grey's Anatomy' antecipadas
- Warner Bros. anuncia sequência de 'The Batman' para 2026"""

        EXAMPLE_OUTPUT_3 = """{
   "death_related":false,
   "political_related":false,
   "woke_related":false,
   "spoilers":true,
   "sensitive_theme":false,
   "contains_video":false,
   "is_news_content":true,
   "relevance":"high",
   "brazil_interest":true,
   "breaking_news":true,
   "audience_age_rating":14,
   "regional_focus":"global",
   "country_focus":null,
   "ideological_alignment":"apolitical",
   "entity_type":"series",
   "entity_name":"9-1-1",
   "duplication":true
}"""

        # Quarto exemplo - MORTE DE CELEBRIDADE
        EXAMPLE_INPUT_4 = f"""Title: Julian McMahon, 'Fantastic Four,' 'Nip/Tuck' and 'FBI: Most Wanted' Star, Dies at 56
Content: Julian McMahon, the suave Australian actor best known for his performances on "FBI: Most Wanted," "Charmed," "Nip/Tuck" and the early aughts "Fantastic Four" films, died Wednesday in Florida. He was 56 and died after a battle with cancer. McMahon's death was confirmed through his reps, who shared a statement from his wife, Kelly McMahon, in remembrance of her husband. "With an open heart, I wish to share with the world that my beloved husband, Julian McMahon, died peacefully this week after a valiant effort to overcome cancer," she said. "Julian loved life. He loved his family. He loved his friends. He loved his work, and he loved his fans. His deepest wish was to bring joy into as many lives as possible. We ask for support during this time to allow our family to grieve in privacy. And we wish for all of those to whom Julian brought joy, to continue to find joy in life. We are grateful for the memories."
Last titles:
- Mortes de Celebridades em 2025: Estrelas que Perdemos Este Ano
- Programas de TV Cancelados em 2025: Quais Séries Foram Canceladas
- Atores Australianos que Estão Fazendo Sucesso em Hollywood"""

        EXAMPLE_OUTPUT_4 = """{
   "death_related":true,
   "political_related":false,
   "woke_related":false,
   "spoilers":false,
   "sensitive_theme":true,
   "contains_video":false,
   "is_news_content":true,
   "relevance":"high",
   "brazil_interest":true,
   "breaking_news":true,
   "audience_age_rating":14,
   "regional_focus":"americas",
   "country_focus":"au",
   "ideological_alignment":"apolitical",
   "entity_type":"person",
   "entity_name":"Julian McMahon",
   "duplication":false
}"""

        # Quinto exemplo - SEQUÊNCIA DE FILME COM ELEMENTOS POLÍTICOS
        EXAMPLE_INPUT_5 = f"""Title: Mikey Madison and Jeremy Allen White Circling Lead Roles in Aaron Sorkin's 'Social Network' Sequel
Content: Mikey Madison and Jeremy Allen White are circling the lead roles for Aaron Sorkin's sequel to the 2010 Oscar winner "The Social Network," according to sources with knowledge of the project. While no offers have been made, Sorkin has met with both Madison and White about the project. The film is still very much in the development stage and has yet to receive the green light from Sony.
Last titles:
- Wild Bill Wichrowski do 'Deadliest Catch' ficará de fora da 21ª temporada após batalha contra o câncer de próstata
- Loni Anderson, estrela de 'WKRP in Cincinnati', morre aos 79 anos
- O filme "esquecido" do universo "Invocação do Mal": entenda por que "A Maldição da Chorona" é considerado o pior da franquia
- Rose Byrne em colapso: novo filme da A24 é descrito como 'teste de resistência'
- Jornada nas Estrelas: como entender a linha do tempo de uma das maiores sagas da ficção
- Crise na Mubi: cineastas de peso, incluindo israelenses, exigem boicote por laços com investidor militar"""

        EXAMPLE_OUTPUT_5 = """{
   "death_related":false,
   "political_related":true,
   "woke_related":false,
   "spoilers":false,
   "sensitive_theme":false,
   "contains_video":false,
   "is_news_content":true,
   "relevance":"high",
   "brazil_interest":true,
   "breaking_news":true,
   "audience_age_rating":14,
   "regional_focus":"americas",
   "country_focus":"au",
   "ideological_alignment":"apolitical",
   "entity_type":"movie",
   "entity_name":"The Social Network",
   "duplication":false
}"""

        # Sexto exemplo - EPISÓDIO COM SPOILERS
        EXAMPLE_INPUT_6 = f"""Title: Star Trek: Strange New Worlds' Holodeck Episode Began As A Tribute To A DS9 Masterpiece [Exclusive]
Content: Spoilers for episode 4 of "Star Trek: Strange New Worlds" season 4, titled "A Space Adventure Hour," episode follow. The newest episode of "Star Trek: Strange New Worlds" — "A Space Adventure Hour," written by Dana Horgan & Kathryn Lyn — features the show going back to the past. Except, it's not a time travel episode. To test a prototype holodeck, La'an (Christina Chong) crafts a murder mystery story set in mid-20th century Hollywood where she's the detective, Amelia Moon. And the suspects are the cast and crew of a space adventure series, "The Last Frontier," that's about to be canceled. The episode has enough metatext to fill the whole Enterprise, because "The Last Frontier" is a clear stand-in for "Star Trek: The Original Series." However, the writers weren't just thinking about "TOS" when it came to "A Space Adventure Hour."
Last titles:
- Wild Bill Wichrowski do 'Deadliest Catch' ficará de fora da 21ª temporada após batalha contra o câncer de próstata
- Loni Anderson, estrela de 'WKRP in Cincinnati', morre aos 79 anos
- O filme "esquecido" do universo "Invocação do Mal": entenda por que "A Maldição da Chorona" é considerado o pior da franquia
- Rose Byrne em colapso: novo filme da A24 é descrito como 'teste de resistência'
- Jornada nas Estrelas: como entender a linha do tempo de uma das maiores sagas da ficção
- Crise na Mubi: cineastas de peso, incluindo israelenses, exigem boicote por laços com investidor militar"""

        EXAMPLE_OUTPUT_6 = """{
   "death_related": false,
   "political_related": false,
   "woke_related": false,
   "spoilers": true,
   "sensitive_theme": false,
   "contains_video": false,
   "is_news_content": true,
   "relevance": "medium",
   "brazil_interest": true,
   "breaking_news": false,
   "audience_age_rating": 10,
   "regional_focus": "global",
   "country_focus": "us",
   "ideological_alignment": "apolitical",
   "entity_type": "series",
   "entity_name": "Star Trek: Strange New Worlds",
   "duplication": false
}"""

        # Sétimo exemplo - SÉRIE DE HORROR (TEMA SENSÍVEL)
        EXAMPLE_INPUT_7 = f"""Title: 'Hostel' TV Series From Eli Roth and Starring Paul Giamatti Lands at Peacock for Development (Exclusive)
Content: The "Hostel" TV series has found a home at Peacock. Variety has learned exclusively that the TV extension of the horror film franchise is currently in development at the NBCUniversal streamer. The show was previously reported to be in the works in June 2024, but no platform was attached at that time. As originally reported, Paul Giamatti is attached to star in the series, with "Hostel" mastermind Eli Roth set to write, direct, and executive produce. Chris Briggs and Mike Fleiss, who have produced all the "Hostel" films, are also executive producers. Fifth Season is the studio. Exact plot details are being kept under wraps.
Last titles:
- Wild Bill Wichrowski do 'Deadliest Catch' ficará de fora da 21ª temporada após batalha contra o câncer de próstata
- Loni Anderson, estrela de 'WKRP in Cincinnati', morre aos 79 anos
- O filme "esquecido" do universo "Invocação do Mal": entenda por que "A Maldição da Chorona" é considerado o pior da franquia
- Rose Byrne em colapso: novo filme da A24 é descrito como 'teste de resistência'
- Jornada nas Estrelas: como entender a linha do tempo de uma das maiores sagas da ficção
- Crise na Mubi: cineastas de peso, incluindo israelenses, exigem boicote por laços com investidor militar"""

        EXAMPLE_OUTPUT_7 = """{
   "death_related": false,
   "political_related": false,
   "woke_related": false,
   "spoilers": false,
   "sensitive_theme": true,
   "contains_video": false,
   "is_news_content": true,
   "relevance": "medium",
   "brazil_interest": false,
   "breaking_news": false,
   "audience_age_rating": 18,
   "regional_focus": "global",
   "country_focus": "us",
   "ideological_alignment": "apolitical",
   "entity_type": "series",
   "entity_name": "Hostel",
   "duplication": false
}"""

        # Oitavo exemplo - EVENTO ESPORTIVO
        EXAMPLE_INPUT_8 = f"""Title: Is Canelo vs. Crawford Free on Netflix? Here's How to Watch the Fight 
Content: When boxing legends Saúl "Canelo" Álvarez and Terence "Bud" Crawford meet in the ring on Sept. 13, it won't just be a clash of champions — it could be a career-defining moment. For the first time ever two of the most dominant fighters of their generation will share the ring. Only one will walk away as the greatest of their era. Given the high stakes and the long tradition of pay-per-view boxing events, fans are asking: Is Canelo vs. Crawford free on Netflix? Keep scrolling to learn more.
Last titles:
- Wild Bill Wichrowski do 'Deadliest Catch' ficará de fora da 21ª temporada após batalha contra o câncer de próstata
- Loni Anderson, estrela de 'WKRP in Cincinnati', morre aos 79 anos
- O filme "esquecido" do universo "Invocação do Mal": entenda por que "A Maldição da Chorona" é considerado o pior da franquia
- Rose Byrne em colapso: novo filme da A24 é descrito como 'teste de resistência'
- Jornada nas Estrelas: como entender a linha do tempo de uma das maiores sagas da ficção
- Crise na Mubi: cineastas de peso, incluindo israelenses, exigem boicote por laços com investidor militar"""

        EXAMPLE_OUTPUT_8 = """{
   "death_related": false,
   "political_related": false,
   "woke_related": false,
   "spoilers": false,
   "sensitive_theme": false,
   "contains_video": false,
   "is_news_content": true,
   "relevance": "high",
   "brazil_interest": true,
   "breaking_news": false,
   "audience_age_rating": 10,
   "regional_focus": "global",
   "country_focus": "us",
   "ideological_alignment": "apolitical",
   "entity_type": "event",
   "entity_name": "Canelo Álvarez vs. Terence Crawford",
   "duplication": false
}"""

        # Nono exemplo - MORTE DE CELEBRIDADE (DUPLICAÇÃO)
        EXAMPLE_INPUT_9 = f"""Title: Loni Anderson, Emmy- and Golden Globe-Nominated Star of 'Wkrp in Cincinnati,' Dies at 79
Content: Loni Anderson, whose beloved role as Jennifer Marlowe on "WKRP in Cincinnati" was nominated for Emmy and Golden Globe awards, has died, her publicist confirmed Sunday. She was 79.
Last titles:
- Wild Bill Wichrowski do 'Deadliest Catch' ficará de fora da 21ª temporada após batalha contra o câncer de próstata
- Loni Anderson, estrela de 'WKRP in Cincinnati', morre aos 79 anos
- O filme "esquecido" do universo "Invocação do Mal": entenda por que "A Maldição da Chorona" é considerado o pior da franquia
- Rose Byrne em colapso: novo filme da A24 é descrito como 'teste de resistência'
- Jornada nas Estrelas: como entender a linha do tempo de uma das maiores sagas da ficção
- Crise na Mubi: cineastas de peso, incluindo israelenses, exigem boicote por laços com investidor militar
- Liam Neeson e Joe Keery enfrentam terror biológico no trailer de Cold Storage
- TIFF 2025: de John Candy a Lucrecia Martel, conheça os documentários do ano"""

        EXAMPLE_OUTPUT_9 = """{
   "death_related": true,
   "political_related": false,
   "woke_related": false,
   "spoilers": false,
   "sensitive_theme": false,
   "contains_video": false,
   "is_news_content": true,
   "relevance": "medium",
   "brazil_interest": false,
   "breaking_news": true,
   "audience_age_rating": 10,
   "regional_focus": "global",
   "country_focus": "us",
   "ideological_alignment": "apolitical",
   "entity_type": "person",
   "entity_name": "Loni Anderson",
   "duplication": true
}"""

        # Décimo exemplo - FILME DE FESTIVAL (BAIXA RELEVÂNCIA)
        EXAMPLE_INPUT_10 = f"""Title: Jim Jarmusch's 'Father Mother Sister Brother' Sells to Multiple Territories Ahead of Venice Premiere
Content: Jim Jarmusch's "Father Mother Sister Brother" has sold to multiple territories ahead of its world premiere in competition at the Venice Film Festival. The film stars Tom Waits, Adam Driver, Mayim Bialik, Charlotte Rampling, Cate Blanchett, Vicky Krieps, Sarah Greene, Indya Moore, Luka Sabbat and Françoise Lebrun. Distribution rights have been picked up in Italy (Lucky Red), Spain (Avalon Distribucion Audiovisual), Portugal (Nos Lusomundo), Greece (Cinobo), Poland (Gutek Film), Hungary (Cirko Films), Romania (Bad Unicorn), Former Yugoslavia (MCF MegaCom Film), Czech Republic and Slovakia (Aerofilms), Middle East and North Africa (Front Row Filmed Ent.), South Korea (Andamiro Films), and Hong Kong (Edko Films).
Last titles:
- Wild Bill Wichrowski do 'Deadliest Catch' ficará de fora da 21ª temporada após batalha contra o câncer de próstata
- Loni Anderson, estrela de 'WKRP in Cincinnati', morre aos 79 anos
- O filme "esquecido" do universo "Invocação do Mal": entenda por que "A Maldição da Chorona" é considerado o pior da franquia
- Rose Byrne em colapso: novo filme da A24 é descrito como 'teste de resistência'
- Jornada nas Estrelas: como entender a linha do tempo de uma das maiores sagas da ficção
- Crise na Mubi: cineastas de peso, incluindo israelenses, exigem boicote por laços com investidor militar
- Universo 'Stranger Things' se expande: série animada e peça de teatro são confirmadas
- Wandinha: O que já sabemos sobre a 2ª temporada e os boatos que circulam na internet
- Novo filme de Park Chan-wook, 'No Other Choice', escala festivais e une estrelas
- Homem-Aranha 4: Tom Holland revela novo traje e produção de 'Um Novo Dia' começa com participações surpreendentes
- Quarteto Fantástico segue no topo das bilheterias, mas queda preocupa
- Novo filme de Jim Jarmusch com Adam Driver e Cate Blanchett será distribuído pela MUBI
- Tulsa King: 3ª temporada com Sylvester Stallone ganha data de estreia e primeiras imagens"""

        EXAMPLE_OUTPUT_10 = """{
   "death_related": false,
   "political_related": false,
   "woke_related": false,
   "spoilers": false,
   "sensitive_theme": false,
   "contains_video": false,
   "is_news_content": true,
   "relevance": "low",
   "brazil_interest": false,
   "breaking_news": false,
   "audience_age_rating": 10,
   "regional_focus": "global",
   "country_focus": "us",
   "ideological_alignment": "apolitical",
   "entity_type": "movie",
   "entity_name": "Father Mother Sister Brother",
   "duplication": true
}"""

        EXAMPLE_INPUT_11 = f"""Title: ‘AGT’: Husband & Wife Comedians Audition Against Each Other — Did Either Make the Live Shows?
Content: Press The Golden Buzzer! For exclusive news and updates, subscribe to our America's Got Talent Newsletter:\n\nAmerica’s Got Talent has seen several couples audition together over the years, but it’s rare to see a husband and wife competing against one another. But that’s exactly what happened on Tuesday’s (August 5) episode.\n\nComedian Matt O’Brien and his wife, Julia Hladkowicz, also a comic, both auditioned for the NBC competition series separately. O’Brien was up first, winning the judges over with his jokes about being married versus being single.\n\n“You are really, really good,” Howie Mandel told the Canadian comic. “You deserve to be here. You’re the kind of comedian that could go really far in this, so I want to be the first one to give you a yes.”
Last titles:
- Wild Bill Wichrowski do 'Deadliest Catch' ficará de fora da 21ª temporada após batalha contra o câncer de próstata
- Loni Anderson, estrela de 'WKRP in Cincinnati', morre aos 79 anos
- O filme \"esquecido\" do universo \"Invocação do Mal\": entenda por que \"A Maldição da Chorona\" é considerado o pior da franquia
- Rose Byrne em colapso: novo filme da A24 é descrito como 'teste de resistência'
- Jornada nas Estrelas: como entender a linha do tempo de uma das maiores sagas da ficção
- Crise na Mubi: cineastas de peso, incluindo israelenses, exigem boicote por laços com investidor militar
- Universo 'Stranger Things' se expande: série animada e peça de teatro são confirmadas
- Wandinha: O que já sabemos sobre a 2ª temporada e os boatos que circulam na internet
- Novo filme de Park Chan-wook, 'No Other Choice', escala festivais e une estrelas
- Homem-Aranha 4: Tom Holland revela novo traje e produção de 'Um Novo Dia' começa com participações surpreendentes
- Quarteto Fantástico segue no topo das bilheterias, mas queda preocupa"""

        EXAMPLE_OUTPUT_11 = """{
   "death_related": false,
   "political_related": false,
   "woke_related": false,
   "spoilers": true,
   "sensitive_theme": false,
   "contains_video": false,
   "is_news_content": true,
   "relevance": "medium",
   "brazil_interest": false,
   "breaking_news": false,
   "audience_age_rating": 10,
   "regional_focus": "global",
   "country_focus": "us",
   "ideological_alignment": "apolitical",
   "entity_type": "series",
   "entity_name": "America's Got Talent",
   "duplication": false
}"""

        EXAMPLE_INPUT_12 = f"""Title: Savannah Guthrie Has Emotional Reunion With Kids Amid ’Today’ Absence
Content: Savannah Guthrie returned to Today‘s Studio 1A on Wednesday, August 6, but not before picking up her kids from summer camp.\n\nThe news anchor enjoyed the end of her two-day Today absence by reuniting with her 10-year-old daughter, Vale, and 8-year-old son, Charley. Guthrie shared several photos from the camp pick-up via her Instagram Story on Tuesday, August 5, including individual snaps of herself hugging each of her children and a group selfie the three of them took together.\n\nShe also poked fun at her children by criticizing their hygiene habits. “There is no greater act of motherly love than touching the post-camp retainer 🤢,” she hilariously wrote over a snap of one of the kids’
Last titles:
- Wild Bill Wichrowski do 'Deadliest Catch' ficará de fora da 21ª temporada após batalha contra o câncer de próstata
- Loni Anderson, estrela de 'WKRP in Cincinnati', morre aos 79 anos
- O filme \"esquecido\" do universo \"Invocação do Mal\": entenda por que \"A Maldição da Chorona\" é considerado o pior da franquia
- Rose Byrne em colapso: novo filme da A24 é descrito como 'teste de resistência'
- Jornada nas Estrelas: como entender a linha do tempo de uma das maiores sagas da ficção
- Crise na Mubi: cineastas de peso, incluindo israelenses, exigem boicote por laços com investidor militar
- Universo 'Stranger Things' se expande: série animada e peça de teatro são confirmadas
- Wandinha: O que já sabemos sobre a 2ª temporada e os boatos que circulam na internet
- Novo filme de Park Chan-wook, 'No Other Choice', escala festivais e une estrelas
- Homem-Aranha 4: Tom Holland revela novo traje e produção de 'Um Novo Dia' começa com participações surpreendentes
- Quarteto Fantástico segue no topo das bilheterias, mas queda preocupa
- Novo filme de Jim Jarmusch com Adam Driver e Cate Blanchett será distribuído pela MUBI
- Tulsa King: 3ª temporada com Sylvester Stallone ganha data de estreia e primeiras imagens"""

        EXAMPLE_OUTPUT_12 = """{
    "death_related": false,
    "political_related": false,
    "woke_related": false,
    "spoilers": false,
    "sensitive_theme": false,
    "contains_video": false,
    "is_news_content": true,
    "relevance": "medium",
    "brazil_interest": false,
    "breaking_news": false,
    "audience_age_rating": 10,
    "regional_focus": "americas",
    "country_focus": "us",
    "ideological_alignment": "apolitical",
    "entity_type": "person",
    "entity_name": "Savannah Guthrie",
    "duplication": false
}"""

        # Estrutura de conversação correta com múltiplos exemplos
        contents = [
            # Primeiro exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_1)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_1)
                ]
            ),
            # Segundo exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_2)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_2)
                ]
            ),
            # Terceiro exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_3)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_3)
                ]
            ),
            # Quarto exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_4)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_4)
                ]
            ),
            # Quinto exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_5)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_5)
                ]
            ),
            # Sexto exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_6)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_6)
                ]
            ),
            # Sétimo exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_7)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_7)
                ]
            ),
            # Oitavo exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_8)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_8)
                ]
            ),
            # Nono exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_9)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_9)
                ]
            ),
            # Décimo exemplo
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_10)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_10)
                ]
            ),
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_11)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_11)
                ]
            ),
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=EXAMPLE_INPUT_12)
                ]
            ),
            types.Content(
                role="model",
                parts=[
                    types.Part.from_text(text=EXAMPLE_OUTPUT_12)
                ]
            ),
            # Agora o usuário envia a notícia real para ser analisada
            types.Content(
                role="user",
                parts=[
                    types.Part.from_text(text=f"""Title: {title}
Content: {content}
Last titles:
- {last_titles_formatted}""")
                ]
            )
        ]

        # Ferramentas para pesquisa e pensamento
        tools = [
            types.Tool(googleSearch=types.GoogleSearch())
        ]

        config = types.GenerateContentConfig(
            system_instruction=SYSTEM_INSTRUCTIONS,
            tools=tools,
            response_mime_type="text/plain",
            max_output_tokens=4096,
            temperature=0.8,
        )

        response_text = ""
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=config
        ):
            if chunk.text:
                response_text += chunk.text
        
        json_result = extract_json(response_text)

        try:
            parsed = json.loads(json_result)
        except json.JSONDecodeError as e:
            raise ValueError("Modelo retornou JSON inválido")

        ALLOWED_KEYS = {
            "death_related", "political_related", "woke_related", "spoilers", 
            "sensitive_theme", "contains_video", "is_news_content", "relevance",
            "brazil_interest", "breaking_news", "audience_age_rating", "regional_focus",
            "country_focus", "ideological_alignment", "entity_type", "entity_name", "duplication"
        }

        clean_filter = {key: parsed[key] for key in ALLOWED_KEYS if key in parsed}
        clean_filter = ensure_filter_order(clean_filter)

        return {"filter": clean_filter}

    except Exception as e:
        raise ValueError(f"Erro na filtragem: {str(e)}")

def should_skip_insertion(filters: dict) -> tuple[bool, str]:
    """
    Decide whether a news item must be skipped (not inserted into the news table).

    The checks run in order and the first one that matches wins:

    1. ``duplication`` is truthy.
    2. ``is_news_content`` is falsy (a missing key counts as True).
    3. ``brazil_interest`` is falsy (a missing key counts as True).
    4. ``relevance`` is not one of ``"medium"``, ``"high"`` or ``"viral"``
       (a missing key, or a value that is not a string, counts as insufficient).

    Args:
        filters: Mapping of filter names to their values.

    Returns:
        ``(should_skip, reason)``. ``reason`` is an empty string when the item
        may be inserted.
    """
    # Condition 1: a detected duplicate is always skipped.
    if filters.get("duplication", False):
        return True, "duplicação detectada"

    # Condition 2: content that is not news is skipped.
    if not filters.get("is_news_content", True):
        return True, "conteúdo não é notícia (review, lista, crítica, etc.)"

    # Condition 3: content without interest for Brazil is skipped.
    if not filters.get("brazil_interest", True):
        return True, "baixo interesse para o Brasil (brazil_interest=false)"

    # Condition 4: low, missing or malformed relevance is skipped.
    # The isinstance guard matters because a list or dict is unhashable and
    # would make the set membership test raise TypeError instead of answering.
    relevance = filters.get("relevance", "")
    if not isinstance(relevance, str) or relevance not in {"medium", "high", "viral"}:
        return True, f"relevância insuficiente (relevance={filters.get('relevance')})"

    # Every check passed: the item can be inserted.
    return False, ""
    
# FastAPI application object for this service.
app = FastAPI(title="News Filter API")
# Router that receives the endpoints declared with @router decorators below;
# it is attached to `app` later in this file via app.include_router(router).
router = APIRouter()

@router.post("/filter")
async def filter_endpoint():
    """
    Process one pending news item: fetch it, extract its text, classify it and store it.

    Steps, in order:
      1. ``fetch_unused_news()`` provides the item (title, url, news_id, image).
      2. ``fetch_last_50_titles()`` provides the titles passed to the classifier
         and echoed back in the response.
      3. ``extract_article_text(url)`` provides the article body.
      4. ``filter_news(title, text, last_titles)`` returns ``{"filter": {...}}``.
      5. ``should_skip_insertion`` decides whether the item is only marked as
         used, or also inserted through ``insert_news_to_db``.

    Returns:
        A dict with the keys ``filter``, ``title_en``, ``text_en``, ``news_id``,
        ``url``, ``image``, ``last_titles`` and ``skipped``; ``skip_reason`` is
        added when ``skipped`` is True.

    Raises:
        HTTPException: 404 when the error message contains
            "Nenhuma notícia disponível" (presumably raised by
            ``fetch_unused_news`` -- confirm against its definition), 400 when
            title/URL are empty or no text could be extracted, 500 for any
            other failure.
    """
    # Stays None until the item has been fetched, so the error handler knows
    # whether there is anything to mark as used.
    news_id = None

    try:
        news_data = await fetch_unused_news()

        # `or ""` also covers keys that are present but hold None, so the
        # emptiness check below answers 400 instead of crashing on None.strip().
        title = news_data.get("title") or ""
        url = news_data.get("url") or ""
        news_id = news_data.get("news_id", "")
        image_url = news_data.get("image", "")

        if not title.strip() or not url.strip():
            raise ValueError("Title e URL não podem estar vazios")

        log.info(f"Processando notícia {news_id}: {title}")

        last_titles = await fetch_last_50_titles()

        full_text = await extract_article_text(url)
        if not full_text.strip():
            raise ValueError("Não foi possível extrair texto da URL")

        filter_result = await filter_news(title, full_text, last_titles)
        filters = filter_result["filter"]

        should_skip, skip_reason = should_skip_insertion(filters)

        # Payload shared by both outcomes; "skipped" (and "skip_reason") are
        # appended below so the key order of the response is unchanged.
        response = {
            "filter": filters,
            "title_en": title,
            "text_en": full_text,
            "news_id": news_id,
            "url": url,
            "image": image_url,
            "last_titles": last_titles,
        }

        if should_skip:
            # Only mark as used; nothing is inserted into the news table.
            await mark_news_as_used(news_id)
            log.info(f"Notícia {news_id} pulada devido a: {skip_reason}")

            response["skipped"] = True
            response["skip_reason"] = skip_reason
            return response

        # Insert first, then mark as used, so a failed insert is reported as an error.
        await insert_news_to_db(title, full_text, news_id, url, image_url, filters)
        await mark_news_as_used(news_id)

        log.info(f"Notícia {news_id} processada e inserida com sucesso")

        response["skipped"] = False
        return response

    except Exception as e:
        error_msg = str(e)
        log.error(f"Erro no processamento da notícia {news_id}: {error_msg}")

        # Always mark the item as used after a failure so it is not picked up
        # again forever. This is best-effort: a failure here is logged and must
        # not replace the original error or its HTTP status.
        if news_id:
            try:
                await mark_news_as_used(news_id)
            except Exception as mark_error:
                log.error(f"Falha ao marcar notícia {news_id} como usada: {mark_error}")

        # Map the failure to an HTTP status based on its message.
        if "Nenhuma notícia disponível" in error_msg:
            status_code, detail = 404, error_msg
        elif "Title e URL não podem estar vazios" in error_msg:
            status_code, detail = 400, error_msg
        elif "Não foi possível extrair texto" in error_msg:
            status_code, detail = 400, error_msg
        else:
            status_code, detail = 500, f"Erro interno: {error_msg}"

        raise HTTPException(status_code=status_code, detail=detail) from e

# Attach the endpoints declared on `router` to the application.
app.include_router(router)

@app.on_event("shutdown")
async def shutdown_event():
    """Close the module-level ``http_session`` on application shutdown, if one is set."""
    session = http_session
    if not session:
        # Nothing to release.
        return
    await session.close()