# Spaces:
#
# HebaElshimy
# /
#
# systematic-reviews
#
# Sleeping
#
# File size: 35,798 Bytes

import gradio as gr
import pandas as pd
import requests
import json
from transformers import pipeline, AutoTokenizer, AutoModel
import torch
from sentence_transformers import SentenceTransformer, CrossEncoder
import time
from typing import List, Dict, Tuple
import re
import numpy as np

# ============================================================================
# ADVANCED NLP MODELS INITIALIZATION
# ============================================================================

print("Loading advanced models...")

# Initialize advanced models.
# All loads below share a single try block, so a failure in ANY of them
# switches the whole application to the basic-model path.
# NOTE: cross_encoder, sentence_model, pubmed_tokenizer and pubmed_model are
# only guaranteed to be bound when this block runs to completion; callers
# must check USE_ADVANCED_MODELS before using them.
try:
    # Cross-encoder for accurate semantic similarity
    cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2', max_length=512)
    
    # Zero-shot classifier for criteria matching
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    
    # Medical sentence transformer
    sentence_model = SentenceTransformer('pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb')
    
    # PubMedBERT for medical text understanding
    pubmed_tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
    pubmed_model = AutoModel.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")
    
    print("Advanced models loaded successfully!")
    # Module-wide switch read by the scoring and parsing functions.
    USE_ADVANCED_MODELS = True
except Exception as e:
    print(f"Warning: Could not load advanced models, falling back to basic models. Error: {e}")
    # Fallback to basic models.
    # The classifier is loaded again here because the try block may have
    # failed before it was assigned. These two loads are not protected: if
    # they raise, the exception propagates out of module import.
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    # similarity_model is only defined on this fallback path.
    similarity_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
    USE_ADVANCED_MODELS = False
    print("Basic models loaded successfully!")

# Medical terminology expansions.
# Maps a lowercase trigger word/phrase to a list of related phrases. It is
# consumed by expand_medical_terms(), which only ever appends the FIRST THREE
# entries of a matching list, so put the most useful synonyms first.
MEDICAL_SYNONYMS = {
    'rct': ['randomized controlled trial', 'randomised controlled trial', 'randomized clinical trial'],
    'pain': ['pain', 'nociception', 'analgesia', 'hyperalgesia', 'allodynia', 'neuropathic pain', 
            'chronic pain', 'acute pain', 'postoperative pain', 'pain management'],
    'surgery': ['surgery', 'surgical', 'operation', 'operative', 'postoperative', 'perioperative',
               'preoperative', 'surgical procedure', 'surgical intervention'],
    'study design': ['study design', 'trial design', 'research design', 'methodology', 
                   'randomized', 'controlled', 'cohort', 'case-control', 'cross-sectional'],
}

# ============================================================================
# ADVANCED NLP FUNCTIONS
# ============================================================================

def expand_medical_terms(term: str, synonyms_map: Dict[str, List[str]] = None) -> List[str]:
    """Expand a criteria term with related medical phrases.

    A synonym group is applied when its key, or any phrase in its list,
    occurs in ``term`` as a whole word or phrase (an optional plural "s" is
    tolerated). Plain substring matching is deliberately avoided: it made
    'rct' fire inside "infarction" and 'pain' inside "spain".

    Args:
        term: The user-supplied criteria term.
        synonyms_map: Mapping of trigger -> list of synonyms. Defaults to the
            module-level ``MEDICAL_SYNONYMS`` table.

    Returns:
        The original term followed by up to three synonyms per matching
        group, without duplicates and in a stable, deterministic order.
    """
    if synonyms_map is None:
        synonyms_map = MEDICAL_SYNONYMS

    term_lower = term.lower()

    def _mentions(phrase: str) -> bool:
        # Look-arounds instead of \b so phrases that start or end with a
        # non-word character are still delimited correctly.
        pattern = r'(?<!\w)' + re.escape(phrase.lower()) + r's?(?!\w)'
        return re.search(pattern, term_lower) is not None

    expanded = [term]
    for key, synonyms in synonyms_map.items():
        if _mentions(key) or any(_mentions(syn) for syn in synonyms):
            expanded.extend(synonyms[:3])  # Limit expansion

    # dict.fromkeys de-duplicates while keeping first-seen order; set() gave
    # a different order on every interpreter run.
    return list(dict.fromkeys(expanded))

def cross_encoder_score(text: str, criteria: str) -> float:
    """Score how well ``text`` matches ``criteria`` with the cross-encoder.

    The raw model output is squashed through a logistic function so the
    result lies in (0, 1).

    Returns:
        The sigmoid of the cross-encoder score, or the neutral value 0.5 when
        the advanced models are unavailable or prediction fails.
    """
    if not USE_ADVANCED_MODELS:
        return 0.5  # Default score if not available
    try:
        raw_scores = cross_encoder.predict([[text, criteria]])
        return float(1 / (1 + np.exp(-raw_scores[0])))
    except Exception:
        # Best-effort scoring: a model failure must not abort screening.
        # Unlike a bare ``except:``, Ctrl-C and SystemExit still propagate.
        return 0.5

def get_pubmed_embedding(text: str) -> np.ndarray:
    """Return the PubMedBERT embedding of ``text``.

    The embedding is the last hidden state at token position 0, taken from a
    forward pass with gradients disabled. Input is truncated to 512 tokens.

    Returns:
        The squeezed embedding array, or ``np.zeros(768)`` when the advanced
        models are unavailable or the forward pass fails.
    """
    if not USE_ADVANCED_MODELS:
        return np.zeros(768)

    try:
        inputs = pubmed_tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
        with torch.no_grad():
            outputs = pubmed_model(**inputs)
            embedding = outputs.last_hidden_state[:, 0, :].numpy()
        return embedding.squeeze()
    except Exception:
        # Best-effort: fall back to a zero vector but, unlike a bare
        # ``except:``, let KeyboardInterrupt/SystemExit through.
        return np.zeros(768)

def zero_shot_classify(text: str, labels: List[str], hypothesis_template: str = "This study is about {}") -> Dict:
    """Run multi-label zero-shot classification of ``text`` against ``labels``.

    Args:
        text: Text to classify.
        labels: Candidate labels; only the first ten are sent to the model.
        hypothesis_template: Template passed to the classifier; ``{}`` is
            replaced by each label.

    Returns:
        Mapping of label -> score. Empty when no labels are given or when the
        classifier call fails.
    """
    if not labels:
        return {}

    try:
        result = classifier(text, candidate_labels=labels[:10], hypothesis_template=hypothesis_template, multi_label=True)
        return dict(zip(result['labels'], result['scores']))
    except Exception:
        # Best-effort: callers treat an empty dict as "no signal".
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return {}

# ============================================================================
# ENHANCED CRITERIA PARSING
# ============================================================================

def parse_criteria(criteria_text: str, stage: str = "stage1") -> Dict:
    """Parse free-text screening criteria into PICOS-style term lists.

    The text is lowercased and read line by line. A line containing a known
    header keyword (e.g. ``population:``) switches the current section.
    Bullet lines (``- term``) and comma-separated lines are then added to
    that section. When the advanced models are enabled, each term is also
    expanded with medical synonyms.

    Args:
        criteria_text: Criteria as typed by the user. ``None`` or an empty
            string yields empty lists.
        stage: Accepted for interface compatibility; not used by the parser.

    Returns:
        Dict with keys ``population``, ``intervention``, ``comparator``,
        ``outcomes``, ``study_design``, ``include_general`` and
        ``exclude_general``. Each maps to a duplicate-free list of terms in
        first-seen order.
    """
    criteria = {
        'population': [], 'intervention': [], 'comparator': [], 'outcomes': [],
        'study_design': [], 'include_general': [], 'exclude_general': []
    }

    # Checked in this order; the first section with a matching keyword wins.
    section_headers = (
        ('population', ('population:', 'participants:', 'subjects:')),
        ('intervention', ('intervention:', 'exposure:', 'treatment:')),
        ('comparator', ('comparator:', 'control:', 'comparison:')),
        ('outcomes', ('outcomes:', 'endpoint:', 'results:')),
        ('study_design', ('study design:', 'design:', 'study type:')),
    )
    # Free-text lines mentioning any of these are treated as stray headers
    # and are not stored as terms.
    reserved_words = ('include', 'exclude', 'inclusion', 'exclusion', 'population',
                      'intervention', 'comparator', 'outcomes', 'study')

    def _add_terms(section, terms):
        # Store terms, expanding with synonyms when advanced models are on.
        for term in terms:
            if USE_ADVANCED_MODELS:
                criteria[section].extend(expand_medical_terms(term))
            else:
                criteria[section].append(term)

    current_section = None

    for raw_line in (criteria_text or "").lower().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # Detect section headers
        header = next(
            (name for name, keywords in section_headers
             if any(keyword in line for keyword in keywords)),
            None,
        )
        if header is not None:
            current_section = header
        elif ('include' in line or 'inclusion' in line) and ':' in line:
            # "inclusion criteria:" used to fall through and be stored as a term.
            current_section = 'include_general'
        elif ('exclude' in line or 'exclusion' in line) and ':' in line:
            current_section = 'exclude_general'
        elif line.startswith('-') and current_section:
            term = line[1:].strip()
            if len(term) > 2:
                _add_terms(current_section, [term])
        elif current_section and not any(word in line for word in reserved_words):
            parts = (piece.strip() for piece in line.split(','))
            _add_terms(current_section, [piece for piece in parts if len(piece) > 2])

    # Remove duplicates while keeping first-seen order. A set() here made the
    # order (and therefore any downstream "first N terms" cut) change between
    # interpreter runs.
    for key in criteria:
        criteria[key] = list(dict.fromkeys(criteria[key]))

    return criteria

# ============================================================================
# ENHANCED STAGE 1 CLASSIFICATION
# ============================================================================

def semantic_similarity_score(study_text: str, criteria_terms: List[str]) -> Tuple[float, str]:
    """Find the criteria term that best matches the study text.

    With advanced models, the first five terms are scored by the
    cross-encoder. Otherwise every term is compared to the study text by
    cosine similarity of their embeddings.

    Returns:
        ``(score, term)`` for the best-scoring term, or ``(0.0, "")`` when
        there are no terms or no term scores above zero.
    """
    top_score, top_term = 0.0, ""

    if not criteria_terms:
        return top_score, top_term

    if not USE_ADVANCED_MODELS:
        # Basic path: embed the study once, then compare against each term.
        study_vector = get_text_embedding(study_text)
        for candidate in criteria_terms:
            candidate_vector = get_text_embedding(candidate)
            closeness = cosine_similarity(study_vector, candidate_vector)
            if closeness > top_score:
                top_score = closeness
                top_term = candidate
        return top_score, top_term

    # Advanced path: cross-encoder scoring, capped at five terms for speed.
    for candidate in criteria_terms[:5]:
        relevance = cross_encoder_score(study_text, candidate)
        if relevance > top_score:
            top_score = relevance
            top_term = candidate

    return top_score, top_term

def cosine_similarity(a, b):
    """Cosine of the angle between vectors ``a`` and ``b``.

    Returns 0 when either vector has zero length.
    """
    inner = np.dot(a, b)
    length_a, length_b = np.linalg.norm(a), np.linalg.norm(b)
    if length_a > 0 and length_b > 0:
        return inner / (length_a * length_b)
    return 0

def get_text_embedding(text):
    """Embed ``text`` with whichever sentence-level model is loaded.

    With advanced models, ``sentence_model.encode`` is used. Otherwise the
    token vectors from the ``similarity_model`` feature-extraction pipeline
    are mean-pooled into one vector.

    Returns:
        The embedding, or ``np.zeros(384)`` when no model is available or the
        model call fails.
    """
    # NOTE(review): the zero-vector fallback is always 384-dimensional. If
    # sentence_model produces a different size, mixing a fallback vector with
    # a real one will fail in np.dot - confirm the model's dimension.
    if USE_ADVANCED_MODELS:
        try:
            return sentence_model.encode(text)
        except Exception:
            # Best-effort; KeyboardInterrupt/SystemExit are not swallowed.
            return np.zeros(384)

    if 'similarity_model' not in globals():
        return np.zeros(384)

    try:
        token_vectors = similarity_model(text)
        return np.mean(token_vectors[0], axis=0)
    except Exception:
        return np.zeros(384)

def stage1_classification(title: str, abstract: str, criteria_text: str) -> Dict:
    """Stage 1 (title/abstract) screening decision for a single study.

    Decision order:
      1. Under 20 characters of text -> UNCLEAR.
      2. Exclusion criteria score above the exclusion threshold -> EXCLUDE.
      3. Advanced models only: zero-shot relevance to the general inclusion
         criteria above 0.7 -> INCLUDE.
      4. Advanced models only: at least two of population / intervention /
         outcome above threshold -> INCLUDE.
      5. At least one of those three above 0.25 -> INCLUDE.
      6. Otherwise -> UNCLEAR.

    The exclusion score is computed before the zero-shot shortcut. It used to
    be computed afterwards, so a study matching an exclusion criterion could
    be auto-included without the exclusion criteria ever being checked.

    Returns:
        Dict with ``decision`` ('INCLUDE' / 'EXCLUDE' / 'UNCLEAR'),
        ``confidence`` (int percentage), ``reasoning`` (str) and ``stage`` (1).
    """

    study_text = f"{title} {abstract}".lower()
    if len(study_text.strip()) < 20:
        return {'decision': 'UNCLEAR', 'confidence': 20, 'reasoning': 'Insufficient text', 'stage': 1}

    criteria = parse_criteria(criteria_text, "stage1")

    # Evaluate exclusion first so no shortcut below can bypass it.
    exc_score, exc_match = semantic_similarity_score(study_text, criteria['exclude_general'])
    exc_threshold = 0.5 if USE_ADVANCED_MODELS else 0.35
    exclusion_hit = exc_score > exc_threshold

    # Zero-shot shortcut: only with advanced models and only when the study
    # has not already triggered an exclusion criterion.
    if USE_ADVANCED_MODELS and criteria['include_general'] and not exclusion_hit:
        zs_scores = zero_shot_classify(
            study_text,
            criteria['include_general'][:5],
            "This study is relevant to {}"
        )
        if zs_scores:
            max_zs_score = max(zs_scores.values())
            if max_zs_score > 0.7:
                return {
                    'decision': 'INCLUDE',
                    'confidence': min(int(max_zs_score * 100), 85),
                    'reasoning': f"Stage 1 INCLUDE: High relevance to inclusion criteria ({max_zs_score:.2f})",
                    'stage': 1
                }

    # Calculate PICOS scores
    pop_score, pop_match = semantic_similarity_score(study_text, criteria['population'])
    int_score, int_match = semantic_similarity_score(study_text, criteria['intervention'])
    out_score, out_match = semantic_similarity_score(study_text, criteria['outcomes'])
    design_score, design_match = semantic_similarity_score(study_text, criteria['study_design'])
    inc_score, inc_match = semantic_similarity_score(study_text, criteria['include_general'])

    # Adjust thresholds based on model availability
    threshold = 0.4 if USE_ADVANCED_MODELS else 0.25

    reasoning_parts = []
    if pop_score > threshold: reasoning_parts.append(f"Population: '{pop_match}' ({pop_score:.2f})")
    if int_score > threshold: reasoning_parts.append(f"Intervention: '{int_match}' ({int_score:.2f})")
    if out_score > threshold: reasoning_parts.append(f"Outcome: '{out_match}' ({out_score:.2f})")
    if design_score > threshold: reasoning_parts.append(f"Design: '{design_match}' ({design_score:.2f})")
    if inc_score > threshold: reasoning_parts.append(f"Include: '{inc_match}' ({inc_score:.2f})")
    if exc_score > threshold: reasoning_parts.append(f"Exclude: '{exc_match}' ({exc_score:.2f})")

    # Decision Logic
    if exclusion_hit:
        decision, confidence = 'EXCLUDE', min(int(exc_score * 100), 90)
        reasoning = f"Stage 1 EXCLUDE: {'; '.join(reasoning_parts)}"
    elif sum([pop_score > threshold, int_score > threshold, out_score > threshold]) >= 2 and USE_ADVANCED_MODELS:
        # At least two scores pass the filter, so the mean is never taken
        # over an empty list.
        avg_score = np.mean([s for s in [pop_score, int_score, out_score, design_score, inc_score] if s > threshold])
        decision, confidence = 'INCLUDE', min(int(avg_score * 85), 85)
        reasoning = f"Stage 1 INCLUDE (Advanced): {'; '.join(reasoning_parts)}"
    elif sum([pop_score > 0.25, int_score > 0.25, out_score > 0.25]) >= 1:
        avg_score = np.mean([s for s in [pop_score, int_score, out_score, design_score, inc_score] if s > 0.25])
        decision, confidence = 'INCLUDE', min(int(avg_score * 75), 80)
        reasoning = f"Stage 1 INCLUDE: {'; '.join(reasoning_parts)}"
    else:
        decision, confidence = 'UNCLEAR', 40
        reasoning = f"Stage 1 UNCLEAR: {'; '.join(reasoning_parts) if reasoning_parts else 'No clear matches'}"

    return {'decision': decision, 'confidence': confidence, 'reasoning': reasoning, 'stage': 1}

# ============================================================================
# STAGE 2 CLASSIFICATION (keeping original)
# ============================================================================

def stage2_classification(title: str, abstract: str, full_text: str, criteria_text: str, 
                         data_extraction_fields: Dict = None) -> Dict:
    """Stage 2 (full-text) screening decision for a single study.

    Title, abstract and full text are combined and scored against each
    criteria section. Optional data-extraction fields (field name -> list of
    terms) are scored the same way and returned alongside the decision.

    Returns:
        Dict with ``decision``, ``confidence``, ``reasoning``, ``stage`` (2)
        and, unless the text is too short to assess, ``extraction_data``.
    """
    study_text = f"{title} {abstract} {full_text}".lower()

    if len(study_text.strip()) < 50:
        return {'decision': 'UNCLEAR', 'confidence': 25, 'reasoning': 'Insufficient full text', 'stage': 2}

    criteria = parse_criteria(criteria_text, "stage2")

    # Report label -> criteria section, in the order they are scored/reported.
    sections = (
        ('Population', 'population'),
        ('Intervention', 'intervention'),
        ('Comparator', 'comparator'),
        ('Outcome', 'outcomes'),
        ('Design', 'study_design'),
        ('Exclusion', 'exclude_general'),
    )
    scored = {}
    for _, section_key in sections:
        scored[section_key] = semantic_similarity_score(study_text, criteria[section_key])

    pop_score = scored['population'][0]
    int_score = scored['intervention'][0]
    comp_score = scored['comparator'][0]
    out_score = scored['outcomes'][0]
    design_score = scored['study_design'][0]
    exc_score = scored['exclude_general'][0]

    # Data extraction scoring
    extraction_scores = {}
    for field, terms in (data_extraction_fields or {}).items():
        if not terms:
            continue
        field_score, field_match = semantic_similarity_score(study_text, terms)
        extraction_scores[field] = {'score': field_score, 'match': field_match}

    # Only sections scoring above 0.3 are mentioned in the reasoning.
    reasoning_parts = [
        f"{label}: '{scored[section_key][1]}' ({scored[section_key][0]:.2f})"
        for label, section_key in sections
        if scored[section_key][0] > 0.3
    ]
    joined_reasons = '; '.join(reasoning_parts)

    # Stage 2 Decision Logic (High Specificity)
    strong_matches = sum(score > 0.4 for score in (pop_score, int_score, out_score, design_score))
    best_core = max(pop_score, int_score, out_score)

    if exc_score > 0.4:
        decision = 'EXCLUDE'
        confidence = min(int(exc_score * 100), 95)
        reasoning = f"Stage 2 EXCLUDE: {joined_reasons}"
    elif strong_matches >= 3:
        avg_score = np.mean([pop_score, int_score, comp_score, out_score, design_score])
        decision = 'INCLUDE'
        confidence = min(int(avg_score * 85), 92)
        reasoning = f"Stage 2 INCLUDE: {joined_reasons}"
    elif best_core > 0.5:
        decision = 'INCLUDE'
        confidence = min(int(best_core * 80), 88)
        reasoning = f"Stage 2 INCLUDE: {joined_reasons}"
    else:
        decision = 'EXCLUDE'
        confidence = 60
        reasoning = f"Stage 2 EXCLUDE: Insufficient criteria match. {joined_reasons}"

    return {
        'decision': decision,
        'confidence': confidence,
        'reasoning': reasoning,
        'stage': 2,
        'extraction_data': extraction_scores
    }

# ============================================================================
# PROCESSING FUNCTIONS (keeping original structure)
# ============================================================================

def process_stage1(file, title_col, abstract_col, criteria, sample_size):
    """Run Stage 1 screening over an uploaded CSV of studies.

    Args:
        file: Uploaded CSV, either a path string or an object whose ``.name``
            attribute is the path.
        title_col: Name of the column holding study titles.
        abstract_col: Name of the column holding abstracts.
        criteria: Free-text inclusion/exclusion criteria.
        sample_size: Maximum number of rows (from the top of the file) to
            process.

    Returns:
        ``(summary_markdown, results_dataframe, results_csv_text)``. On any
        failure the summary starts with ``"Error:"`` and the other two
        values are ``None`` and ``""``.
    """
    if file is None:
        return "Error: No file uploaded. Please upload a CSV of studies first.", None, ""

    try:
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)

        row_limit = int(sample_size)  # guard against a float slider value
        if row_limit < len(df):
            df = df.head(row_limit)

        results = []
        for idx, row in df.iterrows():
            title = str(row[title_col]) if pd.notna(row[title_col]) else ""
            abstract = str(row[abstract_col]) if pd.notna(row[abstract_col]) else ""

            # Rows with neither a title nor an abstract cannot be screened.
            if not title and not abstract:
                continue

            classification = stage1_classification(title, abstract, criteria)

            results.append({
                'Study_ID': idx + 1,
                'Title': title[:100] + "..." if len(title) > 100 else title,
                'Stage1_Decision': classification['decision'],
                'Stage1_Confidence': f"{classification['confidence']}%",
                'Stage1_Reasoning': classification['reasoning'],
                'Ready_for_Stage2': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
                'Full_Title': title,
                'Full_Abstract': abstract
            })

        # Without this guard an empty result set surfaced as a cryptic
        # "Error: 'Stage1_Decision'" (KeyError on the empty frame).
        if not results:
            return "Error: No studies with a title or abstract were found in the uploaded file.", None, ""

        results_df = pd.DataFrame(results)

        # Summary for Stage 1
        decisions = results_df['Stage1_Decision']
        total = len(results_df)
        included = int((decisions == 'INCLUDE').sum())
        excluded = int((decisions == 'EXCLUDE').sum())
        unclear = int((decisions == 'UNCLEAR').sum())

        model_info = "**Using Advanced Medical NLP Models**" if USE_ADVANCED_MODELS else "**Using Basic NLP Models**"

        summary = f"""
## 📊 Stage 1 (Title/Abstract) Results

{model_info}

**Screening Complete:**
- **Total Studies:** {total}
- **Include for Stage 2:** {included} ({included/total*100:.1f}%)
- **Exclude:** {excluded} ({excluded/total*100:.1f}%)
- **Needs Manual Review:** {unclear} ({unclear/total*100:.1f}%)

**Next Steps:**
1. Review {unclear} studies marked as UNCLEAR
2. Proceed to Stage 2 with {included} included studies
3. Obtain full texts for Stage 2 screening
        """

        return summary, results_df, results_df.to_csv(index=False)

    except Exception as e:
        # UI boundary: report the problem in the summary pane instead of
        # crashing the Gradio callback.
        return f"Error: {str(e)}", None, ""

def process_stage2(file, title_col, abstract_col, fulltext_col, criteria, extraction_fields, sample_size):
    """Run Stage 2 (full-text) screening over an uploaded CSV.

    If the CSV has a ``Stage1_Decision`` column, only rows marked ``INCLUDE``
    are screened.

    Args:
        file: Uploaded CSV, either a path string or an object whose ``.name``
            attribute is the path.
        title_col: Name of the title column.
        abstract_col: Name of the abstract column.
        fulltext_col: Name of the full-text column. May be empty or absent
            from the file, in which case full text is treated as empty.
        criteria: Free-text inclusion/exclusion criteria.
        extraction_fields: Lines of ``Field name: term, term, ...`` defining
            the data-extraction fields. May be empty.
        sample_size: Maximum number of rows to process.

    Returns:
        ``(summary_markdown, results_dataframe, results_csv_text)``. On any
        failure the summary starts with ``"Error:"`` and the other two
        values are ``None`` and ``""``.
    """
    if file is None:
        return "Error: No file uploaded. Please upload Stage 1 results or a CSV with full texts.", None, ""

    try:
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)

        # Filter to only Stage 1 included studies if column exists
        if 'Stage1_Decision' in df.columns:
            df = df[df['Stage1_Decision'] == 'INCLUDE']

        row_limit = int(sample_size)  # guard against a float slider value
        if row_limit < len(df):
            df = df.head(row_limit)

        # Parse extraction fields: one "Field: term, term" definition per line.
        extraction_dict = {}
        if extraction_fields:
            for line in extraction_fields.split('\n'):
                if ':' in line:
                    field, terms = line.split(':', 1)
                    extraction_dict[field.strip()] = [t.strip() for t in terms.split(',') if t.strip()]

        has_fulltext = bool(fulltext_col) and fulltext_col in df.columns

        results = []
        for idx, row in df.iterrows():
            title = str(row[title_col]) if pd.notna(row[title_col]) else ""
            abstract = str(row[abstract_col]) if pd.notna(row[abstract_col]) else ""
            full_text = str(row[fulltext_col]) if has_fulltext and pd.notna(row[fulltext_col]) else ""

            # Rows with neither a title nor an abstract are skipped.
            if not title and not abstract:
                continue

            classification = stage2_classification(title, abstract, full_text, criteria, extraction_dict)

            results.append({
                'Study_ID': idx + 1,
                'Title': title[:100] + "..." if len(title) > 100 else title,
                'Stage2_Decision': classification['decision'],
                'Stage2_Confidence': f"{classification['confidence']}%",
                'Stage2_Reasoning': classification['reasoning'],
                'Final_Include': 'Yes' if classification['decision'] == 'INCLUDE' else 'No',
                'Extraction_Data': str(classification.get('extraction_data', {})),
                'Full_Title': title,
                'Full_Abstract': abstract,
                'Full_Text': full_text
            })

        # Without this guard an empty result set (e.g. no Stage 1 INCLUDE
        # rows) surfaced as a cryptic "Error: 'Stage2_Decision'".
        if not results:
            return ("Error: No studies to screen. The file has no Stage 1 INCLUDE rows "
                    "with a title or abstract."), None, ""

        results_df = pd.DataFrame(results)

        # Summary for Stage 2
        decisions = results_df['Stage2_Decision']
        total = len(results_df)
        final_included = int((decisions == 'INCLUDE').sum())
        final_excluded = int((decisions == 'EXCLUDE').sum())

        summary = f"""
## 📊 Stage 2 (Full-Text) Results

**Detailed Screening Complete:**
- **Studies Reviewed:** {total}
- **Final INCLUDE:** {final_included} ({final_included/total*100:.1f}%)
- **Final EXCLUDE:** {final_excluded} ({final_excluded/total*100:.1f}%)

**Ready for Next Steps:**
- **Data Extraction:** {final_included} studies
- **Quality Assessment:** {final_included} studies  
- **Evidence Synthesis:** Ready to proceed

**Recommended Actions:**
1. Export {final_included} included studies for detailed data extraction
2. Conduct quality assessment (ROB2, ROBINS-I, etc.)
3. Begin evidence synthesis and meta-analysis planning
        """

        return summary, results_df, results_df.to_csv(index=False)

    except Exception as e:
        # UI boundary: report the problem in the summary pane instead of
        # crashing the Gradio callback.
        return f"Error: {str(e)}", None, ""

# ============================================================================
# ORIGINAL INTERFACE (PRESERVED)
# ============================================================================

def create_interface():
    """Build the Gradio Blocks UI for the two-stage screening workflow.

    The interface has three tabs: Stage 1 (title/abstract screening),
    Stage 2 (full-text screening) and a static workflow guide. Uploading a
    CSV fills the column dropdowns. The "Start" buttons run
    ``process_stage1`` / ``process_stage2`` and reveal a download button for
    the resulting CSV.

    Returns:
        The assembled ``gr.Blocks`` object (not yet launched).
    """
    import tempfile

    with gr.Blocks(title="🔬 2-Stage Systematic Review AI Assistant", theme=gr.themes.Soft()) as interface:

        gr.Markdown("""
        # 🔬 2-Stage Systematic Review AI Assistant
        
        **Complete workflow for evidence-based systematic reviews**
        
        This tool supports the full 2-stage systematic review process:
        - **Stage 1:** Title/Abstract screening (high sensitivity)
        - **Stage 2:** Full-text screening with data extraction (high specificity)
        """)

        with gr.Tabs():

            # STAGE 1 TAB
            with gr.TabItem("📋 Stage 1: Title/Abstract Screening"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### 📁 Upload Study Data")

                        stage1_file = gr.File(
                            label="Upload Studies (CSV) - Search results from databases",
                            file_types=[".csv"],
                            type="filepath"
                        )

                        with gr.Row():
                            stage1_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
                            stage1_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)

                        stage1_sample = gr.Slider(label="Studies to Process", minimum=5, maximum=500, value=100, step=5)

                    with gr.Column(scale=1):
                        gr.Markdown("### 🎯 Stage 1 Criteria (Broad/Sensitive)")

                        stage1_criteria = gr.Textbox(
                            label="Inclusion/Exclusion Criteria for Stage 1",
                            value="""POPULATION:
- Adult participants
- Human studies

INTERVENTION:
- [Your intervention/exposure of interest]

OUTCOMES:
- [Primary outcomes of interest]

STUDY DESIGN:
- Randomized controlled trials
- Cohort studies
- Case-control studies

EXCLUDE:
- Animal studies
- Case reports
- Reviews (unless relevant)""",
                            lines=15
                        )

                stage1_process_btn = gr.Button("🚀 Start Stage 1 Screening", variant="primary")

                stage1_results = gr.Markdown()
                stage1_table = gr.Dataframe(label="Stage 1 Results")
                stage1_download_data = gr.Textbox(visible=False)
                stage1_download_btn = gr.DownloadButton(label="💾 Download Stage 1 Results", visible=False)

            # STAGE 2 TAB
            with gr.TabItem("📄 Stage 2: Full-Text Screening"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### 📁 Upload Stage 1 Results or Full-Text Data")

                        stage2_file = gr.File(
                            label="Upload Stage 1 Results or Studies with Full Text",
                            file_types=[".csv"],
                            type="filepath"
                        )

                        with gr.Row():
                            stage2_title_col = gr.Dropdown(label="Title Column", choices=[], interactive=True)
                            stage2_abstract_col = gr.Dropdown(label="Abstract Column", choices=[], interactive=True)

                        stage2_fulltext_col = gr.Dropdown(label="Full Text Column", choices=[], interactive=True)
                        stage2_sample = gr.Slider(label="Studies to Process", minimum=5, maximum=200, value=50, step=5)

                    with gr.Column(scale=1):
                        gr.Markdown("### 🎯 Stage 2 Criteria (Strict/Specific)")

                        stage2_criteria = gr.Textbox(
                            label="Detailed Inclusion/Exclusion Criteria for Stage 2",
                            value="""POPULATION:
- [Specific population criteria]
- [Age ranges, conditions, etc.]

INTERVENTION:
- [Detailed intervention specifications]
- [Dosage, duration, delivery method]

COMPARATOR:
- [Control group specifications]
- [Placebo, standard care, etc.]

OUTCOMES:
- [Primary endpoint definitions]
- [Secondary outcomes]
- [Measurement methods]

STUDY DESIGN:
- [Minimum study quality requirements]
- [Follow-up duration requirements]

EXCLUDE:
- [Specific exclusion criteria]
- [Study quality thresholds]""",
                            lines=15
                        )

                        extraction_fields = gr.Textbox(
                            label="Data Extraction Fields (Optional)",
                            value="""Sample Size: participants, subjects, patients, n=
Intervention Duration: weeks, months, days, duration  
Primary Outcome: endpoint, primary outcome, main outcome
Statistical Method: analysis, statistical, regression, model
Risk of Bias: randomization, blinding, allocation""",
                            lines=8
                        )

                stage2_process_btn = gr.Button("🔍 Start Stage 2 Screening", variant="primary")

                stage2_results = gr.Markdown()
                stage2_table = gr.Dataframe(label="Stage 2 Results with Data Extraction")
                stage2_download_data = gr.Textbox(visible=False)
                stage2_download_btn = gr.DownloadButton(label="💾 Download Final Results", visible=False)

            # WORKFLOW GUIDANCE TAB
            with gr.TabItem("📚 Systematic Review Workflow"):
                gr.Markdown("""
                ## 🔄 Complete 2-Stage Systematic Review Process
                
                ### **Stage 1: Title/Abstract Screening**
                **Objective:** High sensitivity screening to identify potentially relevant studies
                
                **Process:**
                1. Upload search results from multiple databases (PubMed, Embase, etc.)
                2. Define broad inclusion/exclusion criteria
                3. AI screens titles/abstracts with high sensitivity
                4. Manually review "UNCLEAR" classifications
                5. Export studies marked for inclusion to Stage 2
                
                **Criteria Guidelines:**
                - Use broad terms to capture all potentially relevant studies
                - Focus on key PICOS elements (Population, Intervention, Outcomes)
                - Err on the side of inclusion when uncertain
                
                ### **Stage 2: Full-Text Screening** 
                **Objective:** High specificity screening with detailed data extraction
                
                **Process:**
                1. Upload Stage 1 results or add full-text content
                2. Define strict, specific inclusion/exclusion criteria
                3. AI performs detailed full-text analysis
                4. Extract key data points for synthesis
                5. Export final included studies for meta-analysis
                
                **Criteria Guidelines:**
                - Use specific, measurable criteria
                - Include detailed PICOS specifications
                - Define minimum quality thresholds
                - Specify exact outcome measurements needed
                
                ### **Quality Assurance Recommendations:**
                
                **For Stage 1:**
                - Manual review of 10-20% of AI decisions
                - Inter-rater reliability testing with subset
                - Calibration exercises among reviewers
                
                **For Stage 2:**
                - Manual validation of all AI INCLUDE decisions
                - Detailed reason documentation for exclusions
                - Data extraction verification by second reviewer
                
                ### **After 2-Stage Screening:**
                
                1. **Data Extraction:** Extract detailed study characteristics
                2. **Quality Assessment:** Apply ROB2, ROBINS-I, or other tools
                3. **Evidence Synthesis:** Qualitative synthesis and meta-analysis
                4. **GRADE Assessment:** Evaluate certainty of evidence
                5. **Reporting:** Follow PRISMA guidelines
                
                ### **Best Practices:**
                
                - **Document everything:** Keep detailed logs of decisions and criteria
                - **Validate AI decisions:** Use AI as assistance, not replacement
                - **Follow guidelines:** Adhere to Cochrane and PRISMA standards
                - **Test criteria:** Pilot with known studies before full screening
                - **Multiple reviewers:** Have disagreements resolved by third reviewer
                
                ### **When to Use Each Stage:**
                
                **Use Stage 1 when:**
                - Starting with large search results (>1000 studies)
                - Need to quickly filter irrelevant studies
                - Working with title/abstract data only
                
                **Use Stage 2 when:**
                - Have full-text access to studies
                - Need detailed inclusion/exclusion assessment
                - Ready for data extraction
                - Preparing for meta-analysis
                
                ### **Advanced NLP Features:**
                
                This tool now includes advanced medical NLP models when available:
                - **PubMedBERT** for medical text understanding
                - **Cross-encoders** for accurate semantic matching
                - **Zero-shot classification** for flexible criteria
                - **Medical term expansion** for comprehensive matching
                
                The system automatically detects and uses advanced models when available,
                falling back to basic models if needed.
                """)

        # ------------------------------------------------------------------
        # Event handlers for file uploads and column detection
        # ------------------------------------------------------------------
        def _read_columns(file):
            """Return the CSV's column names, parsing only the header row."""
            csv_path = getattr(file, "name", file)
            return pd.read_csv(csv_path, nrows=0).columns.tolist()

        def update_stage1_columns(file):
            """Fill the Stage 1 title/abstract dropdowns from the uploaded CSV."""
            if file is None:
                return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
            try:
                columns = _read_columns(file)
            except Exception:
                # Unreadable upload: leave the dropdowns empty rather than
                # raising inside the UI callback.
                return gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
            # Prefer a column whose name mentions the field; otherwise fall
            # back to position (first column = title, second = abstract).
            title_col = next((col for col in columns if 'title' in col.lower()), columns[0] if columns else None)
            abstract_col = next((col for col in columns if 'abstract' in col.lower()), columns[1] if len(columns) > 1 else None)
            return gr.Dropdown(choices=columns, value=title_col), gr.Dropdown(choices=columns, value=abstract_col)

        def update_stage2_columns(file):
            """Fill the Stage 2 title/abstract/full-text dropdowns from the uploaded CSV."""
            if file is None:
                return gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
            try:
                columns = _read_columns(file)
            except Exception:
                return gr.Dropdown(choices=[]), gr.Dropdown(choices=[]), gr.Dropdown(choices=[])
            title_col = next((col for col in columns if 'title' in col.lower()), columns[0] if columns else None)
            abstract_col = next((col for col in columns if 'abstract' in col.lower()), columns[1] if len(columns) > 1 else None)
            fulltext_col = next((col for col in columns if any(term in col.lower() for term in ['full_text', 'fulltext', 'text', 'content'])), None)
            return (gr.Dropdown(choices=columns, value=title_col),
                    gr.Dropdown(choices=columns, value=abstract_col),
                    gr.Dropdown(choices=columns, value=fulltext_col))

        # Event bindings
        stage1_file.change(fn=update_stage1_columns, inputs=[stage1_file], outputs=[stage1_title_col, stage1_abstract_col])
        stage2_file.change(fn=update_stage2_columns, inputs=[stage2_file], outputs=[stage2_title_col, stage2_abstract_col, stage2_fulltext_col])

        def _download_update(csv_data, prefix):
            """Write the CSV text to a temp file and point a DownloadButton at it.

            A DownloadButton serves the file path held in its ``value``. The
            previous wiring returned raw CSV text to an anonymous ``gr.File``
            output, which expects a path, so the download never worked.
            """
            if not csv_data:
                return gr.DownloadButton(visible=False)
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".csv", prefix=prefix, delete=False,
                encoding="utf-8", newline=""
            ) as tmp:
                tmp.write(csv_data)
            return gr.DownloadButton(value=tmp.name, visible=True)

        def process_stage1_with_download(*args):
            summary, table, csv_data = process_stage1(*args)
            return summary, table, csv_data, _download_update(csv_data, "stage1_results_")

        def process_stage2_with_download(*args):
            summary, table, csv_data = process_stage2(*args)
            return summary, table, csv_data, _download_update(csv_data, "stage2_results_")

        stage1_process_btn.click(
            fn=process_stage1_with_download,
            inputs=[stage1_file, stage1_title_col, stage1_abstract_col, stage1_criteria, stage1_sample],
            outputs=[stage1_results, stage1_table, stage1_download_data, stage1_download_btn]
        )

        stage2_process_btn.click(
            fn=process_stage2_with_download,
            inputs=[stage2_file, stage2_title_col, stage2_abstract_col, stage2_fulltext_col, stage2_criteria, extraction_fields, stage2_sample],
            outputs=[stage2_results, stage2_table, stage2_download_data, stage2_download_btn]
        )

    return interface

if __name__ == "__main__":
    # Script entry point: build the Gradio UI and start serving it.
    create_interface().launch()