import gradio as gr
import subprocess
import os
import sys
from datetime import datetime
import shutil


# Script that train_model() launches as a subprocess (path is relative to the
# working directory the app is started from).
TRAINING_SCRIPT = "HF_LayoutLM_with_Passage.py"

# Location where this app looks for the trained model once a run has finished.
MODEL_OUTPUT_DIR = "checkpoints"
MODEL_FILE_NAME = "layoutlmv3_crf_passage.pth"
MODEL_FILE_PATH = os.path.join(MODEL_OUTPUT_DIR, MODEL_FILE_NAME)


# ----------------------------------------------------------------

def retrieve_model():
    """
    Check for the final model file and prepare it for download.

    Useful when the training job finished server-side but the client
    connection timed out before the download button could be shown.

    The checkpoint at the module-level ``MODEL_FILE_PATH`` is copied into the
    system temp directory under a timestamped name, and that copy is what gets
    offered for download.

    Returns:
        A 3-tuple ``(log_text, download_path, button)`` matching the outputs
        wired to the status-check button:

        * ``log_text`` - human-readable status report.
        * ``download_path`` - path of the temp copy, or ``None`` when the
          model is missing or could not be copied.
        * ``button`` - a ``gr.Button`` that is visible only when a download
          path is available.
    """
    import tempfile

    checked_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Guard clause: nothing to recover yet.
    if not os.path.exists(MODEL_FILE_PATH):
        log_output = (
            f"--- Model Status Check: {checked_at} ---\n"
            f"❌ Model file not found at {MODEL_FILE_PATH}.\n"
            "Training may still be running or it failed. Check back later."
        )
        return log_output, None, gr.Button(visible=False)

    # CRITICAL: Copy to a simple location that Gradio can reliably serve
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    temp_model_path = os.path.join(
        tempfile.gettempdir(), f"layoutlmv3_trained_{timestamp}_recovered.pth"
    )

    try:
        # getsize is inside the try so that a file which disappears between the
        # existence check and here is reported instead of raising.
        file_size = os.path.getsize(MODEL_FILE_PATH) / (1024 * 1024)  # Size in MB
        shutil.copy2(MODEL_FILE_PATH, temp_model_path)
    except OSError as e:
        log_output = (
            "--- Model Status Check FAILED ---\n"
            f"⚠️ Trained model found, but could not prepare for download: {e}\n"
            f"📁 Original Path: {MODEL_FILE_PATH}. Try again or check Space logs."
        )
        return log_output, None, gr.Button(visible=False)

    log_output = (
        f"--- Model Status Check: {checked_at} ---\n"
        "🎉 SUCCESS! A trained model was found and recovered.\n"
        f"📦 Model file: {MODEL_FILE_PATH}\n"
        f"📊 Model size: {file_size:.2f} MB\n"
        f"🔗 Download path prepared: {temp_model_path}\n\n"
        "⬇️ Click the '📥 Download Model' button below to save your model."
    )
    return log_output, temp_model_path, gr.Button(visible=True)


def clear_memory(dataset_file: gr.File):
    """
    Delete the model output directory, temp download copies and the uploaded dataset.

    Three kinds of files are removed, each on a best-effort basis (a failure is
    written to the log and the remaining steps still run):

    1. The checkpoint directory ``MODEL_OUTPUT_DIR``.
    2. The ``layoutlmv3_trained_*.pth`` copies that the training and
       status-check handlers place in the system temp directory.
    3. The uploaded dataset file, if one is currently set.

    Args:
        dataset_file: Value of the upload component. Its ``.name`` attribute is
            used as the path when present, otherwise ``str(dataset_file)``.
            ``None`` means no file is tracked.

    Returns:
        A 4-tuple ``(log_text, None, hidden_button, None)`` matching the
        outputs wired to the clear button; the ``None`` values reset the
        stored model path and the download file component.
    """
    import glob
    import tempfile

    log_output = f"--- Memory Clear Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---\n"

    # 1. Clear Model Checkpoints Directory
    if os.path.exists(MODEL_OUTPUT_DIR):
        try:
            shutil.rmtree(MODEL_OUTPUT_DIR)
            log_output += f"✅ Successfully deleted model directory: {MODEL_OUTPUT_DIR}\n"
        except OSError as e:
            log_output += f"❌ ERROR deleting model directory {MODEL_OUTPUT_DIR}: {e}\n"
    else:
        log_output += f"ℹ️ Model directory not found: {MODEL_OUTPUT_DIR} (Nothing to delete)\n"

    # 2. Clear temp download copies. Each one is a full copy of the checkpoint,
    #    so leaving them behind defeats the purpose of clearing.
    #    glob.escape protects against wildcard characters in the temp dir path.
    temp_pattern = os.path.join(glob.escape(tempfile.gettempdir()), "layoutlmv3_trained_*.pth")
    removed_copies = 0
    for temp_copy in glob.glob(temp_pattern):
        try:
            os.remove(temp_copy)
            removed_copies += 1
        except OSError as e:
            log_output += f"❌ ERROR deleting temporary model copy {temp_copy}: {e}\n"
    if removed_copies:
        log_output += f"✅ Successfully deleted {removed_copies} temporary model download file(s)\n"

    # 3. Clear Uploaded Dataset File (Temporary file cleanup)
    if dataset_file is not None:
        input_path = dataset_file.name if hasattr(dataset_file, 'name') else str(dataset_file)
        if os.path.exists(input_path):
            try:
                os.remove(input_path)
                log_output += f"✅ Successfully deleted uploaded dataset file: {input_path}\n"
            except OSError as e:
                log_output += f"❌ ERROR deleting dataset file {input_path}: {e}\n"
        else:
            log_output += f"ℹ️ Uploaded dataset file not found at {input_path}.\n"
    else:
        log_output += "ℹ️ No dataset file currently tracked for deletion.\n"

    # 4. Final message and state reset
    log_output += f"--- Memory Clear Complete: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---\n"
    log_output += "✨ Files and checkpoints have been removed. You can now start a fresh training run."

    # Reset log_output, model_path_state, download_btn visibility, and model_download component
    return log_output, None, gr.Button(visible=False), None


def train_model(dataset_file: gr.File, batch_size: int, epochs: int, lr: float, max_len: int, progress=gr.Progress()):
    """
    Run the training script as a subprocess and stream its output to the UI.

    This is a generator. Every ``yield`` is a 3-tuple
    ``(log_text, download_path, button)`` matching the outputs wired to the
    train button. ``download_path`` stays ``None`` and the button stays hidden
    until the script has exited with code 0 and the model file has been found
    at ``MODEL_FILE_PATH``.

    Args:
        dataset_file: Value of the upload component. Its ``.name`` attribute is
            used as the path when present, otherwise ``str(dataset_file)``.
            Must point to an existing ``.json`` file.
        batch_size: Forwarded to the script as ``--batch_size``.
        epochs: Forwarded to the script as ``--epochs``.
        lr: Forwarded to the script as ``--lr``.
        max_len: Forwarded to the script as ``--max_len``.
        progress: Gradio progress tracker; updated once before launch.

    Yields:
        ``(log_text, download_path, button)`` after validation failures, after
        every line of subprocess output, and once with the final status.
    """
    import tempfile
    import traceback

    # 1. Setup: Create output directory if it doesn't exist
    os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)

    # 2. File Handling: Use the temporary path of the uploaded file
    if dataset_file is None:
        yield "❌ ERROR: Please upload a file.", None, gr.Button(visible=False)
        return

    # The upload value may be a file-like wrapper (path in .name) or a plain path.
    input_path = dataset_file.name if hasattr(dataset_file, 'name') else str(dataset_file)

    # Verify the file actually exists before proceeding
    if not os.path.exists(input_path):
        error_msg = f"❌ ERROR: Uploaded file not found at {input_path}. Please try uploading again."
        yield error_msg, None, gr.Button(visible=False)
        return

    if not input_path.lower().endswith(".json"):
        yield "❌ ERROR: Please upload a valid Label Studio JSON file (.json).", None, gr.Button(visible=False)
        return

    # A missing script does NOT make Popen raise FileNotFoundError (only a
    # missing interpreter does), so it has to be checked for explicitly.
    if not os.path.isfile(TRAINING_SCRIPT):
        error_msg = f"❌ ERROR: The training script '{TRAINING_SCRIPT}' was not found. Ensure it is in the root directory of your Space."
        print(error_msg)
        yield error_msg, None, gr.Button(visible=False)
        return

    progress(0.1, desc="Starting LayoutLMv3 Training...")

    log_output = f"--- Training Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ---\n"

    # 3. Construct the subprocess command
    command = [
        sys.executable,
        "-u",  # unbuffered child stdout; otherwise output through a pipe arrives in large delayed chunks
        TRAINING_SCRIPT,
        "--mode", "train",
        "--input", input_path,
        "--batch_size", str(batch_size),
        "--epochs", str(epochs),
        "--lr", str(lr),
        "--max_len", str(max_len)
    ]

    log_output += f"Executing command: {' '.join(command)}\n\n"
    yield log_output, None, gr.Button(visible=False)  # Initial yield

    try:
        # 4. Run the training script and capture output (stderr merged into stdout)
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",  # an undecodable byte must not abort log streaming
            bufsize=1
        )

        # Stream logs in real-time
        for line in process.stdout:
            log_output += line
            # Print to console as well for debugging
            print(line, end='')
            # Yield updated logs in real-time
            yield log_output, None, gr.Button(visible=False)

        process.stdout.close()
        return_code = process.wait()

        # 5. Check for successful completion
        if return_code != 0:
            log_output += f"\n\n{'=' * 60}\n"
            log_output += f"❌ TRAINING FAILED with return code {return_code}\n"
            log_output += f"{'=' * 60}\n"
            log_output += "\nPlease check the logs above for error details.\n"
            yield log_output, None, gr.Button(visible=False)
            return

        log_output += "\n" + "=" * 60 + "\n"
        log_output += "✅ TRAINING COMPLETE! Model saved successfully.\n"
        log_output += "=" * 60 + "\n"
        print("\n✅ TRAINING COMPLETE! Model saved.")

        # 6. Verify model file exists
        if not os.path.exists(MODEL_FILE_PATH):
            log_output += f"\n⚠️ WARNING: Training completed, but model file not found at expected path ({MODEL_FILE_PATH})."
            log_output += "\n🔍 Checking directory contents..."

            # List files in checkpoints directory for debugging
            if os.path.exists(MODEL_OUTPUT_DIR):
                files = os.listdir(MODEL_OUTPUT_DIR)
                log_output += f"\n📁 Files in {MODEL_OUTPUT_DIR}: {files}"
            else:
                log_output += f"\n❌ Directory {MODEL_OUTPUT_DIR} does not exist!"

            yield log_output, None, gr.Button(visible=False)
            return

        file_size = os.path.getsize(MODEL_FILE_PATH) / (1024 * 1024)  # Size in MB
        log_output += f"\n📦 Model file found: {MODEL_FILE_PATH}"
        log_output += f"\n📊 Model size: {file_size:.2f} MB"

        print(f"\n✅ Model exists at: {MODEL_FILE_PATH} ({file_size:.2f} MB)")

        # CRITICAL: Copy to a simple location that Gradio can reliably serve
        # Use the same temp directory pattern as the uploaded JSON file
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        temp_model_path = os.path.join(tempfile.gettempdir(), f"layoutlmv3_trained_{timestamp}.pth")

        try:
            # Copy the model to temp directory
            shutil.copy2(MODEL_FILE_PATH, temp_model_path)
            log_output += "\n📋 Model copied to temporary download location"
            log_output += f"\n🔗 Download path: {temp_model_path}"
            print(f"✅ Model copied to temp location: {temp_model_path}")

            # Verify the copy exists
            if os.path.exists(temp_model_path):
                log_output += "\n✅ Download file verified and ready!"
                download_path = temp_model_path
            else:
                log_output += "\n⚠️ Warning: Temp copy verification failed, using original path"
                download_path = MODEL_FILE_PATH

        except Exception as e:
            # Best effort: fall back to serving the original checkpoint.
            log_output += f"\n⚠️ Could not create temp copy: {e}"
            log_output += f"\n📁 Using original path: {MODEL_FILE_PATH}"
            print(f"⚠️ Copy failed: {e}, using original path")
            download_path = MODEL_FILE_PATH

        # Final success message
        log_output += f"\n\n{'=' * 60}"
        log_output += "\n🎉 SUCCESS! Your model is ready for download."
        log_output += f"\n{'=' * 60}"
        log_output += "\n\n⬇️ Click the '📥 Download Model' button below to save your model."
        log_output += "\n⚠️ CRITICAL: Download NOW! File will be deleted when:"
        log_output += "\n   - This tab is closed"
        log_output += "\n   - Space restarts or goes idle"
        log_output += "\n   - System clears temp files"
        log_output += "\n\n📥 The file will download as a .pth file to your computer's Downloads folder."
        log_output += f"\n\n{'=' * 60}\n"

        # Return final logs and make download button visible
        # IMPORTANT: Return the path that Gradio can access
        yield log_output, download_path, gr.Button(visible=True)
        return

    except FileNotFoundError as e:
        # The script's existence was verified above, so this means the
        # interpreter itself could not be started.
        error_msg = f"❌ ERROR: Could not launch the training process: {e}"
        print(error_msg)
        yield log_output + "\n" + error_msg, None, gr.Button(visible=False)
        return
    except Exception as e:
        # Top-level boundary for the UI handler: report instead of crashing the event.
        error_msg = f"❌ An unexpected error occurred: {e}"
        print(error_msg)
        print(traceback.format_exc())
        yield log_output + "\n" + error_msg, None, gr.Button(visible=False)
        return


# --- Gradio Interface Setup (using Blocks for a nicer layout) ---
with gr.Blocks(title="LayoutLMv3 Fine-Tuning App by Aastik", theme=gr.themes.Soft()) as demo:
    # Page header and usage notes.
    gr.Markdown("# 🚀 LayoutLMv3 Fine-Tuning on Hugging Face Spaces")
    gr.Markdown(
        """
        Upload your Label Studio JSON file, set your hyperparameters, and click **Train Model** to fine-tune the LayoutLMv3 model.

        **⚠️ IMPORTANT - Free Tier Users:**
        - **Download your model IMMEDIATELY** after training completes! 
        - The model file is **temporary** and will be deleted when the Space restarts.
        - A download button will appear below once training is complete.
        - **Real-time logs** will stream during training so you can monitor progress.

        **⏱️ Timeout Note:** Training may timeout on free tier. Consider reducing epochs or batch size for faster training.
        """
    )

    # Two columns: the first holds the upload, hyperparameters and action
    # buttons; the second (scale=2) holds the log view and download widgets.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📁 Dataset Upload")
            file_input = gr.File(
                label="Upload Label Studio JSON Dataset",
                file_types=[".json"]
            )

            gr.Markdown("---")
            gr.Markdown("### ⚙️ Training Parameters")

            # These four inputs are passed to train_model() in this order.
            batch_size_input = gr.Slider(
                minimum=1, maximum=16, step=1, value=4,
                label="Batch Size",
                info="Smaller = less memory, slower training"
            )
            epochs_input = gr.Slider(
                minimum=1, maximum=10, step=1, value=3,
                label="Epochs",
                info="Fewer epochs = faster training (recommended: 3-5)"
            )
            lr_input = gr.Number(
                value=5e-5, label="Learning Rate",
                info="Default: 5e-5"
            )
            max_len_input = gr.Slider(
                minimum=128, maximum=512, step=128, value=512,
                label="Max Sequence Length",
                info="Shorter = faster training, less memory"
            )

            train_button = gr.Button("🔥 Start Training", variant="primary", size="lg")
            check_button = gr.Button("🔍 Check Model Status/Download", variant="secondary", size="lg")
            clear_button = gr.Button("🧹 Clear Model/Dataset Files", variant="stop", size="lg")

        with gr.Column(scale=2):
            gr.Markdown("### 📊 Training Progress (Real-Time Logs)")

            # Shared log view: written by the train, check and clear handlers.
            log_output = gr.Textbox(
                label="Training Logs - Updates in Real-Time",
                lines=25,
                max_lines=30,
                autoscroll=True,
                show_copy_button=True,
                placeholder="Click 'Start Training' to begin...\n\nLogs will stream here in real-time as training progresses."
            )

            gr.Markdown("### ⬇️ Download Trained Model")

            # Hidden state to store the file path
            model_path_state = gr.State(value=None)

            # Download button (initially hidden)
            download_btn = gr.Button(
                "📥 Download Model (.pth file)",
                variant="primary",
                size="lg",
                visible=False
            )

            # Status check: retrieve_model() returns (log text, model path, button update).
            check_button.click(
                fn=retrieve_model,  # Defined above; looks for an existing trained model file
                inputs=[],
                outputs=[log_output, model_path_state, download_btn]
            )


            # File output for download
            model_download = gr.File(
                label="Your trained model will appear here after clicking Download",
                interactive=False,
                visible=True
            )

            # Clear: clear_memory() returns four values, one per output below.
            clear_button.click(
                fn=clear_memory,
                inputs=[file_input],  # Pass the uploaded file object to delete the temp file
                outputs=[log_output, model_path_state, download_btn, model_download]
            )

            gr.Markdown(
                """
                **📥 Download Instructions:**
                1. Wait for training to complete - watch the real-time logs above
                2. Look for **"✅ TRAINING COMPLETE!"** message
                3. Click the **"📥 Download Model"** button that appears above
                4. Save the `.pth` file to your local machine
                5. **Do this immediately** - file is temporary and will be deleted on Space restart!

                **🔧 Troubleshooting:**
                - If download button doesn't appear, check the logs for errors
                - Try reducing epochs or batch size if timeout occurs
                - Ensure your JSON file is properly formatted
                - Logs update in real-time - you can monitor training progress
                """
            )

    # Define the training action - now with real-time log streaming via yield
    # (train_model is a generator; each yielded tuple updates the three outputs).
    train_button.click(
        fn=train_model,
        inputs=[file_input, batch_size_input, epochs_input, lr_input, max_len_input],
        outputs=[log_output, model_path_state, download_btn],
        api_name="train"
    )

    # Define the download action: hand the path held in model_path_state to the
    # File component unchanged.
    download_btn.click(
        fn=lambda path: path,
        inputs=[model_path_state],
        outputs=[model_download]
    )

    # Footer / about section.
    gr.Markdown(
        """
        ---
        ### 📖 About
        This Space fine-tunes LayoutLMv3 with CRF for document understanding tasks including:
        - Questions, Options, Answers
        - Section Headings
        - Passages

        **Model Details:** LayoutLMv3-base + CRF layer for sequence labeling

        **Features:**
        - ✅ Real-time log streaming during training
        - ✅ Progress monitoring with epoch/batch updates
        - ✅ Immediate model download after completion
        - ✅ Automatic file preparation for download
        """
    )

# Launch the Gradio app only when this file is executed as a script, so that
# importing the module does not start a server.
if __name__ == "__main__":
    demo.launch()