Spaces:

richardyoung
/

2pac

Sleeping

File size: 75,274 Bytes

c43a81f

#!/usr/bin/env python3
"""
2PAC: The Picture Analyzer & Corruption killer
Author: Richard Young
License: MIT

In memory of Jeff Young, who loved Tupac's music and lived by his values of helping others.
Like Tupac, Jeff believed in bringing people together and always lending a hand to those in need.
May your photos always be as clear as the memories they capture, and may we all strive to help others as Jeff did.
"""

import os
import argparse
import concurrent.futures
import sys
import time
import io
import json
import shutil
import hashlib
import struct
import tempfile
import subprocess
import random
from datetime import datetime
from pathlib import Path
from PIL import Image, ImageFile, UnidentifiedImageError
from tqdm import tqdm
import tqdm.auto as tqdm_auto
import colorama
import humanize
import logging

# Import 2PAC quotes from the optional companion module
try:
    from quotes import QUOTES
except ImportError:
    # Fall back to a single built-in quote if the quotes module is missing
    QUOTES = ["All Eyez On Your Images."]

# Initialize colorama (required for Windows)
colorama.init()

# Allow loading of truncated images for repair attempts.
# NOTE(review): this is a module-wide Pillow flag, so it is in effect for every
# Image.load() call made after import, including the validation paths in this
# file, not only repairs - confirm that is intended.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Supported image formats, keyed by format name, each mapped to a tuple of
# lowercase file extensions (tuples so they can be passed to str.endswith)
SUPPORTED_FORMATS = {
    'JPEG': ('.jpg', '.jpeg', '.jpe', '.jif', '.jfif', '.jfi'),
    'PNG': ('.png',),
    'GIF': ('.gif',),
    'TIFF': ('.tiff', '.tif'),
    'BMP': ('.bmp', '.dib'),
    'WEBP': ('.webp',),
    'ICO': ('.ico',),
    'HEIC': ('.heic',),
}

# Default formats (all supported formats)
DEFAULT_FORMATS = list(SUPPORTED_FORMATS.keys())

# Formats that can potentially be repaired (keys of SUPPORTED_FORMATS)
REPAIRABLE_FORMATS = ['JPEG', 'PNG', 'GIF']

# Default progress directory (under the current user's home directory)
DEFAULT_PROGRESS_DIR = os.path.expanduser("~/.bad_image_finder/progress")

# Current version
VERSION = "1.5.1"

# Security: Maximum file size to process (100 MiB) to prevent DoS
MAX_FILE_SIZE = 100 * 1024 * 1024

# Security: Maximum total pixel count (50,000 x 50,000 = 2.5 billion pixels)
# to prevent decompression bombs.
# NOTE(review): this comment previously said "50 megapixels", which would be
# 50 * 1000 * 1000; the value below is 50x larger per side - confirm which
# limit is intended.
MAX_IMAGE_PIXELS = 50000 * 50000

def setup_logging(verbose, no_color=False):
    """
    Configure the root logger with a single stream handler.

    Args:
        verbose: When true, log at DEBUG level; otherwise at INFO level.
        no_color: When true, emit plain text instead of ANSI-colored output.
    """
    level = logging.DEBUG if verbose else logging.INFO
    log_format = '%(asctime)s - %(levelname)s - %(message)s'

    if not no_color:
        # Color scheme, keyed by level name
        COLORS = {
            'DEBUG': colorama.Fore.CYAN,
            'INFO': colorama.Fore.GREEN,
            'WARNING': colorama.Fore.YELLOW,
            'ERROR': colorama.Fore.RED,
            'CRITICAL': colorama.Fore.MAGENTA + colorama.Style.BRIGHT,
            'RESET': colorama.Style.RESET_ALL
        }

        class ColoredFormatter(logging.Formatter):
            """Formatter that wraps the level name and message in ANSI colors."""

            def format(self, record):
                levelname = record.levelname
                if levelname not in COLORS:
                    return super().format(record)

                # The same LogRecord object is handed to every handler, so the
                # colored text is applied only for the duration of this call
                # and the original fields are put back afterwards.
                plain_levelname, plain_msg = record.levelname, record.msg
                record.levelname = f"{COLORS[levelname]}{levelname}{COLORS['RESET']}"
                record.msg = f"{COLORS[levelname]}{plain_msg}{COLORS['RESET']}"
                try:
                    return super().format(record)
                finally:
                    record.levelname = plain_levelname
                    record.msg = plain_msg

        formatter = ColoredFormatter(log_format)
    else:
        formatter = logging.Formatter(log_format)

    handler = logging.StreamHandler()
    handler.setFormatter(formatter)

    logging.basicConfig(
        level=level,
        handlers=[handler]
    )

def diagnose_image_issue(file_path):
    """
    Classify the most likely reason an image file fails validation.

    The checks run in order: empty file, magic-number check for files with a
    JPEG or PNG extension, PIL verify(), then PIL load().

    Returns: (error_type, details)
    """
    try:
        with open(file_path, 'rb') as stream:
            magic = stream.read(16)  # enough for the JPEG and PNG signatures

        # Zero-byte file
        if not magic:
            return "empty_file", "File is empty (0 bytes)"

        # Signature check, selected by file extension
        lowered = file_path.lower()
        if lowered.endswith(SUPPORTED_FORMATS['JPEG']):
            if not magic.startswith(b'\xff\xd8\xff'):
                return "invalid_header", "Invalid JPEG header"
        elif lowered.endswith(SUPPORTED_FORMATS['PNG']):
            if not magic.startswith(b'\x89PNG\r\n\x1a\n'):
                return "invalid_header", "Invalid PNG header"

        # Let PIL verify the file and map its error text onto a category
        try:
            with Image.open(file_path) as img:
                img.verify()
        except Exception as exc:
            text = str(exc).lower()
            # First matching row wins; order matters
            classifications = (
                (("truncated",), ("truncated", "File is truncated")),
                (("corrupt",), ("corrupt_data", "Data corruption detected")),
                (("incorrect mode", "decoder"), ("decoder_issue", "Image decoder issue")),
            )
            for needles, verdict in classifications:
                if any(needle in text for needle in needles):
                    return verdict
            return "unknown", f"Unknown issue: {str(exc)}"

        # verify() passed; now try decoding the pixel data
        try:
            with Image.open(file_path) as img:
                img.load()
        except Exception as exc:
            return "data_load_failed", f"Image data couldn't be loaded: {str(exc)}"

        # Nothing above could pin the problem down
        return "unknown", "Unknown issue"

    except Exception as exc:
        return "access_error", f"Error accessing file: {str(exc)}"

def check_jpeg_structure(file_path):
    """
    Performs a deep check of JPEG file structure to find corruption that PIL might miss.

    Checks, in order: SOI marker at the start, EOI marker at the very end,
    presence of at least one Start-of-Frame marker and a Start-of-Scan marker,
    then walks the file validating the length field of each length-carrying
    segment it recognises.

    Args:
        file_path: Path to the JPEG file.

    Returns (is_valid, error_message). Any exception (including I/O errors)
    is reported as (False, "Error during JPEG structure check: ...").
    """
    try:
        with open(file_path, 'rb') as f:
            data = f.read()

        # Check for correct JPEG header (SOI marker)
        if not data.startswith(b'\xFF\xD8'):
            return False, "Invalid JPEG header (missing SOI marker)"

        # Check for proper EOI marker at the end.
        # NOTE: is_valid_image() compares against this exact message.
        if not data.endswith(b'\xFF\xD9'):
            return False, "Missing EOI marker at end of file"

        # SOF marker (Start of Frame) - at least one should be present.
        # SOF codes are 0xC0-0xCF except 0xC4 (DHT), 0xC8 (JPG) and 0xCC (DAC),
        # which covers the extended/arithmetic/lossless variants as well as
        # the baseline and progressive ones.
        sof_markers = [
            bytes((0xFF, code))
            for code in range(0xC0, 0xD0)
            if code not in (0xC4, 0xC8, 0xCC)
        ]
        if not any(marker in data for marker in sof_markers):
            return False, "No Start of Frame (SOF) marker found"

        # Check for SOS marker (Start of Scan)
        if b'\xFF\xDA' not in data:
            return False, "No Start of Scan (SOS) marker found"

        # Scan through the file to check marker structure
        size = len(data)
        i = 2  # Skip SOI marker
        while i < size - 1:
            code = data[i + 1]
            # 0xFF00 is a stuffed byte and 0xFFFF is fill, neither is a marker
            if data[i] == 0xFF and code != 0x00 and code != 0xFF:
                # For markers with length fields, validate length.
                # 0xC4 and 0xC8 are left out of this range, so their payloads
                # are walked byte by byte.
                # NOTE(review): DHT (0xC4) does carry a length field; confirm
                # whether it should be validated here too.
                if (0xC0 <= code <= 0xCF and code != 0xC4 and code != 0xC8) or \
                   (0xDB <= code <= 0xFE):
                    if i + 4 >= size:
                        return False, f"Truncated marker {code:02X} at position {i}"
                    length = struct.unpack('>H', data[i+2:i+4])[0]
                    # The length field counts its own two bytes, so anything
                    # below 2 is malformed; anything past EOF is truncated.
                    if length < 2 or i + 2 + length > size:
                        return False, f"Invalid segment length for marker {code:02X}"
                    i += 2 + length
                    continue

            # Move to next byte
            i += 1

        return True, "JPEG structure appears valid"
    except Exception as e:
        return False, f"Error during JPEG structure check: {str(e)}"

def check_png_structure(file_path):
    """
    Performs a deep check of PNG file structure to find corruption.

    Validates the signature, the trailing IEND chunk, and then walks the chunk
    list checking that every chunk fits inside the file, that IHDR comes first
    with a 13-byte payload, that nothing follows IEND, and that the required
    IHDR and IDAT chunks are present. Chunk CRCs are not verified here.

    Args:
        file_path: Path to the PNG file.

    Returns (is_valid, error_message). Any exception (including I/O errors)
    is reported as (False, "Error during PNG structure check: ...").
    """
    try:
        with open(file_path, 'rb') as f:
            data = f.read()

        # Check for PNG signature
        png_signature = b'\x89PNG\r\n\x1a\n'
        if not data.startswith(png_signature):
            return False, "Invalid PNG signature"

        # Check minimum viable PNG (signature + IHDR chunk)
        if len(data) < 8 + 12:  # 8 bytes signature + 12 bytes min IHDR chunk
            return False, "PNG file too small to contain valid header"

        # Check for IEND chunk at the end (type bytes followed by its fixed CRC)
        if not data.endswith(b'IEND\xaeB`\x82'):
            return False, "Missing IEND chunk at end of file"

        # Parse chunks
        size = len(data)
        pos = 8  # Skip signature
        # A PNG datastream needs image data as well as a header, so IDAT is
        # tracked alongside IHDR (IEND is covered by the endswith check above).
        required_chunks = {'IHDR': False, 'IDAT': False}

        while pos < size:
            if pos + 8 > size:
                return False, "Truncated chunk header"

            # Read chunk length and type
            chunk_len = struct.unpack('>I', data[pos:pos+4])[0]
            chunk_type = data[pos+4:pos+8].decode('ascii', errors='replace')

            # Length (4) + Type (4) + Data (chunk_len) + CRC (4)
            chunk_end = pos + chunk_len + 12

            # Validate chunk length
            if chunk_end > size:
                return False, f"Truncated {chunk_type} chunk"

            # Track required chunks
            if chunk_type in required_chunks:
                required_chunks[chunk_type] = True

            # Special validation for IHDR chunk
            if chunk_type == 'IHDR' and chunk_len != 13:
                return False, "Invalid IHDR chunk length"

            # Mandatory IHDR must be first chunk
            if pos == 8 and chunk_type != 'IHDR':
                return False, "First chunk must be IHDR"

            # IEND must be the last chunk
            if chunk_type == 'IEND' and chunk_end != size:
                return False, "Data after IEND chunk"

            # Move to next chunk
            pos = chunk_end

        # Verify required chunks
        for chunk, present in required_chunks.items():
            if not present:
                return False, f"Missing required {chunk} chunk"

        return True, "PNG structure appears valid"
    except Exception as e:
        return False, f"Error during PNG structure check: {str(e)}"

def validate_subprocess_path(file_path):
    """
    Validate file path before passing to subprocess to prevent command injection.

    Args:
        file_path: Path to validate

    Returns:
        True if path is safe

    Raises:
        ValueError: If the path is relative, contains a null byte, does not
            exist, or contains dangerous characters or patterns
    """
    # Must be an absolute path
    if not os.path.isabs(file_path):
        raise ValueError(f"Path must be absolute: {file_path}")

    # Block null bytes. This runs before any filesystem call: os.path.exists()
    # simply reports such a path as missing, which would otherwise hide this
    # check behind a misleading "File does not exist" error.
    if '\x00' in file_path:
        raise ValueError("Null byte detected in path")

    # File must exist
    if not os.path.exists(file_path):
        raise ValueError(f"File does not exist: {file_path}")

    # Check for shell metacharacters and dangerous patterns
    # Allow: alphanumeric, spaces, dots, dashes, underscores, forward slashes
    # Block: semicolons, pipes, backticks, $, &, >, <, etc.
    dangerous_chars = ['`', '$', '&', '|', ';', '>', '<', '\n', '\r', '(', ')']
    for char in dangerous_chars:
        if char in file_path:
            raise ValueError(f"Dangerous character '{char}' found in path: {file_path}")

    # Block path traversal attempts (substring match, so any name containing
    # two consecutive dots is rejected as well)
    if '..' in file_path:
        raise ValueError(f"Path traversal pattern '..' detected: {file_path}")

    return True


def _run_external_tool(command):
    """Run one external checker; return its CompletedProcess, or None if it could not be run."""
    try:
        # errors='replace' keeps undecodable bytes in the tool's output from
        # raising while the text streams are decoded.
        return subprocess.run(command, capture_output=True, text=True,
                              errors='replace', timeout=5)
    except (subprocess.SubprocessError, OSError):
        # Tool missing, not executable, or timed out
        return None


def try_external_tools(file_path):
    """
    Try using external tools to validate the image if they're available.

    exiftool and ImageMagick's identify are tried independently, so a missing
    or timed-out exiftool does not prevent identify from running.

    Args:
        file_path: Path to the image file.

    Returns (is_valid, message). The image is reported valid when a tool is
    unavailable or the path fails the security check; only a positive error
    report from a tool yields False.

    Security: Validates file path before passing to subprocess to prevent
    command injection attacks.
    """
    # Validate path before passing to subprocess
    try:
        validate_subprocess_path(file_path)
    except ValueError as e:
        logging.warning(f"Skipping external tool validation due to security check: {e}")
        return True, "External tools check skipped (security)"

    # exiftool prints the value of its Error tag, if any, via the -p format
    exiftool = _run_external_tool(['exiftool', '-m', '-p', '$Error', file_path])
    if exiftool is not None and exiftool.returncode == 0 and exiftool.stdout.strip():
        return False, f"Exiftool error: {exiftool.stdout.strip()}"

    # identify (ImageMagick) exits non-zero when it cannot read the image
    identify = _run_external_tool(['identify', '-verbose', file_path])
    if identify is not None and identify.returncode != 0:
        return False, "ImageMagick identify failed to read the image"

    if exiftool is None and identify is None:
        # Neither tool could be run
        return True, "External tools check skipped"
    return True, "Passed external tool validation"

def try_full_decode_check(file_path):
    """
    Try to fully decode the image, and re-encode it in memory where possible.
    This catches more subtle corruption that might otherwise be missed.

    Args:
        file_path: Path to the image file.

    Returns:
        (is_valid, message)
    """
    # Modes that Pillow documents as writable to BMP. Images in any other mode
    # (CMYK JPEGs, LA PNGs, ...) are only decoded, because failing to *write*
    # a BMP says nothing about whether the source file is corrupt.
    bmp_writable_modes = ("1", "L", "P", "RGB")

    try:
        with Image.open(file_path) as img:
            # Force a complete decode of the pixel data
            img.load()

            if img.mode in bmp_writable_modes:
                # Re-encode into memory; avoids reopening a temporary file by
                # name while it is still held open.
                img.save(io.BytesIO(), format="BMP")

            # If we get here, the image data could be fully decoded
            return True, "Full decode test passed"
    except Exception as e:
        return False, f"Full decode test failed: {str(e)}"
        
def check_visual_corruption(file_path, block_threshold=0.20, uniform_threshold=10, strict_mode=False):
    """
    Analyze image content to detect visual corruption like large uniform areas.

    The image is sampled on a regular grid, each sample is quantized into a
    color bucket, and the share of the most common bucket is compared with
    block_threshold.

    Args:
        file_path: Path to the image file
        block_threshold: Fraction of samples that must share one color bucket to be considered (0.0-1.0)
        uniform_threshold: Bucket width used when quantizing each channel
        strict_mode: If True, also run the color-fragmentation and color-distribution heuristics

    Returns:
        (is_visually_corrupt, details). Any exception during analysis is
        reported as (False, "Error during visual analysis: ...").
    """
    try:
        with Image.open(file_path) as img:
            width, height = img.size

            # Convert to RGB to ensure consistent analysis
            rgb = img if img.mode == "RGB" else img.convert("RGB")

            # Sample on a grid (analyzing every pixel would be too slow);
            # the step is derived from the shorter image side.
            sample_step = max(1, min(width, height) // 150)

            # Pixel-access object: indexing it avoids a method call per sample
            pixels = rgb.load()

            # Count samples per quantized color bucket
            color_counts = {}
            total_samples = 0
            for y in range(0, height, sample_step):
                for x in range(0, width, sample_step):
                    total_samples += 1
                    pixel = pixels[x, y]

                    # Round channel values down to a multiple of
                    # uniform_threshold to reduce sensitivity to minor variations
                    rounded_pixel = (
                        pixel[0] // uniform_threshold * uniform_threshold,
                        pixel[1] // uniform_threshold * uniform_threshold,
                        pixel[2] // uniform_threshold * uniform_threshold
                    )
                    color_counts[rounded_pixel] = color_counts.get(rounded_pixel, 0) + 1

            # Find the most common color bucket
            most_common_color = max(color_counts.items(), key=lambda item: item[1])
            most_common_percentage = most_common_color[1] / total_samples

            # Check for large blocks of uniform color (potential corruption)
            if most_common_percentage > block_threshold:
                affected_pct = most_common_percentage * 100
                color_value = most_common_color[0]

                # Very dark areas
                is_dark = sum(color_value) < 3 * uniform_threshold

                # Gray area: all three channels within uniform_threshold of each other
                is_gray = abs(color_value[0] - color_value[1]) < uniform_threshold and \
                          abs(color_value[1] - color_value[2]) < uniform_threshold and \
                          abs(color_value[0] - color_value[2]) < uniform_threshold

                # Only consider mid-range grays as corruption indicators (not white/black)
                is_mid_gray = is_gray and 30 < sum(color_value)/3 < 220

                # Special case: almost pure white is often legitimate content
                is_white = color_value[0] > 240 and color_value[1] > 240 and color_value[2] > 240

                if (is_dark or is_mid_gray) and not is_white:
                    return True, f"Visual corruption detected: {affected_pct:.1f}% of image is uniform {color_value}"

                # Could be a legitimate image with a uniform background
                return False, f"Large uniform area ({affected_pct:.1f}%) but likely not corruption"

            # Check for other telltale signs of corruption - but only in strict mode
            if strict_mode:
                # 1. Excessive color fragmentation: almost every sample lands in its own bucket
                if len(color_counts) > total_samples * 0.85 and total_samples > 200:
                    return True, f"Excessive color fragmentation detected ({len(color_counts)} colors in {total_samples} samples)"

                # 2. Unusually flat color distribution, only for images with enough samples
                if total_samples > 500:
                    sorted_counts = sorted(color_counts.values(), reverse=True)

                    if len(sorted_counts) > 5:
                        top5_ratio = sum(sorted_counts[:5]) / sum(sorted_counts)
                        # Flag when the five most common buckets together cover
                        # less than 20% of the samples and the single most
                        # common one covers less than 10%.
                        if top5_ratio < 0.2 and most_common_percentage < 0.1:
                            return True, "Unusual color distribution (possible noise/corruption)"

            return False, "No visual corruption detected"

    except Exception as e:
        return False, f"Error during visual analysis: {str(e)}"

def is_valid_image(file_path, thorough=True, sensitivity='medium', ignore_eof=False, check_visual=False, visual_strictness='medium'):
    """
    Decide whether an image file is intact, using progressively deeper checks.

    Order of checks: PIL verify() and load(), optional visual analysis, then
    (unless thorough is off or sensitivity is 'low') format-specific structure
    checks for files with a JPEG or PNG extension.

    Args:
        file_path: Path to the image file
        thorough: Whether to perform deep structure validation
        sensitivity: 'low', 'medium', or 'high'; 'low' skips the deep checks
        ignore_eof: Whether to ignore a missing JPEG end-of-file marker
        check_visual: Whether to perform visual content analysis to detect corruption
        visual_strictness: 'low', 'medium', or 'high' strictness for visual corruption detection

    Returns:
        True if valid, False if corrupt (any exception counts as corrupt).
    """
    try:
        with Image.open(file_path) as img:
            # Fast check: verify() validates the file without decoding pixels
            img.verify()

            # A fresh handle is opened for the pixel decode
            with Image.open(file_path) as decoded:
                decoded.load()

            if check_visual:
                # Medium strictness unless a preset below matches:
                #   low  - only very obvious corruption (30% uniform)
                #   high - subtle corruption, may give false positives (15%)
                block_threshold, uniform_threshold = 0.20, 10
                for preset, limits in (('low', (0.3, 5)), ('high', (0.15, 15))):
                    if visual_strictness == preset:
                        block_threshold, uniform_threshold = limits
                        break

                is_visually_corrupt, msg = check_visual_corruption(
                    file_path,
                    block_threshold=block_threshold,
                    uniform_threshold=uniform_threshold,
                    # Extra heuristics only at high strictness
                    strict_mode=(visual_strictness == 'high')
                )
                if is_visually_corrupt:
                    logging.debug(f"Visual corruption detected in {file_path}: {msg}")
                    return False

            # Basic checks are all that was asked for
            if not thorough or sensitivity == 'low':
                return True

            lowered = file_path.lower()

            if lowered.endswith(tuple(SUPPORTED_FORMATS['JPEG'])):
                structure_ok, reason = check_jpeg_structure(file_path)
                if not structure_ok:
                    # A missing EOI marker alone can be tolerated on request
                    eof_only = ignore_eof and reason == "Missing EOI marker at end of file"
                    if not eof_only:
                        logging.debug(f"JPEG structure invalid for {file_path}: {reason}")
                        return False
                    logging.debug(f"Ignoring missing EOI marker for {file_path} as requested")

                follow_ups = (
                    (try_full_decode_check, "Full decode test failed for"),
                    (try_external_tools, "External tool validation failed for"),
                )

            elif lowered.endswith(tuple(SUPPORTED_FORMATS['PNG'])):
                structure_ok, reason = check_png_structure(file_path)
                if not structure_ok:
                    logging.debug(f"PNG structure invalid for {file_path}: {reason}")
                    return False

                follow_ups = (
                    (try_full_decode_check, "Full decode test failed for"),
                )

            else:
                # Other formats get no deep checks
                follow_ups = ()

            # Remaining deep checks, in order; the first failure ends validation
            for check, label in follow_ups:
                passed, reason = check(file_path)
                if not passed:
                    logging.debug(f"{label} {file_path}: {reason}")
                    return False

            return True
    except Exception as e:
        logging.debug(f"Invalid image {file_path}: {str(e)}")
        return False

def _restore_original_bytes(file_path, original_bytes):
    """Write the pre-repair bytes back to file_path; log a warning instead of raising."""
    try:
        with open(file_path, 'wb') as f:
            f.write(original_bytes)
    except OSError as e:
        logging.warning(f"Could not restore original contents of {file_path}: {str(e)}")


def attempt_repair(file_path, backup_dir=None):
    """
    Attempts to repair corrupt image files by re-saving them through PIL.

    The file is rewritten in place. If the rewritten file still fails
    validation, or an error occurs after the file has been overwritten, the
    original bytes are written back so a failed repair does not leave the
    file modified.

    Args:
        file_path: Path to the image to repair.
        backup_dir: Optional directory that receives a "<basename>.bak" copy
            before any repair is attempted.

    Returns: (success, message, fixed_width, fixed_height)
    """
    # Create backup if requested
    if backup_dir:
        backup_path = os.path.join(backup_dir, os.path.basename(file_path) + ".bak")
        try:
            shutil.copy2(file_path, backup_path)
            logging.debug(f"Created backup at {backup_path}")
        except Exception as e:
            logging.warning(f"Could not create backup: {str(e)}")

    try:
        # First, diagnose the issue
        issue_type, details = diagnose_image_issue(file_path)
        logging.debug(f"Diagnosis for {file_path}: {issue_type} - {details}")

        file_ext = os.path.splitext(file_path)[1].lower()

        # Check if file format is supported for repair
        format_supported = any(
            file_ext in SUPPORTED_FORMATS[fmt] for fmt in REPAIRABLE_FORMATS
        )
        if not format_supported:
            return False, f"Format not supported for repair ({file_ext})", None, None

        # Try to open and resave the image with PIL's error forgiveness
        # This works for many truncated files
        original_bytes = None
        file_overwritten = False
        try:
            # Snapshot the file so it can be put back if the repair fails
            with open(file_path, 'rb') as f:
                original_bytes = f.read()

            with Image.open(file_path) as img:
                width, height = img.size
                image_format = img.format

                # Encode into memory first so a failing save leaves the file untouched
                buffer = io.BytesIO()
                img.save(buffer, format=image_format)

                # Write the repaired image back to the original file
                file_overwritten = True
                with open(file_path, 'wb') as f:
                    f.write(buffer.getvalue())

                # Verify the repaired image
                if is_valid_image(file_path):
                    return True, f"Repaired {issue_type} issue", width, height

                # If verification fails, try again with JPEG specific options for JPEG files
                if image_format == 'JPEG':
                    with Image.open(file_path) as resaved:
                        buffer = io.BytesIO()
                        # Use optimize=True and quality=85 for better repair chances
                        resaved.save(buffer, format='JPEG', optimize=True, quality=85)
                        with open(file_path, 'wb') as f:
                            f.write(buffer.getvalue())

                        if is_valid_image(file_path):
                            return True, f"Repaired {issue_type} issue with JPEG optimization", width, height

            # Every attempt failed validation: undo the in-place rewrite
            _restore_original_bytes(file_path, original_bytes)
            return False, f"Failed to repair {issue_type} issue", None, None

        except Exception as e:
            if file_overwritten:
                _restore_original_bytes(file_path, original_bytes)
            logging.debug(f"Repair attempt failed for {file_path}: {str(e)}")
            return False, f"Repair failed: {str(e)}", None, None

    except Exception as e:
        logging.debug(f"Error during repair of {file_path}: {str(e)}")
        return False, f"Repair error: {str(e)}", None, None

def process_file(args):
    """
    Process a single image file: optional security screening, validation, and
    optional repair.

    Args:
        args: A 9-tuple of (file_path, repair_mode, repair_dir, thorough_check,
            sensitivity, ignore_eof, check_visual, visual_strictness,
            enable_security_checks), packed so the function takes one argument.

    Returns:
        (file_path, is_valid, size, status, message, dimensions) where status
        is one of "security_failed", "repaired", "repair_failed" or
        "not_repaired"; size is the file size for invalid files and 0
        otherwise; dimensions is (width, height) only for repaired files.
    """
    (file_path, repair_mode, repair_dir, thorough_check, sensitivity,
     ignore_eof, check_visual, visual_strictness, enable_security_checks) = args

    # Optional security screening before the file is decoded
    if enable_security_checks:
        try:
            is_safe, warnings = validate_file_security(file_path, check_size=True, check_dimensions=True)

            # Surface non-fatal findings
            for warning in warnings:
                logging.warning(f"Security warning for {file_path}: {warning}")

            if not is_safe:
                # Failed screening counts as an invalid file
                return (file_path, False, os.path.getsize(file_path),
                        "security_failed", "Failed security validation", None)

        except ValueError as e:
            # Critical security failure (file too large, dimensions too big, etc.)
            logging.error(f"Security check failed for {file_path}: {e}")
            size = os.path.getsize(file_path) if os.path.exists(file_path) else 0
            return file_path, False, size, "security_failed", str(e), None
        except Exception as e:
            # Unexpected screening error: log it and carry on with validation
            logging.debug(f"Security validation error for {file_path}: {e}")

    is_valid = is_valid_image(file_path, thorough=thorough_check, sensitivity=sensitivity,
                              ignore_eof=ignore_eof, check_visual=check_visual,
                              visual_strictness=visual_strictness)

    # Valid file, or repairs not requested: report the result as-is
    if is_valid or not repair_mode:
        size = 0 if is_valid else os.path.getsize(file_path)
        return file_path, is_valid, size, "not_repaired", None, None

    # Invalid file with repairs enabled
    repair_success, repair_msg, width, height = attempt_repair(file_path, repair_dir)

    if not repair_success:
        # File is still corrupt
        return file_path, False, os.path.getsize(file_path), "repair_failed", repair_msg, None

    return file_path, True, 0, "repaired", repair_msg, (width, height)

def get_session_id(directory, formats, recursive):
    """
    Derive a short identifier for a scan from its parameters.

    The directory, the sorted comma-joined format names, and the recursive
    flag are fed to SHA-256 in that order, so the same parameters always give
    the same ID regardless of the order of `formats`.

    Returns:
        The first 16 hex characters of the SHA-256 digest.
    """
    fields = (
        str(directory),
        ",".join(sorted(formats)),
        str(recursive),
    )

    # SHA-256 rather than MD5, which is cryptographically broken
    digest = hashlib.sha256()
    for field in fields:
        digest.update(field.encode('utf-8'))

    return digest.hexdigest()[:16]

def _deduplicate(seq):
    """Return the items of seq as a list, keeping only the first occurrence of each."""
    # Dict keys are unique and keep insertion order, which gives
    # first-occurrence de-duplication directly.
    return list(dict.fromkeys(seq))


def validate_file_security(file_path, check_size=True, check_dimensions=True):
    """
    Perform security validation on a file before processing.

    Args:
        file_path: Path to the file (str, bytes or os.PathLike)
        check_size: Whether to check file size limits
        check_dimensions: Whether to check image dimension limits (this opens
            the file with PIL and also compares the detected format against
            the file extension)

    Returns:
        (is_safe, warnings) - tuple of boolean and list of warning messages.
        ``is_safe`` is always True on return; every hard failure is reported
        by raising instead.

    Raises:
        ValueError: If the file does not exist, exceeds MAX_FILE_SIZE, cannot
            be opened/identified as an image, or exceeds MAX_IMAGE_PIXELS
    """
    warnings = []

    # Normalise to str so the extension checks below (``.lower()``,
    # ``.split('.')``) also work when a pathlib.Path or bytes path is passed.
    path_str = os.fsdecode(file_path)

    # Check if file exists
    if not os.path.exists(path_str):
        raise ValueError(f"File does not exist: {file_path}")

    # Check file size to prevent DoS via huge files
    if check_size:
        file_size = os.path.getsize(path_str)
        if file_size > MAX_FILE_SIZE:
            raise ValueError(f"File too large ({file_size} bytes, max {MAX_FILE_SIZE}). "
                           f"This could indicate a malicious file or decompression bomb.")

        # Warn about suspiciously large files (over 10MB for images is unusual)
        if file_size > 10 * 1024 * 1024:
            warnings.append(f"Large file size: {humanize.naturalsize(file_size)}")

    # Check image dimensions to prevent decompression bombs
    if check_dimensions:
        # Only the PIL calls live inside the try block. Our own limit check is
        # done afterwards so that its ValueError is not caught by the generic
        # handler and re-wrapped as "Error validating image: ...".
        try:
            with Image.open(path_str) as img:
                width, height = img.size
                actual_format = img.format
        except UnidentifiedImageError as e:
            raise ValueError("Cannot identify image format - file may be corrupted or malicious") from e
        except Exception as e:
            raise ValueError(f"Error validating image: {str(e)}") from e

        total_pixels = width * height

        if total_pixels > MAX_IMAGE_PIXELS:
            raise ValueError(f"Image dimensions too large ({width}x{height} = {total_pixels} pixels, "
                           f"max {MAX_IMAGE_PIXELS}). This could be a decompression bomb attack.")

        # Warn about very large images
        # NOTE(review): the file header describes MAX_IMAGE_PIXELS as 50
        # megapixels; if that is its value, this 100-megapixel threshold can
        # never be reached because such images are rejected above - confirm.
        if total_pixels > 10000 * 10000:
            warnings.append(f"Large image dimensions: {width}x{height}")

        # Check for format mismatch (file extension vs actual format)
        lowered_path = path_str.lower()
        expected_formats = [
            fmt for fmt, extensions in SUPPORTED_FORMATS.items()
            if lowered_path.endswith(extensions)
        ]

        if actual_format and expected_formats and actual_format not in expected_formats:
            warnings.append(f"Format mismatch: file has '{path_str.split('.')[-1]}' extension "
                          f"but is actually '{actual_format}' format")

    return True, warnings


def calculate_file_hash(file_path, algorithm='sha256'):
    """
    Compute the hex digest of a file's contents.

    Args:
        file_path: Path to the file to hash
        algorithm: Name of any algorithm accepted by ``hashlib.new``
            (sha256, sha512, etc.)

    Returns:
        Hexadecimal hash string
    """
    digest = hashlib.new(algorithm)

    # Feed the file through the hash 4 KiB at a time so that large files are
    # never held in memory in full.
    with open(file_path, 'rb') as stream:
        while True:
            block = stream.read(4096)
            if not block:
                break
            digest.update(block)

    return digest.hexdigest()


def safe_join_path(base_dir, user_path):
    """
    Safely join paths and prevent path traversal attacks.

    Both the base directory and the joined result are canonicalised with
    ``os.path.realpath`` (which collapses ``..`` segments and resolves
    symlinks) before the containment check, so neither ``../`` components,
    absolute ``user_path`` values, nor symlinks inside ``base_dir`` can make
    the result point outside of it.

    Args:
        base_dir: Base directory (trusted)
        user_path: User-provided path component (untrusted)

    Returns:
        Safe absolute, symlink-resolved path within base_dir

    Raises:
        ValueError: If path traversal is detected
    """
    # Canonical form of the trusted base directory
    resolved_base = os.path.realpath(base_dir)

    # Join, then canonicalise; realpath resolves symlinks as well as '..'
    full_path = os.path.realpath(os.path.join(resolved_base, user_path))

    # Ensure the result is within base_dir. commonpath compares whole path
    # components, so "/base_evil" is not mistaken for a child of "/base", and
    # a base directory that is the filesystem root is handled correctly
    # (a plain ``startswith(base + os.sep)`` test rejects everything there).
    try:
        shared_root = os.path.commonpath([resolved_base, full_path])
    except ValueError:
        # Raised e.g. for paths on different drives: definitely not inside.
        shared_root = None

    if shared_root is None or os.path.normcase(shared_root) != os.path.normcase(resolved_base):
        raise ValueError(f"Path traversal detected: '{user_path}' resolves outside base directory")

    return full_path


def save_progress(session_id, directory, formats, recursive, processed_files,
                 bad_files, repaired_files, progress_dir=DEFAULT_PROGRESS_DIR):
    """Save the current progress to a file.

    The state is written as JSON to
    ``<progress_dir>/session_<session_id>.progress.json``. The three file
    lists are de-duplicated (order preserved) before being stored.

    The data is first written to a sibling ``.tmp`` file and then moved over
    the real progress file with ``os.replace``, so an interruption or error
    while writing cannot leave a truncated progress file in place of the
    previous good one.

    Args:
        session_id: Identifier used in the progress file name
        directory: Scanned directory (stored as a string)
        formats: Image formats being scanned
        recursive: Whether the scan is recursive
        processed_files: Paths that have finished processing
        bad_files: Paths found to be corrupt
        repaired_files: Paths that were repaired
        progress_dir: Directory in which progress files are kept

    Returns:
        Path of the progress file that was written
    """
    # Create progress directory if it doesn't exist
    os.makedirs(progress_dir, exist_ok=True)

    # Create a progress state object
    progress_state = {
        'version': VERSION,
        'timestamp': datetime.now().isoformat(),
        'directory': str(directory),
        'formats': formats,
        'recursive': recursive,
        'processed_files': _deduplicate(processed_files),
        'bad_files': _deduplicate(bad_files),
        'repaired_files': _deduplicate(repaired_files)
    }

    # Save to file using JSON instead of pickle for security
    # This prevents arbitrary code execution via malicious progress files
    progress_file = os.path.join(progress_dir, f"session_{session_id}.progress.json")

    # The ".tmp" suffix keeps the scratch file from being picked up as a
    # session by code that looks for "*.progress" / "*.progress.json".
    temp_file = f"{progress_file}.tmp"
    try:
        with open(temp_file, 'w') as f:
            json.dump(progress_state, f, indent=2)
        os.replace(temp_file, progress_file)
    finally:
        # After a successful replace the temp file is gone; if it still exists
        # the write failed part-way, so remove the partial file (best effort).
        if os.path.exists(temp_file):
            try:
                os.remove(temp_file)
            except OSError as cleanup_error:
                logging.debug(f"Could not remove temporary progress file {temp_file}: {cleanup_error}")

    logging.debug(f"Progress saved to {progress_file}")
    return progress_file

def load_progress(session_id, progress_dir=DEFAULT_PROGRESS_DIR):
    """Load the saved state of a session, or return None if unavailable.

    A ``session_<id>.progress.json`` file is preferred; if only a legacy
    ``session_<id>.progress`` pickle file exists it is loaded instead, with
    warnings. The three file lists in the loaded state are de-duplicated, and
    a warning is logged when the stored version differs from VERSION.

    Returns:
        The progress state, or None when no progress file exists or loading
        fails for any reason (the failure is logged).
    """
    json_path = os.path.join(progress_dir, f"session_{session_id}.progress.json")
    legacy_path = os.path.join(progress_dir, f"session_{session_id}.progress")

    # Pick the file to read; the JSON variant wins when both are present.
    if os.path.exists(json_path):
        progress_file, is_legacy = json_path, False
    elif os.path.exists(legacy_path):
        progress_file, is_legacy = legacy_path, True
        logging.warning("Loading legacy pickle format. This format is deprecated for security reasons.")
    else:
        return None

    try:
        if is_legacy:
            # SECURITY: unpickling can execute arbitrary code from the file.
            # Kept only for backwards compatibility.
            # TODO: Remove pickle support in future versions
            import pickle
            with open(progress_file, 'rb') as handle:
                progress_state = pickle.load(handle)
            logging.warning("SECURITY WARNING: Loaded progress file using unsafe pickle format. "
                          "Please delete old .progress files and use new .progress.json format.")
        else:
            # JSON cannot carry executable payloads
            with open(progress_file, 'r') as handle:
                progress_state = json.load(handle)

        # Strip repeated entries from whichever file lists are present
        for list_key in ('processed_files', 'bad_files', 'repaired_files'):
            if list_key in progress_state:
                progress_state[list_key] = _deduplicate(progress_state[list_key])

        # A version mismatch is only reported, not treated as an error
        saved_version = progress_state.get('version', '0.0.0')
        if saved_version != VERSION:
            logging.warning("Progress file was created with a different version. Some incompatibilities may exist.")

        logging.info(f"Loaded progress from {progress_file}")
        return progress_state
    except Exception as e:
        logging.error(f"Failed to load progress: {str(e)}")
        return None

def list_saved_sessions(progress_dir=DEFAULT_PROGRESS_DIR):
    """Return summaries of every saved session found in ``progress_dir``.

    Both ``*.progress.json`` files and legacy ``*.progress`` pickle files are
    read. Files that cannot be loaded are skipped (logged at debug level).

    Returns:
        A list of dicts (id, timestamp, directory, formats, processed_count,
        bad_count, repaired_count, filepath, format) sorted by timestamp,
        newest first. Empty if the directory does not exist.
    """
    if not os.path.exists(progress_dir):
        return []

    found = []
    for entry in os.listdir(progress_dir):
        # Only progress files are of interest
        if not entry.endswith(('.progress.json', '.progress')):
            continue

        try:
            full_path = os.path.join(progress_dir, entry)
            is_json = entry.endswith('.progress.json')

            if is_json:
                suffix = '.progress.json'
                with open(full_path, 'r') as handle:
                    state = json.load(handle)
            else:
                # Legacy pickle format.
                # SECURITY: unpickling can execute arbitrary code from the file.
                suffix = '.progress'
                import pickle
                with open(full_path, 'rb') as handle:
                    state = pickle.load(handle)

            found.append({
                # The session ID is the file name minus its fixed parts
                'id': entry.replace('session_', '').replace(suffix, ''),
                'timestamp': state.get('timestamp', 'Unknown'),
                'directory': state.get('directory', 'Unknown'),
                'formats': state.get('formats', []),
                'processed_count': len(state.get('processed_files', [])),
                'bad_count': len(state.get('bad_files', [])),
                'repaired_count': len(state.get('repaired_files', [])),
                'filepath': full_path,
                'format': 'JSON' if is_json else 'Pickle (Legacy)'
            })
        except Exception as e:
            logging.debug(f"Failed to load session from {entry}: {str(e)}")

    # Newest first
    return sorted(found, key=lambda info: info['timestamp'], reverse=True)

def get_extensions_for_formats(formats):
    """Collect the file extensions of every recognised format in ``formats``.

    Format names that are not keys of SUPPORTED_FORMATS are ignored. The
    result is a tuple, in the order the formats were given.
    """
    return tuple(
        ext
        for name in formats
        if name in SUPPORTED_FORMATS
        for ext in SUPPORTED_FORMATS[name]
    )

def find_image_files(directory, formats, recursive=True):
    """Return the paths of files under ``directory`` matching ``formats``.

    Matching is by file extension, case-insensitively. With ``recursive`` the
    whole tree is walked; otherwise only regular files directly inside
    ``directory`` are considered. An empty list is returned (with a warning)
    when none of the requested formats maps to any extension.
    """
    wanted_suffixes = get_extensions_for_formats(formats)
    if not wanted_suffixes:
        logging.warning("No valid image formats specified!")
        return []

    format_names = ", ".join(formats)

    if not recursive:
        logging.info(f"Scanning for {format_names} files in {directory} (non-recursive)...")
        matches = [
            os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, entry))
            and entry.lower().endswith(wanted_suffixes)
        ]
    else:
        logging.info(f"Recursively scanning for {format_names} files...")
        matches = [
            os.path.join(folder, name)
            for folder, _, names in os.walk(directory)
            for name in names
            if name.lower().endswith(wanted_suffixes)
        ]

    logging.info(f"Found {len(matches)} image files")
    return matches

def process_images(directory, formats, dry_run=True, repair=False,
                  max_workers=None, recursive=True, move_to=None, repair_dir=None,
                  save_progress_interval=5, resume_session=None, progress_dir=DEFAULT_PROGRESS_DIR,
                  thorough_check=False, sensitivity='medium', ignore_eof=False, check_visual=False,
                  visual_strictness='medium', enable_security_checks=False):
    """Find corrupt image files and optionally repair, delete, or move them.

    Files are checked in parallel worker processes via ``process_file``.
    Progress is saved periodically, on Ctrl-C, and once more at the end so an
    interrupted scan can be resumed.

    Args:
        directory: Directory to scan
        formats: Image format names to look for
        dry_run: If True, corrupt files are only reported ("Would delete")
        repair: Passed to the workers to enable repair attempts
        max_workers: Worker process count for the ProcessPoolExecutor
        recursive: Whether to scan subdirectories
        move_to: If set (and not a dry run), corrupt files are moved here,
            keeping their path relative to ``directory``, instead of deleted
        repair_dir: Backup directory handed to the workers for repairs
        save_progress_interval: Minutes between periodic progress saves;
            0 or less disables periodic saving
        resume_session: Session ID whose saved progress should be reused
        progress_dir: Directory holding progress files
        thorough_check, sensitivity, ignore_eof, check_visual,
        visual_strictness, enable_security_checks: Forwarded unchanged to
            each worker

    Returns:
        (bad_files, repaired_files, total_size_saved). When resuming, the two
        lists also contain the entries loaded from the earlier session;
        ``total_size_saved`` only counts files handled in this run.

    Raises:
        KeyboardInterrupt: Re-raised after progress has been saved
    """
    start_time = time.time()

    # Generate session ID for this scan
    session_id = get_session_id(directory, formats, recursive)
    processed_files = []
    bad_files = []
    repaired_files = []
    total_size_saved = 0
    last_progress_save = time.time()

    # If resuming, load previous progress
    if resume_session:
        try:
            progress = load_progress(resume_session, progress_dir)
            if progress and progress['directory'] == str(directory) and progress['formats'] == formats:
                processed_files = progress['processed_files']
                bad_files = progress['bad_files']
                repaired_files = progress['repaired_files']
                logging.info(f"Resuming session: {len(processed_files)} files already processed")
            else:
                if progress:
                    logging.warning("Session parameters don't match current parameters. Starting fresh scan.")
                else:
                    logging.warning(f"Couldn't find session {resume_session}. Starting fresh scan.")
        except Exception as e:
            logging.error(f"Error loading session: {str(e)}. Starting fresh scan.")

    # Find all image files
    image_files = find_image_files(directory, formats, recursive)
    if not image_files:
        logging.warning("No image files found!")
        return [], [], 0

    # Filter out already processed files if resuming
    if processed_files:
        # Membership tests against a set keep this linear; testing against
        # the list directly is quadratic on large resumed scans.
        already_processed = set(processed_files)
        remaining_files = [f for f in image_files if f not in already_processed]
        skipped_count = len(image_files) - len(remaining_files)
        image_files = remaining_files
        logging.info(f"Skipping {skipped_count} already processed files")

    if not image_files:
        logging.info("All files have already been processed in the previous session!")
        return bad_files, repaired_files, total_size_saved

    # Create directories if they don't exist
    if move_to and not os.path.exists(move_to):
        os.makedirs(move_to)
        logging.info(f"Created directory for corrupt files: {move_to}")

    if repair and repair_dir and not os.path.exists(repair_dir):
        os.makedirs(repair_dir)
        logging.info(f"Created directory for backup files: {repair_dir}")

    # Prepare input arguments for workers
    input_args = [(file_path, repair, repair_dir, thorough_check, sensitivity, ignore_eof, check_visual, visual_strictness, enable_security_checks) for file_path in image_files]

    # Process files in parallel
    logging.info("Processing files in parallel...")

    # Create a custom progress bar class that saves progress periodically
    class ProgressSavingBar(tqdm_auto.tqdm):
        def update(self, n=1):
            nonlocal last_progress_save
            result = super().update(n)

            # Save progress periodically (interval is expressed in minutes)
            current_time = time.time()
            if save_progress_interval > 0 and current_time - last_progress_save >= save_progress_interval * 60:
                # Save the progress using the lists of files that have
                # actually completed processing. The tracking lists are
                # updated before this method is called for each future, so
                # they can be persisted as-is.
                save_progress(
                    session_id,
                    directory,
                    formats,
                    recursive,
                    processed_files,
                    bad_files,
                    repaired_files,
                    progress_dir,
                )

                last_progress_save = current_time
                logging.debug(f"Progress saved at {self.n} / {len(image_files)} files")

            return result

    try:
        with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
            # Colorful progress bar with progress saving
            results = []
            futures = {executor.submit(process_file, arg): arg[0] for arg in input_args}

            with ProgressSavingBar(
                total=len(image_files),
                desc=f"{colorama.Fore.BLUE}Checking image files{colorama.Style.RESET_ALL}",
                unit="file",
                bar_format="{desc}: {percentage:3.0f}%|{bar:30}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]",
                colour="blue"
            ) as pbar:
                for future in concurrent.futures.as_completed(futures):
                    file_path = futures[future]
                    try:
                        result = future.result()
                        result_path, is_valid, _size, repair_status, _repair_msg, _dimensions = result
                        results.append(result)

                        # Track this file as processed for resuming later if needed
                        processed_files.append(file_path)

                        # Record the outcome BEFORE advancing the progress
                        # bar: pbar.update() may write a progress snapshot,
                        # and a snapshot that lists a corrupt file as
                        # processed but not as bad would make a resumed scan
                        # skip it without ever reporting it.
                        if repair_status == "repaired":
                            repaired_files.append(result_path)
                        elif not is_valid:
                            bad_files.append(result_path)

                        # Update progress for successful or failed processing
                        pbar.update(1)

                    except Exception as e:
                        logging.error(f"Error processing {file_path}: {str(e)}")
                        pbar.update(1)
    except KeyboardInterrupt:
        # If the user interrupts, save progress before exiting
        logging.warning("Process interrupted by user. Saving progress...")
        save_progress(session_id, directory, formats, recursive,
                     processed_files, bad_files, repaired_files, progress_dir)
        logging.info(f"Progress saved. You can resume with --resume {session_id}")
        raise

    # Process results
    for file_path, is_valid, size, repair_status, repair_msg, dimensions in results:
        if repair_status == "repaired":
            # File was successfully repaired (already added to repaired_files during processing)
            width, height = dimensions
            msg = f"Repaired: {file_path} ({width}x{height}) - {repair_msg}"
            logging.info(msg)
        elif not is_valid:
            # File is corrupt and wasn't repaired (or repair failed)
            # (already added to bad_files during processing)
            total_size_saved += size

            size_str = humanize.naturalsize(size)
            if repair_status == "repair_failed":
                fail_msg = f"Repair failed: {file_path} ({size_str}) - {repair_msg}"
                logging.warning(fail_msg)

            if dry_run:
                msg = f"Would delete: {file_path} ({size_str})"
                logging.info(msg)
            elif move_to:
                # Preserve the subdirectory structure by getting the relative path from the search directory
                try:
                    # Get the relative path from the base directory
                    rel_path = os.path.relpath(file_path, str(directory))
                    # If relpath starts with ".." it means file_path is not within directory
                    # In this case, just use the basename as fallback
                    if rel_path.startswith('..'):
                        rel_path = os.path.basename(file_path)

                    # Use safe path joining to prevent path traversal attacks
                    # This ensures files can't be written outside the move_to directory
                    try:
                        dest_path = safe_join_path(move_to, rel_path)
                    except ValueError as ve:
                        logging.error(f"Security error moving {file_path}: {ve}")
                        continue

                    # Create parent directories if they don't exist
                    os.makedirs(os.path.dirname(dest_path), exist_ok=True)

                    # Use shutil.move instead of os.rename to handle cross-device file movements
                    shutil.move(file_path, dest_path)

                    # Add arrow with color
                    arrow = f"{colorama.Fore.CYAN}→{colorama.Style.RESET_ALL}"
                    msg = f"Moved: {file_path} {arrow} {dest_path} ({size_str})"
                    logging.info(msg)
                except Exception as e:
                    logging.error(f"Failed to move {file_path}: {e}")
            else:
                try:
                    os.remove(file_path)
                    msg = f"Deleted: {file_path} ({size_str})"
                    logging.info(msg)
                except Exception as e:
                    logging.error(f"Failed to delete {file_path}: {e}")

    # Final progress save
    save_progress(session_id, directory, formats, recursive,
                 processed_files, bad_files, repaired_files, progress_dir)

    elapsed = time.time() - start_time
    logging.info(f"Processed {len(processed_files)} files in {elapsed:.2f} seconds")
    logging.info(f"Session ID: {session_id} (use --resume {session_id} to resume if needed)")

    return bad_files, repaired_files, total_size_saved

def print_banner():
    """Print 2PAC-themed ASCII art banner

    When colorama is loaded, the seven logo rows are split into four column
    ranges coloured white/red/green/blue, the title line highlights
    "Picture Analyzer & Corruption", the tribute line is cyan and the box
    borders are yellow. Otherwise the banner is printed as plain text.
    A blank line is printed afterwards in both cases.
    """
    banner = r"""
    ░▒▓███████▓▒░░▒▓███████▓▒░ ░▒▓██████▓▒░ ░▒▓██████▓▒░
           ░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░
           ░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░
     ░▒▓██████▓▒░░▒▓███████▓▒░░▒▓████████▓▒░▒▓█▓▒░
    ░▒▓█▓▒░      ░▒▓█▓▒░      ░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░
    ░▒▓█▓▒░      ░▒▓█▓▒░      ░▒▓█▓▒░░▒▓█▓▒░▒▓█▓▒░░▒▓█▓▒░
    ░▒▓████████▓▒░▒▓█▓▒░      ░▒▓█▓▒░░▒▓█▓▒░░▒▓██████▓▒░
    ╔═════════════════════════════════════════════════════════╗
    ║ The Picture Analyzer & Corruption killer                ║
    ║ In memory of Jeff Young - Bringing people together      ║
    ╚═════════════════════════════════════════════════════════╝
    """

    # Colored version of the banner, highlighting PAC for Picture Analyzer Corruption
    if 'colorama' in sys.modules:
        # Remove only the leading newline and the trailing whitespace. A bare
        # strip() would also eat the indentation of the first logo row, which
        # shifts that row left of the rows beneath it and moves its colour
        # boundaries relative to theirs.
        banner_lines = banner.rstrip().lstrip('\n').split('\n')
        colored_banner = []

        # Color the new gradient ASCII art logo (lines 0-6)
        for i, line in enumerate(banner_lines):
            if i < 7:  # The ASCII art logo lines for the new gradient style
                # For "2" part (first column)
                part1 = line[:11]
                # For "P" part (second column)
                part2 = line[11:24]
                # For "A" part (third column)
                part3 = line[24:38]
                # For "C" part (fourth column)
                part4 = line[38:]

                colored_line = f"{colorama.Fore.WHITE}{part1}" + \
                               f"{colorama.Fore.RED}{part2}" + \
                               f"{colorama.Fore.GREEN}{part3}" + \
                               f"{colorama.Fore.BLUE}{part4}{colorama.Style.RESET_ALL}"

                colored_banner.append(colored_line)
            elif 7 <= i <= 10:  # The box and text lines
                if i == 8:  # Title line with PAC highlighted
                    parts = line.split("Picture Analyzer & Corruption")
                    if len(parts) == 2:
                        prefix = parts[0]
                        suffix = parts[1]
                        colored_title = f"{colorama.Fore.YELLOW}{prefix}" + \
                                       f"{colorama.Fore.RED}Picture " + \
                                       f"{colorama.Fore.GREEN}Analyzer " + \
                                       f"{colorama.Fore.WHITE}& " + \
                                       f"{colorama.Fore.BLUE}Corruption" + \
                                       f"{colorama.Fore.YELLOW}{suffix}{colorama.Style.RESET_ALL}"
                        colored_banner.append(colored_title)
                    else:
                        # Title text not found exactly once: colour the whole line
                        colored_banner.append(f"{colorama.Fore.YELLOW}{line}{colorama.Style.RESET_ALL}")
                elif i == 9:  # Jeff Young tribute line
                    colored_banner.append(f"{colorama.Fore.CYAN}{line}{colorama.Style.RESET_ALL}")
                else:  # Box border lines
                    colored_banner.append(f"{colorama.Fore.YELLOW}{line}{colorama.Style.RESET_ALL}")
            else:
                # Any extra lines beyond the known layout
                colored_banner.append(f"{colorama.Fore.WHITE}{line}{colorama.Style.RESET_ALL}")

        print('\n'.join(colored_banner))
    else:
        print(banner)
    print()

def _build_cli_parser():
    """Create the argument parser that defines the 2PAC command-line interface.

    Returns:
        argparse.ArgumentParser: Parser with the action, basic, repair,
        format, validation, security and progress options registered.
    """
    parser = argparse.ArgumentParser(
        description='2PAC: The Picture Analyzer & Corruption killer',
        epilog='Created by Richard Young - "All Eyez On Your Images" - https://github.com/ricyoung/2pac'
    )

    # Main action (mutually exclusive): scan a directory, list saved sessions,
    # or diagnose one specific file.
    action_group = parser.add_mutually_exclusive_group()
    action_group.add_argument('directory', nargs='?', help='Directory to search for image files')
    action_group.add_argument('--list-sessions', action='store_true', help='List all saved sessions')
    action_group.add_argument('--check-file', type=str, help='Check a specific file for corruption (useful for testing)')

    # Basic options
    parser.add_argument('--delete', action='store_true', help='Delete corrupt image files (without this flag, runs in dry-run mode)')
    parser.add_argument('--move-to', type=str, help='Move corrupt files to this directory instead of deleting them')
    parser.add_argument('--workers', type=int, default=None, help='Number of worker processes (default: CPU count)')
    parser.add_argument('--non-recursive', action='store_true', help='Only search in the specified directory, not subdirectories')
    parser.add_argument('--output', type=str, help='Save list of corrupt files to this file')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging')
    parser.add_argument('--no-color', action='store_true', help='Disable colored output')
    parser.add_argument('--version', action='version', version=f'Bad Image Finder v{VERSION} by Richard Young')

    # Repair options
    repair_group = parser.add_argument_group('Repair options')
    repair_group.add_argument('--repair', action='store_true', help='Attempt to repair corrupt image files')
    repair_group.add_argument('--backup-dir', type=str, help='Directory to store backups of files before repair')
    repair_group.add_argument('--repair-report', type=str, help='Save list of repaired files to this file')

    # Format options
    format_group = parser.add_argument_group('Image format options')
    format_group.add_argument('--formats', type=str, nargs='+', choices=SUPPORTED_FORMATS.keys(),
                              help='Image formats to check (default: all formats)')
    format_group.add_argument('--jpeg', action='store_true', help='Check JPEG files only')
    format_group.add_argument('--png', action='store_true', help='Check PNG files only')
    format_group.add_argument('--tiff', action='store_true', help='Check TIFF files only')
    format_group.add_argument('--gif', action='store_true', help='Check GIF files only')
    format_group.add_argument('--bmp', action='store_true', help='Check BMP files only')

    # Validation options
    validation_group = parser.add_argument_group('Validation options')
    validation_group.add_argument('--thorough', action='store_true',
                                  help='Perform thorough image validation (slower but catches more subtle corruption)')
    validation_group.add_argument('--sensitivity', type=str, choices=['low', 'medium', 'high'], default='medium',
                                  help='Set validation sensitivity level: low (basic checks), medium (standard checks), high (most strict)')
    validation_group.add_argument('--ignore-eof', action='store_true',
                                  help='Ignore missing end-of-file markers (useful for truncated but viewable files)')
    validation_group.add_argument('--check-visual', action='store_true',
                                  help='Analyze image content to detect visible corruption like gray/black areas')
    validation_group.add_argument('--visual-strictness', type=str, choices=['low', 'medium', 'high'], default='medium',
                                  help='Set strictness level for visual corruption detection: low (most permissive), medium (balanced), high (only clear corruption)')

    # Security options
    # NOTE(review): --max-file-size and --max-pixels are parsed here, but
    # main() never reads args.max_file_size / args.max_pixels, so they
    # currently have no effect on the scan. Forwarding them would need
    # support in process_images -- confirm and wire up.
    security_group = parser.add_argument_group('Security options')
    security_group.add_argument('--security-checks', action='store_true',
                                help='Enable enhanced security validation (file size limits, dimension checks, format verification)')
    security_group.add_argument('--max-file-size', type=int, default=MAX_FILE_SIZE,
                                help=f'Maximum file size in bytes to process (default: {MAX_FILE_SIZE} = 100MB)')
    security_group.add_argument('--max-pixels', type=int, default=MAX_IMAGE_PIXELS,
                                help=f'Maximum image dimensions in pixels (default: {MAX_IMAGE_PIXELS} = 50MP)')

    # Progress saving options
    progress_group = parser.add_argument_group('Progress options')
    progress_group.add_argument('--save-interval', type=int, default=5,
                                help='Save progress every N minutes (0 to disable progress saving)')
    progress_group.add_argument('--progress-dir', type=str, default=DEFAULT_PROGRESS_DIR,
                                help='Directory to store progress files')
    progress_group.add_argument('--resume', type=str, metavar='SESSION_ID',
                                help='Resume from a previously saved session')

    return parser


def _run_check_file_mode(file_path):
    """Print a step-by-step diagnostic report for a single image file.

    Runs, in order: basic PIL open/verify/load, the JPEG or PNG structure
    check (chosen by file extension), the full decode test, the external
    tools check, the visual corruption check, and a final verdict.

    Args:
        file_path: Path of the file to diagnose. The caller is expected to
            have checked that it exists.
    """
    print(f"\n{colorama.Style.BRIGHT}Checking file: {file_path}{colorama.Style.RESET_ALL}\n")

    # Basic check
    print(f"{colorama.Fore.CYAN}Basic validation:{colorama.Style.RESET_ALL}")
    try:
        with Image.open(file_path) as img:
            print("✓ File can be opened by PIL")
            print(f"  Format: {img.format}")
            print(f"  Mode: {img.mode}")
            print(f"  Size: {img.size[0]}x{img.size[1]}")

            try:
                img.verify()
                print("✓ Header verification passed")
            except Exception as e:
                print(f"❌ Header verification failed: {str(e)}")

            # The load test uses a second, freshly opened handle rather than
            # reusing the one verify() was called on.
            try:
                with Image.open(file_path) as img2:
                    img2.load()
                print("✓ Data loading test passed")
            except Exception as e:
                print(f"❌ Data loading test failed: {str(e)}")
    except Exception as e:
        print(f"❌ Cannot open file with PIL: {str(e)}")

    # Detailed format-specific checks, selected by file extension
    lowered_path = file_path.lower()
    if lowered_path.endswith(tuple(SUPPORTED_FORMATS['JPEG'])):
        print(f"\n{colorama.Fore.CYAN}JPEG structure checks:{colorama.Style.RESET_ALL}")
        is_valid, msg = check_jpeg_structure(file_path)
        if is_valid:
            print(f"✓ JPEG structure valid: {msg}")
        else:
            print(f"❌ JPEG structure invalid: {msg}")
    elif lowered_path.endswith(tuple(SUPPORTED_FORMATS['PNG'])):
        print(f"\n{colorama.Fore.CYAN}PNG structure checks:{colorama.Style.RESET_ALL}")
        is_valid, msg = check_png_structure(file_path)
        if is_valid:
            print(f"✓ PNG structure valid: {msg}")
        else:
            print(f"❌ PNG structure invalid: {msg}")

    # Decode test
    print(f"\n{colorama.Fore.CYAN}Full decode test:{colorama.Style.RESET_ALL}")
    is_valid, msg = try_full_decode_check(file_path)
    if is_valid:
        print(f"✓ Full decode test passed: {msg}")
    else:
        print(f"❌ Full decode test failed: {msg}")

    # External tools check
    print(f"\n{colorama.Fore.CYAN}External tools check:{colorama.Style.RESET_ALL}")
    is_valid, msg = try_external_tools(file_path)
    if is_valid:
        print(f"✓ External tools: {msg}")
    else:
        print(f"❌ External tools: {msg}")

    # Visual corruption check
    print(f"\n{colorama.Fore.CYAN}Visual content analysis:{colorama.Style.RESET_ALL}")
    is_visually_corrupt, vis_msg = check_visual_corruption(file_path)
    if not is_visually_corrupt:
        print(f"✓ No visual corruption detected: {vis_msg}")
    else:
        print(f"❌ {vis_msg}")

    # Final verdict: also tells the user which scan flag would catch the issue
    print(f"\n{colorama.Fore.CYAN}Final verdict:{colorama.Style.RESET_ALL}")
    is_valid_basic = is_valid_image(file_path, thorough=False)
    is_valid_thorough = is_valid_image(file_path, thorough=True)
    is_valid_visual = not is_visually_corrupt

    if is_valid_basic and is_valid_thorough and is_valid_visual:
        print(f"{colorama.Fore.GREEN}This file appears to be valid by all checks.{colorama.Style.RESET_ALL}")
    elif not is_valid_visual:
        print(f"{colorama.Fore.RED}This file shows visible corruption in the image content.{colorama.Style.RESET_ALL}")
        print("Recommendation: Use --check-visual to detect this type of corruption.")
    elif is_valid_basic and not is_valid_thorough:
        print(f"{colorama.Fore.YELLOW}This file passes basic validation but fails thorough checks.{colorama.Style.RESET_ALL}")
        print("Recommendation: Use --thorough mode to detect this type of corruption.")
    else:
        print(f"{colorama.Fore.RED}This file is corrupt and would be detected by the basic scan.{colorama.Style.RESET_ALL}")


def _run_list_sessions_mode(progress_dir):
    """Print every saved session found in ``progress_dir``.

    Args:
        progress_dir: Directory passed to ``list_saved_sessions``.
    """
    sessions = list_saved_sessions(progress_dir)
    if not sessions:
        print("No saved sessions found.")
        return

    print(f"\n{colorama.Style.BRIGHT}Saved Sessions:{colorama.Style.RESET_ALL}")
    for session in sessions:
        ts = datetime.fromisoformat(session['timestamp']).strftime('%Y-%m-%d %H:%M:%S')
        print(f"\n{colorama.Fore.CYAN}Session ID: {session['id']}{colorama.Style.RESET_ALL}")
        print(f"  Created: {ts}")
        print(f"  Directory: {session['directory']}")
        print(f"  Formats: {', '.join(session['formats'])}")
        print(f"  Progress: {session['processed_count']} files processed, "
              f"{session['bad_count']} corrupt, {session['repaired_count']} repaired")

        # Show resume command only when the scanned directory still exists
        resume_cmd = f"find_bad_images.py --resume {session['id']}"
        if os.path.exists(session['directory']):
            print(f"  {colorama.Fore.GREEN}Resume command: {resume_cmd}{colorama.Style.RESET_ALL}")
        else:
            print(f"  {colorama.Fore.YELLOW}Directory no longer exists, cannot resume{colorama.Style.RESET_ALL}")


def _resolve_formats(args):
    """Return the list of image format names selected on the command line.

    ``--formats`` takes precedence. Otherwise the shortcut flags are
    combined, so ``--jpeg --png`` selects both formats instead of silently
    keeping only the first one. With no selection, all default formats are
    returned.

    Args:
        args: Parsed argparse namespace.

    Returns:
        list[str]: A new list of format names (never the module constant).
    """
    if args.formats:
        return list(args.formats)

    shortcut_flags = (
        ('JPEG', args.jpeg),
        ('PNG', args.png),
        ('TIFF', args.tiff),
        ('GIF', args.gif),
        ('BMP', args.bmp),
    )
    selected = [name for name, enabled in shortcut_flags if enabled]

    # Copy the default so later code cannot mutate DEFAULT_FORMATS in place.
    return selected or list(DEFAULT_FORMATS)


def _log_scan_configuration(args, formats, directory, dry_run):
    """Log the effective scan settings before processing starts.

    Args:
        args: Parsed argparse namespace.
        formats: Format names that will be scanned.
        directory: Directory that will be scanned.
        dry_run: True when neither --delete nor --move-to was given.
    """
    level_colors = {
        'low': colorama.Fore.GREEN,
        'medium': colorama.Fore.YELLOW,
        'high': colorama.Fore.RED
    }

    # Colorful mode indicators
    if args.repair:
        logging.info(f"{colorama.Fore.MAGENTA}REPAIR MODE{colorama.Style.RESET_ALL}: Attempting to fix corrupt files")

        repairable_formats = [fmt for fmt in formats if fmt in REPAIRABLE_FORMATS]
        if repairable_formats:
            logging.info(f"Repairable formats: {', '.join(repairable_formats)}")
        else:
            logging.warning("None of the selected formats support repair")

    if dry_run:
        logging.info(f"{colorama.Fore.YELLOW}DRY RUN MODE{colorama.Style.RESET_ALL}: No files will be deleted or moved")
    elif args.move_to:
        logging.info(f"{colorama.Fore.BLUE}MOVE MODE{colorama.Style.RESET_ALL}: Corrupt files will be moved to {args.move_to}")
    else:
        logging.info(f"{colorama.Fore.RED}DELETE MODE{colorama.Style.RESET_ALL}: Corrupt files will be permanently deleted")

    # Add progress saving info
    if args.save_interval > 0:
        logging.info(f"{colorama.Fore.CYAN}PROGRESS SAVING{colorama.Style.RESET_ALL}: Every {args.save_interval} minutes")
    else:
        logging.info("Progress saving is disabled")

    if args.resume:
        logging.info(f"{colorama.Fore.CYAN}RESUMING{colorama.Style.RESET_ALL}: From session {args.resume}")

    if args.thorough:
        logging.info(f"{colorama.Fore.MAGENTA}THOROUGH MODE{colorama.Style.RESET_ALL}: Using deep validation checks (slower but more accurate)")

    # Show sensitivity level
    sensitivity_color = level_colors.get(args.sensitivity, colorama.Fore.YELLOW)
    logging.info(f"{sensitivity_color}SENSITIVITY: {args.sensitivity.upper()}{colorama.Style.RESET_ALL}")

    # Show EOF handling
    if args.ignore_eof:
        logging.info(f"{colorama.Fore.CYAN}IGNORING EOF MARKERS{colorama.Style.RESET_ALL}: Allowing truncated but viewable files")

    # Show visual corruption checking status
    if args.check_visual:
        strictness_color = level_colors.get(args.visual_strictness, colorama.Fore.YELLOW)
        logging.info(
            f"{colorama.Fore.MAGENTA}VISUAL CHECK{colorama.Style.RESET_ALL}: "
            f"Analyzing image content (strictness: {strictness_color}{args.visual_strictness.upper()}{colorama.Style.RESET_ALL})"
        )

    # Show security checks status (reports the module-level limits)
    if args.security_checks:
        logging.info(
            f"{colorama.Fore.RED}SECURITY CHECKS ENABLED{colorama.Style.RESET_ALL}: "
            f"Validating file sizes (max {humanize.naturalsize(MAX_FILE_SIZE)}), "
            f"dimensions (max {MAX_IMAGE_PIXELS:,} pixels), and format integrity"
        )

    # Show which formats we're checking
    logging.info(f"Checking image formats: {', '.join(formats)}")
    logging.info(f"Searching for corrupt image files in {directory}")


def _write_file_list(destination, paths):
    """Write one path per line to ``destination``.

    UTF-8 is used explicitly so that non-ASCII file names cannot raise
    UnicodeEncodeError under a narrower platform default encoding.

    Args:
        destination: Path of the report file to create or overwrite.
        paths: Iterable of file paths to record.
    """
    with open(destination, 'w', encoding='utf-8') as handle:
        handle.writelines(f"{path}\n" for path in paths)


def main():
    """Run the 2PAC command-line interface.

    Prints the banner, parses arguments, then dispatches to one of three
    modes: single-file diagnosis (--check-file), session listing
    (--list-sessions), or a directory scan (optionally resumed).

    Exits with status 0 after the 'q' command, --check-file and
    --list-sessions; status 1 on invalid arguments or an unexpected error
    during the scan; status 130 when the scan is interrupted with Ctrl+C.
    Returns normally after a completed scan.
    """
    print_banner()

    # Check for 'q' command to quit
    if len(sys.argv) == 2 and sys.argv[1].lower() == 'q':
        print(f"{colorama.Fore.YELLOW}Exiting 2PAC. Stay safe!{colorama.Style.RESET_ALL}")
        sys.exit(0)

    args = _build_cli_parser().parse_args()

    # Setup logging
    setup_logging(args.verbose, args.no_color)

    # Handle specific file check mode
    if args.check_file:
        if not os.path.exists(args.check_file):
            logging.error(f"Error: File not found: {args.check_file}")
            sys.exit(1)
        _run_check_file_mode(args.check_file)
        sys.exit(0)

    # Handle session listing mode
    if args.list_sessions:
        _run_list_sessions_mode(args.progress_dir)
        sys.exit(0)

    # Check if directory is specified for a new scan
    if not args.directory and not args.resume:
        logging.error("Error: You must specify a directory to scan or use --resume to continue a session")
        sys.exit(1)

    if args.directory:
        directory = Path(args.directory)
    else:
        # Resuming without a directory: reuse the one stored in the session
        progress = load_progress(args.resume, args.progress_dir)
        if not progress:
            logging.error(f"Could not load session {args.resume}")
            sys.exit(1)
        directory = Path(progress['directory'])
        logging.info(f"Using directory from saved session: {directory}")

    # Verify the directory exists
    if not directory.exists() or not directory.is_dir():
        logging.error(f"Error: {directory} is not a valid directory")
        sys.exit(1)

    # Check for incompatible options
    if args.delete and args.move_to:
        logging.error("Error: Cannot use both --delete and --move-to options")
        sys.exit(1)

    formats = _resolve_formats(args)
    dry_run = not (args.delete or args.move_to)

    _log_scan_configuration(args, formats, directory, dry_run)

    try:
        bad_files, repaired_files, total_size_saved = process_images(
            directory,
            formats,
            dry_run=dry_run,
            repair=args.repair,
            max_workers=args.workers,
            recursive=not args.non_recursive,
            move_to=args.move_to,
            repair_dir=args.backup_dir,
            save_progress_interval=args.save_interval,
            resume_session=args.resume,
            progress_dir=args.progress_dir,
            thorough_check=args.thorough,
            sensitivity=args.sensitivity,
            ignore_eof=args.ignore_eof,
            check_visual=args.check_visual,
            visual_strictness=args.visual_strictness,
            enable_security_checks=args.security_checks
        )

        # Colorful summary
        count_color = colorama.Fore.RED if bad_files else colorama.Fore.GREEN
        file_count = f"{count_color}{len(bad_files)}{colorama.Style.RESET_ALL}"
        logging.info(f"Found {file_count} corrupt image files")

        if args.repair:
            repair_color = colorama.Fore.GREEN if repaired_files else colorama.Fore.YELLOW
            repair_count = f"{repair_color}{len(repaired_files)}{colorama.Style.RESET_ALL}"
            logging.info(f"Successfully repaired {repair_count} files")

            if args.repair_report and repaired_files:
                _write_file_list(args.repair_report, repaired_files)
                logging.info(f"Saved list of repaired files to {args.repair_report}")

        savings_str = humanize.naturalsize(total_size_saved)
        savings_color = colorama.Fore.GREEN if total_size_saved > 0 else colorama.Fore.RESET
        logging.info(f"Total space savings: {savings_color}{savings_str}{colorama.Style.RESET_ALL}")

        if not args.no_color:
            # Add signature at the end of the run
            print(f"\n{colorama.Fore.CYAN}2PAC v{VERSION} by Richard Young{colorama.Style.RESET_ALL}")
            print(f"{colorama.Fore.YELLOW}\"{random.choice(QUOTES)}\"{colorama.Style.RESET_ALL}")

        # Save list of corrupt files if requested
        if args.output and bad_files:
            _write_file_list(args.output, bad_files)
            logging.info(f"Saved list of corrupt files to {args.output}")

        if bad_files and dry_run:
            logging.info("Run with --delete to remove these files or --move-to to relocate them")

    except KeyboardInterrupt:
        logging.info("Operation cancelled by user")
        sys.exit(130)
    except Exception as e:
        # Top-level boundary: report the error, with a traceback in verbose mode
        logging.error(f"Error: {str(e)}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()