#!/usr/bin/env python3
"""
Convert SimpleTuner LoRA weights to diffusers-compatible format for AuraFlow.

This script converts LoRA weights saved by SimpleTuner into a format that can be
directly loaded by diffusers' load_lora_weights() method.

Usage:
    python convert_simpletuner_lora.py <input_lora.safetensors> <output_lora.safetensors>

Example:
    python convert_simpletuner_lora.py input_lora.safetensors diffusers_compatible_lora.safetensors
"""

import argparse
import sys
from pathlib import Path
from typing import Dict

import safetensors.torch
import torch


def detect_lora_format(state_dict: Dict[str, torch.Tensor]) -> str:
    """
    Detect the format of the LoRA state dict.
    
    Returns:
        "peft" if already in PEFT/diffusers format
        "mixed" if mixed format (some lora_A/B, some lora.down/up)
        "simpletuner_transformer" if in SimpleTuner format with transformer prefix
        "simpletuner_auraflow" if in SimpleTuner AuraFlow format
        "kohya" if in Kohya format
        "unknown" otherwise
    """
    keys = list(state_dict.keys())
    
    # Check the actual weight naming convention (lora_A/lora_B vs lora_down/lora_up)
    has_lora_a_b = any((".lora_A." in k or ".lora_B." in k) for k in keys)
    has_lora_down_up = any((".lora_down." in k or ".lora_up." in k) for k in keys)
    has_lora_dot_down_up = any((".lora.down." in k or ".lora.up." in k) for k in keys)
    
    # Check prefixes
    has_transformer_prefix = any(k.startswith("transformer.") for k in keys)
    has_lora_transformer_prefix = any(k.startswith("lora_transformer_") for k in keys)
    has_lora_unet_prefix = any(k.startswith("lora_unet_") for k in keys)
    
    # Mixed format: has both lora_A/B AND lora.down/up (SimpleTuner hybrid)
    if has_transformer_prefix and has_lora_a_b and (has_lora_down_up or has_lora_dot_down_up):
        return "mixed"
    
    # Pure PEFT format: transformer.* with ONLY lora_A/lora_B
    if has_transformer_prefix and has_lora_a_b and not has_lora_down_up and not has_lora_dot_down_up:
        return "peft"
    
    # SimpleTuner with transformer prefix but old naming: transformer.* with lora_down/lora_up
    if has_transformer_prefix and (has_lora_down_up or has_lora_dot_down_up):
        return "simpletuner_transformer"
    
    # SimpleTuner AuraFlow format: lora_transformer_* with lora_down/lora_up
    if has_lora_transformer_prefix and has_lora_down_up:
        return "simpletuner_auraflow"
    
    # Traditional Kohya format: lora_unet_* with lora_down/lora_up
    if has_lora_unet_prefix and has_lora_down_up:
        return "kohya"
    
    return "unknown"


def convert_mixed_lora_to_diffusers(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """
    Convert mixed LoRA format to pure PEFT format.
    
    SimpleTuner sometimes saves a hybrid format where some layers use lora_A/lora_B
    and others use .lora.down./.lora.up. This converts all to lora_A/lora_B.
    """
    new_state_dict = {}
    converted_count = 0
    kept_count = 0
    skipped_count = 0
    renames = []
    
    # Get all keys
    all_keys = sorted(state_dict.keys())
    
    print("\nProcessing keys:")
    print("-" * 80)
    
    for key in all_keys:
        # Already in correct format (lora_A or lora_B)
        if ".lora_A." in key or ".lora_B." in key:
            new_state_dict[key] = state_dict[key]
            kept_count += 1
        
        # Needs conversion: .lora.down. -> .lora_A.
        elif ".lora.down.weight" in key:
            new_key = key.replace(".lora.down.weight", ".lora_A.weight")
            new_state_dict[new_key] = state_dict[key]
            renames.append((key, new_key))
            converted_count += 1
        
        # Needs conversion: .lora.up. -> .lora_B.
        elif ".lora.up.weight" in key:
            new_key = key.replace(".lora.up.weight", ".lora_B.weight")
            new_state_dict[new_key] = state_dict[key]
            renames.append((key, new_key))
            converted_count += 1
        
        # Skip alpha keys (not used in PEFT format)
        elif ".alpha" in key:
            skipped_count += 1
            continue
        
        # Other keys (shouldn't happen, but keep them just in case)
        else:
            new_state_dict[key] = state_dict[key]
            print(f"⚠ Warning: Unexpected key format: {key}")
    
    print(f"\nSummary:")
    print(f"  βœ“ Kept {kept_count} keys already in correct format (lora_A/lora_B)")
    print(f"  βœ“ Converted {converted_count} keys from .lora.down/.lora.up to lora_A/lora_B")
    print(f"  βœ“ Skipped {skipped_count} alpha keys")
    
    if renames:
        print(f"\nRenames applied ({len(renames)} conversions):")
        print("-" * 80)
        for old_key, new_key in renames:
            # Show the difference more clearly
            if ".lora.down.weight" in old_key:
                layer = old_key.replace(".lora.down.weight", "")
                print(f"  {layer}")
                print(f"    .lora.down.weight  β†’  .lora_A.weight")
            elif ".lora.up.weight" in old_key:
                layer = old_key.replace(".lora.up.weight", "")
                print(f"  {layer}")
                print(f"    .lora.up.weight    β†’  .lora_B.weight")
    
    return new_state_dict


def convert_simpletuner_transformer_to_diffusers(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """
    Convert SimpleTuner transformer format (already has transformer. prefix but uses lora_down/lora_up)
    to diffusers PEFT format (transformer. prefix with lora_A/lora_B).
    
    This is a simpler conversion since the key structure is already correct.
    """
    new_state_dict = {}
    renames = []
    
    # Get all unique LoRA layer base names (without .lora_down/.lora_up/.alpha suffix)
    all_keys = list(state_dict.keys())
    base_keys = set()
    
    for key in all_keys:
        if ".lora_down.weight" in key:
            base_key = key.replace(".lora_down.weight", "")
            base_keys.add(base_key)
    
    print(f"\nFound {len(base_keys)} LoRA layers to convert")
    print("-" * 80)
    
    # Convert each layer
    for base_key in sorted(base_keys):
        down_key = f"{base_key}.lora_down.weight"
        up_key = f"{base_key}.lora_up.weight"
        alpha_key = f"{base_key}.alpha"
        
        if down_key not in state_dict or up_key not in state_dict:
            print(f"⚠ Warning: Missing weights for {base_key}")
            continue
        
        down_weight = state_dict.pop(down_key)
        up_weight = state_dict.pop(up_key)
        
        # Handle alpha scaling
        has_alpha = False
        if alpha_key in state_dict:
            alpha = state_dict.pop(alpha_key)
            lora_rank = down_weight.shape[0]
            scale = alpha / lora_rank
            
            # Split the scale between the down and up weights; the loop keeps
            # scale_down * scale_up equal to alpha / rank while balancing the two factors.
            scale_down = scale
            scale_up = 1.0
            while scale_down * 2 < scale_up:
                scale_down *= 2
                scale_up /= 2
            
            down_weight = down_weight * scale_down
            up_weight = up_weight * scale_up
            has_alpha = True
        
        # Store in PEFT format (lora_A = down, lora_B = up)
        new_down_key = f"{base_key}.lora_A.weight"
        new_up_key = f"{base_key}.lora_B.weight"
        
        new_state_dict[new_down_key] = down_weight
        new_state_dict[new_up_key] = up_weight
        
        renames.append((down_key, new_down_key, has_alpha))
        renames.append((up_key, new_up_key, has_alpha))
    
    # Check for any remaining keys
    remaining = [k for k in state_dict.keys() if not k.startswith("text_encoder")]
    if remaining:
        print(f"⚠ Warning: {len(remaining)} keys were not converted: {remaining[:5]}")
    
    print(f"\nRenames applied ({len(renames)} conversions):")
    print("-" * 80)
    
    # Group by layer
    current_layer = None
    for old_key, new_key, has_alpha in renames:
        layer = old_key.replace(".lora_down.weight", "").replace(".lora_up.weight", "")
        
        if layer != current_layer:
            alpha_str = " (alpha scaled)" if has_alpha else ""
            print(f"\n  {layer}{alpha_str}")
            current_layer = layer
        
        if ".lora_down.weight" in old_key:
            print(f"    .lora_down.weight  β†’  .lora_A.weight")
        elif ".lora_up.weight" in old_key:
            print(f"    .lora_up.weight    β†’  .lora_B.weight")
    
    return new_state_dict


def convert_simpletuner_auraflow_to_diffusers(state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """
    Convert SimpleTuner AuraFlow LoRA format to diffusers PEFT format.
    
    SimpleTuner typically saves LoRAs in a format similar to Kohya's sd-scripts,
    but for transformer-based models like AuraFlow, the keys may differ.
    """
    new_state_dict = {}
    
    def _convert(original_key, diffusers_key, state_dict, new_state_dict):
        """Helper to convert a single LoRA layer."""
        down_key = f"{original_key}.lora_down.weight"
        if down_key not in state_dict:
            return False
            
        down_weight = state_dict.pop(down_key)
        lora_rank = down_weight.shape[0]
        
        up_weight_key = f"{original_key}.lora_up.weight"
        up_weight = state_dict.pop(up_weight_key)
        
        # Handle alpha scaling
        alpha_key = f"{original_key}.alpha"
        if alpha_key in state_dict:
            alpha = state_dict.pop(alpha_key)
            scale = alpha / lora_rank
            
            # Split the scale between the down and up weights; the loop keeps
            # scale_down * scale_up equal to alpha / rank while balancing the two factors.
            scale_down = scale
            scale_up = 1.0
            while scale_down * 2 < scale_up:
                scale_down *= 2
                scale_up /= 2
            
            down_weight = down_weight * scale_down
            up_weight = up_weight * scale_up
        
        # Store in PEFT format (lora_A = down, lora_B = up)
        diffusers_down_key = f"{diffusers_key}.lora_A.weight"
        new_state_dict[diffusers_down_key] = down_weight
        new_state_dict[diffusers_down_key.replace(".lora_A.", ".lora_B.")] = up_weight
        
        return True
    
    # Get all unique LoRA layer names
    all_unique_keys = {
        k.replace(".lora_down.weight", "").replace(".lora_up.weight", "").replace(".alpha", "")
        for k in state_dict
        if ".lora_down.weight" in k or ".lora_up.weight" in k or ".alpha" in k
    }
    
    # Process transformer blocks
    for original_key in sorted(all_unique_keys):
        if original_key.startswith("lora_transformer_single_transformer_blocks_"):
            # Single transformer blocks
            parts = original_key.split("lora_transformer_single_transformer_blocks_")[-1].split("_")
            block_idx = int(parts[0])
            diffusers_key = f"single_transformer_blocks.{block_idx}"
            
            # Map the rest of the key
            remaining = "_".join(parts[1:])
            if "attn_to_q" in remaining:
                diffusers_key += ".attn.to_q"
            elif "attn_to_k" in remaining:
                diffusers_key += ".attn.to_k"
            elif "attn_to_v" in remaining:
                diffusers_key += ".attn.to_v"
            elif "proj_out" in remaining:
                diffusers_key += ".proj_out"
            elif "proj_mlp" in remaining:
                diffusers_key += ".proj_mlp"
            elif "norm_linear" in remaining:
                diffusers_key += ".norm.linear"
            else:
                print(f"Warning: Unhandled single block key pattern: {original_key}")
                continue
                
        elif original_key.startswith("lora_transformer_transformer_blocks_"):
            # Double transformer blocks
            parts = original_key.split("lora_transformer_transformer_blocks_")[-1].split("_")
            block_idx = int(parts[0])
            diffusers_key = f"transformer_blocks.{block_idx}"
            
            # Map the rest of the key
            remaining = "_".join(parts[1:])
            if "attn_to_out_0" in remaining:
                diffusers_key += ".attn.to_out.0"
            elif "attn_to_add_out" in remaining:
                diffusers_key += ".attn.to_add_out"
            elif "attn_to_q" in remaining:
                diffusers_key += ".attn.to_q"
            elif "attn_to_k" in remaining:
                diffusers_key += ".attn.to_k"
            elif "attn_to_v" in remaining:
                diffusers_key += ".attn.to_v"
            elif "attn_add_q_proj" in remaining:
                diffusers_key += ".attn.add_q_proj"
            elif "attn_add_k_proj" in remaining:
                diffusers_key += ".attn.add_k_proj"
            elif "attn_add_v_proj" in remaining:
                diffusers_key += ".attn.add_v_proj"
            elif "ff_net_0_proj" in remaining:
                diffusers_key += ".ff.net.0.proj"
            elif "ff_net_2" in remaining:
                diffusers_key += ".ff.net.2"
            elif "ff_context_net_0_proj" in remaining:
                diffusers_key += ".ff_context.net.0.proj"
            elif "ff_context_net_2" in remaining:
                diffusers_key += ".ff_context.net.2"
            elif "norm1_linear" in remaining:
                diffusers_key += ".norm1.linear"
            elif "norm1_context_linear" in remaining:
                diffusers_key += ".norm1_context.linear"
            else:
                print(f"Warning: Unhandled double block key pattern: {original_key}")
                continue
        
        elif original_key.startswith("lora_te1_") or original_key.startswith("lora_te_"):
            # Text encoder keys - handle separately
            print(f"Found text encoder key: {original_key}")
            continue
        
        else:
            print(f"Warning: Unknown key pattern: {original_key}")
            continue
        
        # Perform the conversion
        _convert(original_key, diffusers_key, state_dict, new_state_dict)
    
    # Add "transformer." prefix to all keys
    transformer_state_dict = {
        f"transformer.{k}": v for k, v in new_state_dict.items() if not k.startswith("text_model.")
    }
    
    # Check for remaining unconverted keys
    if len(state_dict) > 0:
        remaining_keys = [k for k in state_dict.keys() if not k.startswith("lora_te")]
        if remaining_keys:
            print(f"Warning: Some keys were not converted: {remaining_keys[:10]}")
    
    return transformer_state_dict


def convert_lora(input_path: str, output_path: str) -> None:
    """
    Main conversion function.
    
    Args:
        input_path: Path to input LoRA safetensors file
        output_path: Path to output diffusers-compatible safetensors file
    """
    print(f"Loading LoRA from: {input_path}")
    state_dict = safetensors.torch.load_file(input_path)
    
    print(f"Detecting LoRA format...")
    format_type = detect_lora_format(state_dict)
    print(f"Detected format: {format_type}")
    
    if format_type == "peft":
        print("LoRA is already in diffusers-compatible PEFT format!")
        print("No conversion needed. Copying file...")
        import shutil
        shutil.copy(input_path, output_path)
        return
    
    elif format_type == "mixed":
        print("Converting MIXED format LoRA to pure PEFT format...")
        print("(Some layers use lora_A/B, others use .lora.down/.lora.up)")
        converted_state_dict = convert_mixed_lora_to_diffusers(state_dict.copy())
    
    elif format_type == "simpletuner_transformer":
        print("Converting SimpleTuner transformer format to diffusers...")
        print("(has transformer. prefix but uses lora_down/lora_up naming)")
        converted_state_dict = convert_simpletuner_transformer_to_diffusers(state_dict.copy())
    
    elif format_type == "simpletuner_auraflow":
        print("Converting SimpleTuner AuraFlow format to diffusers...")
        converted_state_dict = convert_simpletuner_auraflow_to_diffusers(state_dict.copy())
    
    elif format_type == "kohya":
        print("Note: Detected Kohya format. This converter is optimized for AuraFlow.")
        print("For other models, diffusers has built-in conversion.")
        converted_state_dict = convert_simpletuner_auraflow_to_diffusers(state_dict.copy())
    
    else:
        print("Error: Unknown LoRA format!")
        print("Sample keys from the state dict:")
        for i, key in enumerate(list(state_dict.keys())[:20]):
            print(f"  {key}")
        sys.exit(1)
    
    print(f"Saving converted LoRA to: {output_path}")
    safetensors.torch.save_file(converted_state_dict, output_path)
    
    print("\nConversion complete!")
    print(f"Original keys: {len(state_dict)}")
    print(f"Converted keys: {len(converted_state_dict)}")

def main():
    parser = argparse.ArgumentParser(
        description="Convert SimpleTuner LoRA to diffusers-compatible format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert a SimpleTuner LoRA for AuraFlow
  python convert_simpletuner_lora.py my_lora.safetensors diffusers_lora.safetensors
  
  # Check the format without converting
  python convert_simpletuner_lora.py my_lora.safetensors /tmp/test.safetensors --dry-run
        """
    )
    
    parser.add_argument(
        "input",
        type=str,
        help="Input LoRA file (SimpleTuner format)"
    )
    
    parser.add_argument(
        "output",
        type=str,
        help="Output LoRA file (diffusers-compatible format)"
    )
    
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Only detect format, don't convert"
    )
    
    args = parser.parse_args()
    
    # Validate input file exists
    if not Path(args.input).exists():
        print(f"Error: Input file not found: {args.input}")
        sys.exit(1)
    
    if args.dry_run:
        print(f"Loading LoRA from: {args.input}")
        state_dict = safetensors.torch.load_file(args.input)
        format_type = detect_lora_format(state_dict)
        print(f"Detected format: {format_type}")
        print(f"\nSample keys ({min(10, len(state_dict))} of {len(state_dict)}):")
        for key in list(state_dict.keys())[:10]:
            print(f"  {key}")
        return
    
    convert_lora(args.input, args.output)


if __name__ == "__main__":
    main()