Spaces:

AVLL
/

Automated_Plant_Analysis_Pipeline_Demo

Running

App Files Community

Fahimeh Orvati Nia committed on Oct 3

Commit

91a7a12

1 Parent(s): dd1d7f5

minimal pipeline

Browse files

Files changed (11) hide show

requirements.txt +6 -42
sorghum_pipeline/__init__.py +3 -23
sorghum_pipeline/config.py +9 -43
sorghum_pipeline/data/__init__.py +2 -11
sorghum_pipeline/data/loader.py +13 -423
sorghum_pipeline/data/mask_handler.py +4 -11
sorghum_pipeline/data/preprocessor.py +12 -33
sorghum_pipeline/features/__init__.py +2 -16
sorghum_pipeline/output/__init__.py +2 -10
sorghum_pipeline/pipeline.py +77 -192
sorghum_pipeline/segmentation/__init__.py +2 -9

requirements.txt CHANGED Viewed

@@ -1,48 +1,12 @@
-# --- Core demo UI ---
 gradio
 pillow
-# --- Scientific / image processing ---
 numpy
-scipy
-matplotlib
-scikit-image
-opencv-python-headless
-tifffile
-# --- Machine learning / deep learning ---
 torch
 torchvision
-timm              # for pretrained backbones
-segmentation-models-pytorch
-ultralytics       # YOLO models (if you extend later)
-# --- Plant phenotyping ---
-plantcv==4.6
-# --- Data handling & utils ---
-pandas
-tqdm
-pyyaml
-joblib
-# --- Geometry / remote sensing ---
-shapely
-rasterio
-fiona
-# --- For morphology / texture analysis ---
 scikit-learn
-seaborn
-networkx
-skan              # skeleton analysis
-# --- For model configs & logging ---
-omegaconf
-hydra-core
-loguru
-# --- Optional: segmentation research tools ---
-# (comment these out if not needed to reduce build time)
-segment-anything
-git+https://github.com/facebookresearch/segment-anything-2.git@2b90b9f5ceec907a1c18123530e92e794ad901a4

 gradio
 pillow
 numpy
+opencv-python
 torch
 torchvision
+transformers
+scikit-image
 scikit-learn
+scipy
+matplotlib
+plantcv

sorghum_pipeline/__init__.py CHANGED Viewed

@@ -1,31 +1,11 @@
 """
-Sorghum Plant Phenotyping Pipeline
-A comprehensive pipeline for analyzing sorghum plant images including:
-- Data loading and preprocessing
-- Image segmentation and masking
-- Feature extraction (texture, morphology, vegetation indices)
-- Results visualization and export
-Author: Fahime Horvatinia
-Version: 2.0.0
 """
 __version__ = "2.0.0"
-__author__ = "Fahime Horvatinia"
 from .pipeline import SorghumPipeline
 from .config import Config
-from .data import DataLoader
-from .features import TextureExtractor, VegetationIndexExtractor, MorphologyExtractor
-from .output import OutputManager
-__all__ = [
-    "SorghumPipeline",
-    "Config",
-    "DataLoader",
-    "TextureExtractor",
-    "VegetationIndexExtractor",
-    "MorphologyExtractor",
-    "OutputManager"
-]

 """
+Minimal Sorghum Plant Phenotyping Pipeline for Hugging Face Demo.
 """
 __version__ = "2.0.0"
+__author__ = "Fahimeh Orvati Nia"
 from .pipeline import SorghumPipeline
 from .config import Config
+__all__ = ["SorghumPipeline", "Config"]

sorghum_pipeline/config.py CHANGED Viewed

@@ -1,76 +1,42 @@
-"""
-Minimal configuration for the Sorghum Pipeline.
-"""
 import os
-from pathlib import Path
 from dataclasses import dataclass
 @dataclass
 class Paths:
-    """Configuration for file paths."""
     input_folder: str
     output_folder: str
     boundingbox_dir: str = ""
     def __post_init__(self):
-        """Ensure paths are absolute."""
         self.input_folder = os.path.abspath(self.input_folder)
         self.output_folder = os.path.abspath(self.output_folder)
-@dataclass
-class ProcessingParams:
-    """Minimal processing parameters."""
-    target_size: tuple = None
-    min_component_area: int = 1000
-    morphology_kernel_size: int = 7
-    segmentation_threshold: float = 0.5
 @dataclass
 class OutputSettings:
     """Output settings."""
     save_images: bool = True
-    save_plots: bool = False
-    save_metadata: bool = False
     plot_dpi: int = 100
-    segmentation_dir: str = "results"
-    texture_dir: str = "texture_output"
-    morphology_dir: str = "results"
-    vegetation_dir: str = "Vegetation_indices_images"
-@dataclass
-class ModelSettings:
-    """Model settings."""
-    device: str = "auto"
-    model_name: str = "briaai/RMBG-2.0"
-    trust_remote_code: bool = True
-    cache_dir: str = ""
-    local_files_only: bool = False
 class Config:
-    """Minimal configuration class."""
     def __init__(self):
-        """Initialize with defaults."""
-        self.paths = Paths(input_folder="", output_folder="", boundingbox_dir="")
-        self.processing = ProcessingParams()
         self.output = OutputSettings()
-        self.model = ModelSettings()
     def get_device(self) -> str:
-        """Get processing device."""
-        if self.model.device == "auto":
-            import torch
-            return "cuda" if torch.cuda.is_available() else "cpu"
-        return self.model.device
     def validate(self) -> bool:
-        """Validate configuration."""
         if self.paths.input_folder and not os.path.exists(self.paths.input_folder):
-            raise FileNotFoundError(f"Input folder does not exist: {self.paths.input_folder}")
         return True

+"""Minimal configuration."""
 import os
 from dataclasses import dataclass
 @dataclass
 class Paths:
+    """File paths."""
     input_folder: str
     output_folder: str
     boundingbox_dir: str = ""
     def __post_init__(self):
         self.input_folder = os.path.abspath(self.input_folder)
         self.output_folder = os.path.abspath(self.output_folder)
 @dataclass
 class OutputSettings:
     """Output settings."""
     save_images: bool = True
     plot_dpi: int = 100
 class Config:
+    """Minimal config."""
     def __init__(self):
+        self.paths = Paths(input_folder="", output_folder="")
         self.output = OutputSettings()
     def get_device(self) -> str:
+        """Get device."""
+        import torch
+        return "cuda" if torch.cuda.is_available() else "cpu"
     def validate(self) -> bool:
+        """Validate."""
         if self.paths.input_folder and not os.path.exists(self.paths.input_folder):
+            raise FileNotFoundError(f"Input folder not found: {self.paths.input_folder}")
         return True

sorghum_pipeline/data/__init__.py CHANGED Viewed

@@ -1,15 +1,6 @@
-"""
-Data loading and preprocessing modules.
-This package contains all data-related functionality including:
-- Raw image loading
-- Data preprocessing
-- Mask handling
-- Data validation
-"""
-from .loader import DataLoader
 from .preprocessor import ImagePreprocessor
 from .mask_handler import MaskHandler
-__all__ = ["DataLoader", "ImagePreprocessor", "MaskHandler"]

+"""Data preprocessing modules."""
 from .preprocessor import ImagePreprocessor
 from .mask_handler import MaskHandler
+__all__ = ["ImagePreprocessor", "MaskHandler"]

sorghum_pipeline/data/loader.py CHANGED Viewed

@@ -1,444 +1,34 @@
 """
-Data loading functionality for the Sorghum Pipeline.
-This module handles loading raw images, managing plant data,
-and organizing data according to the pipeline requirements.
 """
-import os
-import glob
-import json
 from pathlib import Path
-from typing import Dict, List, Tuple, Optional, Any
 from PIL import Image
-import numpy as np
 import logging
 logger = logging.getLogger(__name__)
 class DataLoader:
-    """Handles loading and organizing plant image data."""
-    # Plants to ignore completely (empty by default)
-    IGNORE_PLANTS = set()
-    # Plants where you want exactly one frame from their own folder
-    EXACT_FRAME = {
-        4: 7, 5: 5, 7: 5, 12: 5, 13: 5, 18: 7, 19: 2, 20: 3,
-        24: 6, 25: 5, 26: 5, 30: 8, 37: 7
-    }
-    # Plants where you want to borrow a frame from a different plant folder
-    BORROW_FRAME = {
-        14: (13, 5), 15: (14, 5), 16: (15, 5), 33: (34, 7),
-        34: (35, 7), 35: (35, 8), 36: (36, 6)
-    }
-    # Overrides provided by user: preferred frame per target plant name
-    FRAME_OVERRIDE_BY_NAME = {
-        'plant1': 9, 'plant2': 10, 'plant3': 9, 'plant5': 7, 'plant6': 9, 'plant8': 5,
-        'plant7': 9, 'plant10': 9, 'plant11': 9, 'plant12': 9,
-        'plant13': 10, 'plant14': 8, 'plant15': 11, 'plant19': 4, 'plant20': 7,
-        'plant21': 9, 'plant22': 10, 'plant25': 4, 'plant26': 2, 'plant27': 10, 'plant28': 9, 'plant29': 2,
-        'plant30': 9, 'plant31': 10, 'plant32': 9, 'plant33': 8,
-        'plant35': 9, 'plant36': 4, 'plant38': 9, 'plant39': 9, 'plant41': 9,
-        'plant42': 6, 'plant43': 10, 'plant44': 9, 'plant45': 7,
-        'plant47': 10, 'plant48': 11,
-    }
-    # Substitutes provided by user: map target plant name -> source plant name
-    PLANT_SUBSTITUTES_BY_NAME = {
-        'plant16': 'plant15', 'plant15': 'plant14', 'plant14': 'plant13',
-        'plant13': 'plant12', 'plant33': 'plant34', 'plant34': 'plant35',
-        'plant24': 'plant25', 'plant25': 'plant25', 'plant35': 'plant36',
-        'plant36': 'plant37', 'plant37': 'plant37', 'plant44': 'plant43',
-        'plant45': 'plant44',
-    }
-    def __init__(self, input_folder: str, debug: bool = False, include_ignored: bool = False, strict_loader: bool = False, excluded_dates: Optional[List[str]] = None):
-        """
-        Initialize the data loader.
-        Args:
-            input_folder: Path to the input dataset folder
-            debug: Enable debug logging
-        """
         self.input_folder = Path(input_folder)
         self.debug = debug
-        self.include_ignored = include_ignored
-        self.strict_loader = strict_loader
         if not self.input_folder.exists():
             raise FileNotFoundError(f"Input folder does not exist: {input_folder}")
-        # Normalize excluded dates as a set of folder names (with dashes)
-        self.excluded_dates = set(excluded_dates or [])
     def load_selected_frames(self) -> Dict[str, Dict[str, Any]]:
-        """
-        Load selected frames according to predefined rules.
-        If strict_loader is True, load only frame numbers from the plant's own folder (no borrowing/special picks).
-        Returns:
-            Dictionary with plant data organized by key format: "YYYY_MM_DD_plantX_frameY"
-        """
-        logger.info("Loading selected frames from dataset...")
-        plants = {}
-        # Detect if input folder is a direct date folder (contains plant folders)
-        first_items = list(self.input_folder.iterdir())
-        has_plant_folders = any(item.is_dir() and item.name.startswith('plant') for item in first_items)
-        def choose_frame_and_source(pid: int) -> Tuple[int, str]:
-            if self.strict_loader:
-                # In strict mode, honor explicit frame overrides AND substitution of source plant
-                plant_name_local = f"plant{pid}"
-                frame_num = self.FRAME_OVERRIDE_BY_NAME.get(
-                    plant_name_local,
-                    self.EXACT_FRAME.get(pid, 8)
-                )
-                source_plant = self.PLANT_SUBSTITUTES_BY_NAME.get(plant_name_local, plant_name_local)
-                return frame_num, source_plant
-            # Original behavior
-            frame_num = self._get_frame_number(pid)
-            source_plant = self._get_source_plant(pid)
-            return frame_num, source_plant
-        if has_plant_folders:
-            # Direct date folder structure
-            date_name = self.input_folder.name
-            date_path = self.input_folder
-            for plant_name in sorted(os.listdir(date_path)):
-                plant_path = date_path / plant_name
-                if not plant_path.is_dir():
-                    continue
-                try:
-                    plant_id = int(plant_name.replace("plant", ""))
-                except ValueError:
-                    continue
-                if (plant_id in self.IGNORE_PLANTS) and (not self.include_ignored):
-                    if self.debug:
-                        logger.debug(f"Ignoring plant {plant_id}")
-                    continue
-                frame_num, source_plant = choose_frame_and_source(plant_id)
-                frame_data = self._load_single_frame(date_path, source_plant, frame_num, plant_name)
-                if frame_data:
-                    key = f"{date_name.replace('-', '_')}_{plant_name}_frame{frame_num}"
-                    plants[key] = frame_data
-                    logger.debug(f"Loaded {key}")
-        else:
-            # Parent folder structure with date subfolders
-            for date_name in sorted(os.listdir(self.input_folder)):
-                date_path = self.input_folder / date_name
-                if not date_path.is_dir():
-                    continue
-                if date_name in self.excluded_dates:
-                    logger.info(f"Skipping excluded date: {date_name}")
-                    continue
-                for plant_name in sorted(os.listdir(date_path)):
-                    plant_path = date_path / plant_name
-                    if not plant_path.is_dir():
-                        continue
-                    try:
-                        plant_id = int(plant_name.replace("plant", ""))
-                    except ValueError:
-                        continue
-                    if (plant_id in self.IGNORE_PLANTS) and (not self.include_ignored):
-                        if self.debug:
-                            logger.debug(f"Ignoring plant {plant_id}")
-                        continue
-                    frame_num, source_plant = choose_frame_and_source(plant_id)
-                    frame_data = self._load_single_frame(date_path, source_plant, frame_num, plant_name)
-                    if frame_data:
-                        key = f"{date_name.replace('-', '_')}_{plant_name}_frame{frame_num}"
-                        plants[key] = frame_data
-                        logger.debug(f"Loaded {key}")
-        logger.info(f"Successfully loaded {len(plants)} plant frames")
-        return plants
     def load_all_frames(self) -> Dict[str, Dict[str, Any]]:
-        """
-        Load all available frames for each plant.
-        Returns:
-            Dictionary with all plant frames
-        """
-        logger.info("Loading all frames from dataset...")
-        plants = {}
-        # Check if we're directly in a date folder (contains plant folders)
-        # or in a parent folder (contains date folders)
-        first_items = list(self.input_folder.iterdir())
-        has_plant_folders = any(item.is_dir() and item.name.startswith('plant') for item in first_items)
-        if has_plant_folders:
-            # We're directly in a date folder
-            logger.info("Detected direct date folder structure")
-            date_name = self.input_folder.name
-            self._load_plants_from_date_folder(self.input_folder, date_name, plants)
-        else:
-            # We're in a parent folder with date subfolders
-            logger.info("Detected parent folder structure")
-            for date_name in sorted(os.listdir(self.input_folder)):
-                date_path = self.input_folder / date_name
-                if not date_path.is_dir():
-                    continue
-                if date_name in self.excluded_dates:
-                    logger.info(f"Skipping excluded date: {date_name}")
-                    continue
-                logger.info(f"Processing date: {date_name}")
-                self._load_plants_from_date_folder(date_path, date_name, plants)
-        logger.info(f"Successfully loaded {len(plants)} plant frames")
-        return plants
-    def _load_plants_from_date_folder(self, date_path: Path, date_name: str, plants: Dict[str, Dict[str, Any]]) -> None:
-        """Load plants from a date folder."""
-        for plant_name in sorted(os.listdir(date_path)):
-            plant_path = date_path / plant_name
-            if not plant_path.is_dir():
-                continue
-            # Extract plant ID
-            try:
-                plant_id = int(plant_name.replace("plant", ""))
-            except ValueError:
-                logger.warning(f"Could not extract plant ID from {plant_name}")
-                continue
-            # Skip ignored plants
-            if (plant_id in self.IGNORE_PLANTS) and (not self.include_ignored):
-                logger.info(f"Skipping ignored plant {plant_id}")
-                continue
-            logger.info(f"Processing plant {plant_id}")
-            # Load all frames for this plant
-            pattern = str(plant_path / f"{plant_name}_frame*.tif")
-            frame_files = sorted(glob.glob(pattern))
-            logger.info(f"Found {len(frame_files)} frame files for {plant_name}")
-            for frame_path in frame_files:
-                frame_data = self._load_frame_from_path(frame_path, plant_name)
-                if frame_data:
-                    frame_id = Path(frame_path).stem.split("_frame")[-1]
-                    key = f"{date_name.replace('-', '_')}_{plant_name}_frame{frame_id}"
-                    plants[key] = frame_data
-                    logger.debug(f"Loaded frame: {key}")
-                else:
-                    logger.warning(f"Failed to load frame: {frame_path}")
-    def load_single_plant(self, date: str, plant: str, frame: int) -> Optional[Dict[str, Any]]:
-        """
-        Load a specific plant frame.
-        Args:
-            date: Date string (e.g., "2025-02-05")
-            plant: Plant name (e.g., "plant1")
-            frame: Frame number
-        Returns:
-            Plant data dictionary or None if not found
-        """
-        date_path = self.input_folder / date
-        if not date_path.exists():
-            logger.error(f"Date folder not found: {date}")
-            return None
-        plant_path = date_path / plant
-        if not plant_path.exists():
-            logger.error(f"Plant folder not found: {plant}")
-            return None
-        filename = f"{plant}_frame{frame}.tif"
-        frame_path = plant_path / filename
-        return self._load_frame_from_path(str(frame_path), plant)
-    def _get_frame_number(self, plant_id: int) -> int:
-        """Get the frame number for a plant ID."""
-        plant_name = f"plant{plant_id}"
-        # Highest priority: explicit overrides by plant name
-        if plant_name in self.FRAME_OVERRIDE_BY_NAME:
-            return int(self.FRAME_OVERRIDE_BY_NAME[plant_name])
-        # Next: original exact/borrrow rules
-        if plant_id in self.EXACT_FRAME:
-            return self.EXACT_FRAME[plant_id]
-        elif plant_id in self.BORROW_FRAME:
-            return self.BORROW_FRAME[plant_id][1]
-        else:
-            return 8  # Default frame
-    def _get_source_plant(self, plant_id: int) -> str:
-        """Get the source plant name for a plant ID."""
-        plant_name = f"plant{plant_id}"
-        # Highest priority: explicit substitutes by plant name
-        if plant_name in self.PLANT_SUBSTITUTES_BY_NAME:
-            return self.PLANT_SUBSTITUTES_BY_NAME[plant_name]
-        # Next: original borrow rules
-        if plant_id in self.BORROW_FRAME:
-            source_id = self.BORROW_FRAME[plant_id][0]
-            return f"plant{source_id}"
-        else:
-            return f"plant{plant_id}"
-    def _load_single_frame(self, date_path: Path, source_plant: str,
-                          frame_num: int, target_plant: str) -> Optional[Dict[str, Any]]:
-        """Load a single frame from the specified path."""
-        filename = f"{source_plant}_frame{frame_num}.tif"
-        frame_path = date_path / source_plant / filename
-        if not frame_path.exists():
-            if self.debug:
-                logger.warning(f"Frame not found: {frame_path}")
-            return None
-        return self._load_frame_from_path(str(frame_path), target_plant)
-    def _load_frame_from_path(self, frame_path: str, plant_name: str) -> Optional[Dict[str, Any]]:
-        """Load frame data from a file path."""
-        try:
-            logger.debug(f"Attempting to load: {frame_path}")
-            image = Image.open(frame_path)
-            filename = Path(frame_path).name
-            logger.debug(f"Successfully loaded image: {filename}, size: {image.size}")
-            return {
-                "raw_image": (image, filename),
-                "plant_name": plant_name,
-                "file_path": frame_path
-            }
-        except Exception as e:
-            logger.error(f"Failed to load {frame_path}: {e}")
-            return None
-    def load_bounding_boxes(self, bbox_dir: str) -> Dict[str, Tuple[int, int, int, int]]:
-        """
-        Load bounding box data from JSON files.
-        Args:
-            bbox_dir: Directory containing bounding box JSON files
-        Returns:
-            Dictionary mapping plant names to bounding box coordinates
-        """
-        bbox_path = Path(bbox_dir)
-        if not bbox_path.exists():
-            raise FileNotFoundError(f"Bounding box directory not found: {bbox_dir}")
-        bbox_lookup = {}
-        for json_file in bbox_path.glob("*.json"):
-            stem = json_file.stem
-            # Normalize stems like plant_33_new -> plant33
-            if stem.startswith('plant_'):
-                parts = stem.split('_')
-                try:
-                    idx = next(i for i,p in enumerate(parts) if p.isdigit())
-                    plant_id = f"plant{parts[idx]}"
-                except Exception:
-                    plant_id = stem.replace('_', '')
-            else:
-                plant_id = stem
-            try:
-                with open(json_file, 'r') as f:
-                    data = json.load(f)
-                shapes = data.get('shapes', [])
-                # Prefer rectangle labeled 'sorghum' (case-insensitive), else first rectangle
-                def _is_sorghum_label(s: dict) -> bool:
-                    for key in ('label', 'name', 'text'):
-                        val = s.get(key)
-                        if isinstance(val, str) and val.lower() == 'sorghum':
-                            return True
-                    return False
-                rect = next((s for s in shapes if s.get('shape_type') == 'rectangle' and _is_sorghum_label(s)), None)
-                if rect is None:
-                    rect = next((s for s in shapes if s.get('shape_type') == 'rectangle'), None)
-                if rect:
-                    (x1, y1), (x2, y2) = rect['points']
-                    bbox_lookup[plant_id] = (
-                        int(max(0, x1)),
-                        int(max(0, y1)),
-                        int(min(1e9, x2)),
-                        int(min(1e9, y2))
-                    )
-                else:
-                    bbox_lookup[plant_id] = None
-            except Exception as e:
-                logger.error(f"Failed to load bounding box {json_file}: {e}")
-        logger.info(f"Loaded {len(bbox_lookup)} bounding boxes")
-        return bbox_lookup
-    def load_hand_labels(self, labels_dir: str) -> Dict[str, np.ndarray]:
-        """
-        Load hand-labeled masks from JSON files.
-        Args:
-            labels_dir: Directory containing label JSON files
-        Returns:
-            Dictionary mapping plant names to mask arrays
-        """
-        labels_path = Path(labels_dir)
-        if not labels_path.exists():
-            logger.warning(f"Labels directory not found: {labels_dir}")
-            return {}
-        masks = {}
-        for json_file in labels_path.glob("*.json"):
-            plant_id = json_file.stem
-            try:
-                with open(json_file, 'r') as f:
-                    data = json.load(f)
-                # Create mask from shapes (assuming we have image dimensions)
-                # This would need to be adapted based on your label format
-                mask = self._create_mask_from_shapes(data)
-                if mask is not None:
-                    masks[plant_id] = mask
-            except Exception as e:
-                logger.error(f"Failed to load label {json_file}: {e}")
-        logger.info(f"Loaded {len(masks)} hand labels")
-        return masks
-    def _create_mask_from_shapes(self, data: Dict) -> Optional[np.ndarray]:
-        """Create a mask array from shape data."""
-        # This is a placeholder - implement based on your label format
-        # For now, return None
-        return None
-    def validate_data(self, plants: Dict[str, Dict[str, Any]]) -> bool:
-        """
-        Validate loaded plant data.
-        Args:
-            plants: Dictionary of plant data
-        Returns:
-            True if data is valid, False otherwise
-        """
-        if not plants:
-            logger.error("No plant data loaded")
-            return False
-        for key, data in plants.items():
-            if "raw_image" not in data:
-                logger.error(f"Missing raw_image in {key}")
-                return False
-            image, filename = data["raw_image"]
-            if not isinstance(image, Image.Image):
-                logger.error(f"Invalid image type in {key}")
-                return False
-        logger.info("Data validation passed")
-        return True

 """
+Minimal data loading (not used in single-image demo mode).
 """
 from pathlib import Path
+from typing import Dict, List, Optional, Any
 from PIL import Image
 import logging
 logger = logging.getLogger(__name__)
 class DataLoader:
+    """Minimal data loader (placeholder - not used in demo)."""
+    def __init__(self, input_folder: str, debug: bool = False,
+                 include_ignored: bool = False, strict_loader: bool = False,
+                 excluded_dates: Optional[List[str]] = None):
+        """Initialize data loader."""
         self.input_folder = Path(input_folder)
         self.debug = debug
         if not self.input_folder.exists():
             raise FileNotFoundError(f"Input folder does not exist: {input_folder}")
     def load_selected_frames(self) -> Dict[str, Dict[str, Any]]:
+        """Load selected frames (not used in minimal demo)."""
+        logger.warning("DataLoader not used in minimal demo mode")
+        return {}
     def load_all_frames(self) -> Dict[str, Dict[str, Any]]:
+        """Load all frames (not used in minimal demo)."""
+        logger.warning("DataLoader not used in minimal demo mode")
+        return {}

sorghum_pipeline/data/mask_handler.py CHANGED Viewed

@@ -1,19 +1,13 @@
-"""
-Minimal mask handling for the Sorghum Pipeline.
-"""
 import numpy as np
 import cv2
-import logging
-logger = logging.getLogger(__name__)
 class MaskHandler:
-    """Minimal mask handling."""
     def __init__(self, min_area: int = 1000, kernel_size: int = 7):
-        """Initialize mask handler."""
         self.min_area = min_area
         self.kernel_size = kernel_size
@@ -22,7 +16,6 @@ class MaskHandler:
         if mask is None:
             return image
         if mask.shape[:2] != image.shape[:2]:
-            mask = cv2.resize(mask.astype(np.uint8), (image.shape[1], image.shape[0]),
-                            interpolation=cv2.INTER_NEAREST)
-        binary = (mask.astype(np.int32) > 0).astype(np.uint8) * 255
         return cv2.bitwise_and(image, image, mask=binary)

+"""Minimal mask handling."""
 import numpy as np
 import cv2
 class MaskHandler:
+    """Minimal mask operations."""
     def __init__(self, min_area: int = 1000, kernel_size: int = 7):
         self.min_area = min_area
         self.kernel_size = kernel_size
         if mask is None:
             return image
         if mask.shape[:2] != image.shape[:2]:
+            mask = cv2.resize(mask, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST)
+        binary = (mask > 0).astype(np.uint8) * 255
         return cv2.bitwise_and(image, image, mask=binary)

sorghum_pipeline/data/preprocessor.py CHANGED Viewed

@@ -1,26 +1,19 @@
-"""
-Minimal image preprocessing for the Sorghum Pipeline.
-"""
 import numpy as np
-import cv2
 from PIL import Image
 from typing import Dict, Tuple, Any
 from itertools import product
-import logging
-logger = logging.getLogger(__name__)
 class ImagePreprocessor:
-    """Minimal image preprocessing."""
     def __init__(self, target_size=None):
-        """Initialize preprocessor."""
         self.target_size = target_size
     def convert_to_uint8(self, arr: np.ndarray) -> np.ndarray:
-        """Convert array to uint8."""
         arr = np.nan_to_num(arr, nan=0.0, posinf=0.0, neginf=0.0)
         if arr.ptp() > 0:
             normalized = (arr - arr.min()) / (arr.ptp() + 1e-6) * 255
@@ -29,38 +22,24 @@ class ImagePreprocessor:
         return np.clip(normalized, 0, 255).astype(np.uint8)
     def process_raw_image(self, pil_img: Image.Image) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
-        """Process 4-band image into composite and spectral bands."""
         d = pil_img.size[0] // 2
-        boxes = [
-            (j, i, j + d, i + d)
-            for i, j in product(range(0, pil_img.height, d), range(0, pil_img.width, d))
-        ]
         stack = np.stack([np.array(pil_img.crop(box), dtype=float) for box in boxes], axis=-1)
         green, red, red_edge, nir = np.split(stack, 4, axis=-1)
-        # Pseudo-RGB composite: (green, red_edge, red)
         composite = np.concatenate([green, red_edge, red], axis=-1)
         composite_uint8 = self.convert_to_uint8(composite)
-        spectral_bands = {
-            "green": green,
-            "red": red,
-            "red_edge": red_edge,
-            "nir": nir
-        }
         return composite_uint8, spectral_bands
     def create_composites(self, plants: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
-        """Create composites for all plants."""
         for key, pdata in plants.items():
-            try:
-                if "raw_image" in pdata:
-                    image, _ = pdata["raw_image"]
-                    composite, spectral_stack = self.process_raw_image(image)
-                    pdata["composite"] = composite
-                    pdata["spectral_stack"] = spectral_stack
-            except Exception as e:
-                logger.error(f"Failed to create composite for {key}: {e}")
         return plants

+"""Minimal image preprocessing."""
 import numpy as np
 from PIL import Image
 from typing import Dict, Tuple, Any
 from itertools import product
 class ImagePreprocessor:
+    """Minimal preprocessor."""
     def __init__(self, target_size=None):
         self.target_size = target_size
     def convert_to_uint8(self, arr: np.ndarray) -> np.ndarray:
+        """Convert to uint8."""
         arr = np.nan_to_num(arr, nan=0.0, posinf=0.0, neginf=0.0)
         if arr.ptp() > 0:
             normalized = (arr - arr.min()) / (arr.ptp() + 1e-6) * 255
         return np.clip(normalized, 0, 255).astype(np.uint8)
     def process_raw_image(self, pil_img: Image.Image) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
+        """Process 4-band to composite + spectral."""
         d = pil_img.size[0] // 2
+        boxes = [(j, i, j + d, i + d) for i, j in product(range(0, pil_img.height, d), range(0, pil_img.width, d))]
         stack = np.stack([np.array(pil_img.crop(box), dtype=float) for box in boxes], axis=-1)
         green, red, red_edge, nir = np.split(stack, 4, axis=-1)
         composite = np.concatenate([green, red_edge, red], axis=-1)
         composite_uint8 = self.convert_to_uint8(composite)
+        spectral_bands = {"green": green, "red": red, "red_edge": red_edge, "nir": nir}
         return composite_uint8, spectral_bands
     def create_composites(self, plants: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
+        """Create composites."""
         for key, pdata in plants.items():
+            if "raw_image" in pdata:
+                image, _ = pdata["raw_image"]
+                composite, spectral_stack = self.process_raw_image(image)
+                pdata["composite"] = composite
+                pdata["spectral_stack"] = spectral_stack
         return plants

sorghum_pipeline/features/__init__.py CHANGED Viewed

@@ -1,21 +1,7 @@
-"""
-Feature extraction modules for the Sorghum Pipeline.
-This package contains all feature extraction functionality including:
-- Texture features (LBP, HOG, Lacunarity, EHD)
-- Vegetation indices
-- Morphological features
-- Spectral features
-"""
 from .texture import TextureExtractor
 from .vegetation import VegetationIndexExtractor
 from .morphology import MorphologyExtractor
-from .spectral import SpectralExtractor
-__all__ = [
-    "TextureExtractor",
-    "VegetationIndexExtractor",
-    "MorphologyExtractor",
-    "SpectralExtractor"
-]

+"""Feature extraction modules."""
 from .texture import TextureExtractor
 from .vegetation import VegetationIndexExtractor
 from .morphology import MorphologyExtractor
+__all__ = ["TextureExtractor", "VegetationIndexExtractor", "MorphologyExtractor"]

sorghum_pipeline/output/__init__.py CHANGED Viewed

@@ -1,13 +1,5 @@
-"""
-Output management modules for the Sorghum Pipeline.
-This package contains output functionality including:
-- Result saving
-- Visualization generation
-- Report creation
-- Data export
-"""
 from .manager import OutputManager
-__all__ = ["OutputManager"]

+"""Output management modules."""
 from .manager import OutputManager
+__all__ = ["OutputManager"]

sorghum_pipeline/pipeline.py CHANGED Viewed

@@ -1,16 +1,12 @@
 """
-Main pipeline class for the Sorghum Plant Phenotyping Pipeline.
-Minimal single-image version for Hugging Face demo.
 """
-import os
 import logging
 from pathlib import Path
-from typing import Dict, Any, Optional
 import numpy as np
 import cv2
-from sklearn.decomposition import PCA
 from .config import Config
 from .data import ImagePreprocessor, MaskHandler
@@ -22,223 +18,112 @@ logger = logging.getLogger(__name__)
 class SorghumPipeline:
-    """Minimal pipeline for single-image plant phenotyping."""
     def __init__(self, config: Config):
-        """Initialize the minimal pipeline."""
-        self._setup_logging()
         self.config = config
         self.config.validate()
-        self._initialize_components()
-        logger.info("Sorghum Pipeline initialized")
-    def _setup_logging(self):
-        """Setup logging configuration."""
-        logging.basicConfig(
-            level=logging.INFO,
-            format='%(asctime)s - %(levelname)s - %(message)s',
-            handlers=[logging.StreamHandler()]
-        )
-    def _initialize_components(self):
-        """Initialize pipeline components."""
-        self.preprocessor = ImagePreprocessor(target_size=None)
-        self.mask_handler = MaskHandler(min_area=1000, kernel_size=7)
         self.texture_extractor = TextureExtractor()
         self.vegetation_extractor = VegetationIndexExtractor()
         self.morphology_extractor = MorphologyExtractor()
         self.segmentation_manager = SegmentationManager(
-            model_name="briaai/RMBG-2.0",
             device=self.config.get_device(),
-            threshold=0.5,
             trust_remote_code=True
         )
         self.output_manager = OutputManager(
             output_folder=self.config.paths.output_folder,
             settings=self.config.output
         )
     def run(self, single_image_path: str) -> Dict[str, Any]:
-        """
-        Run minimal pipeline on single image.
-        Args:
-            single_image_path: Path to input image
-        Returns:
-            Dictionary containing results
-        """
-        logger.info("Starting minimal single-image pipeline...")
-        try:
-            import time
-            from PIL import Image as _Image
-            total_start = time.perf_counter()
-            # Load single image
-            _p = Path(single_image_path)
-            _img = _Image.open(str(_p))
-            plants = {
-                "demo_demo_frame1": {
-                    "raw_image": (_img, _p.name),
-                    "plant_name": "demo",
-                    "file_path": str(_p)
-                }
-            }
-            # Create composite
-            plants = self.preprocessor.create_composites(plants)
-            # Segment
-            plants = self._segment_plants(plants)
-            # Extract features
-            plants = self._extract_features(plants)
-            # Generate outputs
-            self._generate_outputs(plants)
-            # Summary
-            summary = self._create_summary(plants)
-            total_time = time.perf_counter() - total_start
-            logger.info(f"Pipeline completed in {total_time:.2f}s")
-            return {
-                "plants": plants,
-                "summary": summary,
-                "config": self.config,
-                "timing_seconds": total_time
             }
-        except Exception as e:
-            logger.error(f"Pipeline failed: {e}")
-            raise
-    def _segment_plants(self, plants: Dict[str, Any]) -> Dict[str, Any]:
-        """Segment plants using BRIA model (full image)."""
         for key, pdata in plants.items():
-            try:
-                composite = pdata['composite']
-                soft_mask = self.segmentation_manager.segment_image_soft(composite)
-                pdata['soft_mask'] = soft_mask
-                pdata['mask'] = (soft_mask * 255.0).astype(np.uint8)
-                logger.info(f"Segmented {key}")
-            except Exception as e:
-                logger.error(f"Segmentation failed for {key}: {e}")
-                pdata['soft_mask'] = np.zeros(composite.shape[:2], dtype=np.float32)
-                pdata['mask'] = np.zeros(composite.shape[:2], dtype=np.uint8)
         return plants
     def _extract_features(self, plants: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract features from plants."""
         for key, pdata in plants.items():
-            try:
-                pdata['texture_features'] = self._extract_texture_features(pdata)
-                pdata['vegetation_indices'] = self._extract_vegetation_indices(pdata)
-                pdata['morphology_features'] = self._extract_morphology_features(pdata)
-                logger.info(f"Features extracted for {key}")
-            except Exception as e:
-                logger.error(f"Feature extraction failed for {key}: {e}")
-                pdata['texture_features'] = {}
-                pdata['vegetation_indices'] = {}
-                pdata['morphology_features'] = {}
-        return plants
-    def _extract_texture_features(self, pdata: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract texture features from pseudo-color image only."""
-        features = {}
-        try:
-            # Only process pseudo-color composite
             composite = pdata['composite']
             mask = pdata.get('mask')
-            if mask is not None:
-                masked = self.mask_handler.apply_mask_to_image(composite, mask)
-                gray_image = cv2.cvtColor(masked, cv2.COLOR_BGR2GRAY)
-            else:
-                gray_image = cv2.cvtColor(composite, cv2.COLOR_BGR2GRAY)
-            band_features = self.texture_extractor.extract_all_texture_features(gray_image)
-            stats = self.texture_extractor.compute_texture_statistics(band_features, mask)
-            features['color'] = {
-                'features': band_features,
-                'statistics': stats
-            }
-        except Exception as e:
-            logger.error(f"Texture extraction failed: {e}")
-            features['color'] = {'features': {}, 'statistics': {}}
-        return features
-    def _extract_vegetation_indices(self, pdata: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract vegetation indices (NDVI, ARI, GNDVI only)."""
-        try:
-            spectral_stack = pdata.get('spectral_stack', {})
-            mask = pdata.get('mask')
-            if not spectral_stack or mask is None:
-                return {}
-            out: Dict[str, Any] = {}
-            for name in ("NDVI", "ARI", "GNDVI"):
-                bands = self.vegetation_extractor.index_bands.get(name, [])
-                if not all(b in spectral_stack for b in bands):
-                    continue
-                arrays = []
-                for b in bands:
-                    arr = spectral_stack[b]
-                    if isinstance(arr, np.ndarray):
-                        arr = arr.squeeze(-1)
-                    arrays.append(np.asarray(arr, dtype=np.float64))
-                values = self.vegetation_extractor.index_formulas[name](*arrays).astype(np.float64)
-                binary_mask = (np.asarray(mask).astype(np.int32) > 0)
-                masked_values = np.where(binary_mask, values, np.nan)
-                valid = masked_values[~np.isnan(masked_values)]
-                stats = {
-                    'mean': float(np.mean(valid)) if valid.size else 0.0,
-                    'std': float(np.std(valid)) if valid.size else 0.0,
-                    'min': float(np.min(valid)) if valid.size else 0.0,
-                    'max': float(np.max(valid)) if valid.size else 0.0,
-                    'median': float(np.median(valid)) if valid.size else 0.0,
-                }
-                out[name] = {'values': masked_values, 'statistics': stats}
-            return out
-        except Exception as e:
-            logger.error(f"Vegetation index extraction failed: {e}")
-            return {}
-    def _extract_morphology_features(self, pdata: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract morphological features."""
-        try:
-            composite = pdata.get('composite')
-            mask = pdata.get('mask')
-            if composite is None or mask is None:
-                return {}
-            return self.morphology_extractor.extract_morphology_features(composite, mask)
-        except Exception as e:
-            logger.error(f"Morphology extraction failed: {e}")
-            return {}
-    def _generate_outputs(self, plants: Dict[str, Any]) -> None:
-        """Generate output files."""
-        self.output_manager.create_output_directories()
-        for key, pdata in plants.items():
-            try:
-                self.output_manager.save_plant_results(key, pdata)
-            except Exception as e:
-                logger.error(f"Output generation failed for {key}: {e}")
-    def _create_summary(self, plants: Dict[str, Any]) -> Dict[str, Any]:
-        """Create summary of results."""
-        return {
-            "total_plants": len(plants),
-            "successful_plants": sum(1 for p in plants.values() if p.get('texture_features')),
-            "features_extracted": {
-                "texture": sum(1 for p in plants.values() if p.get('texture_features')),
-                "vegetation": sum(1 for p in plants.values() if p.get('vegetation_indices')),
-                "morphology": sum(1 for p in plants.values() if p.get('morphology_features'))
             }
-        }

 """
+Minimal single-image pipeline for Hugging Face demo.
 """
 import logging
 from pathlib import Path
+from typing import Dict, Any
 import numpy as np
 import cv2
 from .config import Config
 from .data import ImagePreprocessor, MaskHandler
 class SorghumPipeline:
     """Minimal pipeline for single-image processing.

     Stages, in order: composite creation, segmentation, feature extraction
     (texture, vegetation indices, morphology) and saving of the results.
     """

     def __init__(self, config: "Config"):
         """Validate ``config`` and create all pipeline components.

         Args:
             config: Pipeline configuration. ``validate()``, ``get_device()``,
                 ``paths.output_folder`` and ``output`` are read here.
         """
         logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s')
         self.config = config
         self.config.validate()
         # Initialize components with defaults
         self.preprocessor = ImagePreprocessor()
         self.mask_handler = MaskHandler()
         self.texture_extractor = TextureExtractor()
         self.vegetation_extractor = VegetationIndexExtractor()
         self.morphology_extractor = MorphologyExtractor()
         self.segmentation_manager = SegmentationManager(
             device=self.config.get_device(),
             trust_remote_code=True
         )
         self.output_manager = OutputManager(
             output_folder=self.config.paths.output_folder,
             settings=self.config.output
         )
         logger.info("Pipeline initialized")

     def run(self, single_image_path: str) -> Dict[str, Any]:
         """Run the whole pipeline on one image file.

         Args:
             single_image_path: Path of the raw input image.

         Returns:
             ``{"plants": ..., "timing": ...}`` where ``plants`` holds the
             single entry ``"demo"`` with all intermediate and final results
             and ``timing`` is the elapsed wall time in seconds.
         """
         logger.info("Processing single image...")
         from PIL import Image
         import time
         start = time.perf_counter()
         # Load image
         img = Image.open(single_image_path)
         plants = {
             "demo": {
                 "raw_image": (img, Path(single_image_path).name),
                 "plant_name": "demo",
             }
         }
         # Process: composite -> segment -> features -> save
         plants = self.preprocessor.create_composites(plants)
         plants = self._segment(plants)
         plants = self._extract_features(plants)
         self.output_manager.create_output_directories()
         for key, pdata in plants.items():
             self.output_manager.save_plant_results(key, pdata)
         elapsed = time.perf_counter() - start
         # Lazy %-formatting: the message is only built if INFO is enabled.
         logger.info("Completed in %.2fs", elapsed)
         return {"plants": plants, "timing": elapsed}

     def _segment(self, plants: Dict[str, Any]) -> Dict[str, Any]:
         """Store a uint8 ``mask`` for every plant, derived from its composite.

         The soft mask returned by the segmentation manager is scaled by 255.
         NOTE(review): assumes the soft mask lies in [0, 1]; larger values
         would wrap around in the uint8 cast - confirm against the manager.
         """
         for pdata in plants.values():
             composite = pdata['composite']
             soft_mask = self.segmentation_manager.segment_image_soft(composite)
             pdata['mask'] = (soft_mask * 255.0).astype(np.uint8)
         return plants

     def _extract_features(self, plants: Dict[str, Any]) -> Dict[str, Any]:
         """Extract texture, vegetation, and morphology features.

         Writes ``texture_features``, ``vegetation_indices`` and
         ``morphology_features`` into every plant dict and returns ``plants``.
         Vegetation and morphology results are ``{}`` when no mask is present.
         """
         for pdata in plants.values():
             composite = pdata['composite']
             mask = pdata.get('mask')

             # Texture on the grayscale of the (masked, if possible) composite
             if mask is not None:
                 masked = self.mask_handler.apply_mask_to_image(composite, mask)
             else:
                 masked = composite
             gray = cv2.cvtColor(masked, cv2.COLOR_BGR2GRAY)
             feats = self.texture_extractor.extract_all_texture_features(gray)
             stats = self.texture_extractor.compute_texture_statistics(feats, mask)
             pdata['texture_features'] = {'color': {'features': feats, 'statistics': stats}}

             # Vegetation: NDVI, ARI, GNDVI
             spectral = pdata.get('spectral_stack', {})
             if spectral and mask is not None:
                 pdata['vegetation_indices'] = self._compute_vegetation(spectral, mask)
             else:
                 pdata['vegetation_indices'] = {}

             # Morphology needs a mask; without one fall back to an empty
             # result, consistent with the vegetation branch above.
             if mask is not None:
                 pdata['morphology_features'] = self.morphology_extractor.extract_morphology_features(composite, mask)
             else:
                 pdata['morphology_features'] = {}
         return plants

     def _compute_vegetation(self, spectral: Dict[str, np.ndarray], mask: np.ndarray) -> Dict[str, Any]:
         """Compute NDVI, ARI, GNDVI only.

         Args:
             spectral: Band name -> array with a trailing axis of length 1.
             mask: Array of the band's 2-D shape; pixels > 0 are kept.

         Returns:
             Index name -> ``{'values': ..., 'statistics': {'mean', 'std'}}``.
             ``values`` is NaN outside the mask; statistics cover the
             non-NaN pixels and are 0.0 when there are none. An index is
             omitted when one of its bands is missing from ``spectral``.
         """
         # The mask does not depend on the index, so build it once.
         binary_mask = np.asarray(mask) > 0
         out = {}
         for name in ("NDVI", "ARI", "GNDVI"):
             bands = self.vegetation_extractor.index_bands.get(name, [])
             if not all(b in spectral for b in bands):
                 continue
             arrays = [np.asarray(spectral[b].squeeze(-1), dtype=np.float64) for b in bands]
             values = self.vegetation_extractor.index_formulas[name](*arrays).astype(np.float64)
             masked_values = np.where(binary_mask, values, np.nan)
             valid = masked_values[~np.isnan(masked_values)]
             stats = {
                 'mean': float(np.mean(valid)) if valid.size else 0.0,
                 'std': float(np.std(valid)) if valid.size else 0.0,
             }
             out[name] = {'values': masked_values, 'statistics': stats}
         return out

sorghum_pipeline/segmentation/__init__.py CHANGED Viewed

@@ -1,12 +1,5 @@
-"""
-Segmentation modules for the Sorghum Pipeline.
-This package contains segmentation functionality including:
-- BRIA model integration
-- Mask post-processing
-- Segmentation validation
-"""
 from .manager import SegmentationManager
-__all__ = ["SegmentationManager"]

+"""Segmentation modules."""
 from .manager import SegmentationManager
+__all__ = ["SegmentationManager"]