Spaces:

dev-jas
/

polymer-aging-ml

Sleeping

devjas1 commited on Aug 29

Commit

9d0759c

1 Parent(s): 2fb5cb5

(FEAT): Add batch processing utilities for multi-file uploads

- Implement `create_batch_uploader` to handle batch file uploads.
- Add `process_multiple_files` for processing multiple files in batch mode.
- Include `display_batch_results` to render batch processing results in the UI.
- Enhance error handling for batch operations with `safe_execute`.
- Improve user experience with streamlined batch file management and result visualization.

Files changed (1) hide show

utils/multifile.py +93 -60

utils/multifile.py CHANGED Viewed

@@ -3,32 +3,33 @@ Handles multiple file uploads and iterative processing."""
 from typing import List, Dict, Any, Tuple, Optional
 import time
-import streamlit as st
-import numpy as np
 from .preprocessing import resample_spectrum
 from .errors import ErrorHandler, safe_execute
 from .results_manager import ResultsManager
 from .confidence import calculate_softmax_confidence
 def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[np.ndarray, np.ndarray]:
     """
     Parse spectrum data from text content
     Args:
         text_content: Raw text content of the spectrum file
         filename: Name of the file for error reporting
     Returns:
         Tuple of (x_values, y_values) as numpy arrays
     Raises:
         ValueError: If the data cannot be parsed
     """
     try:
         lines = text_content.strip().split('\n')
-        #==Remove empty lines and comments==
         data_lines = []
         for line in lines:
             line = line.strip()
@@ -38,39 +39,52 @@ def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[n
         if not data_lines:
             raise ValueError("No data lines found in file")
-        #==Try to parse==
         x_vals, y_vals = [], []
         for i, line in enumerate(data_lines):
             try:
-                #=Try comma separation first, then space=
-                if ',' in line:
-                    parts = line.split(',')
-                else:
-                    parts = line.split()
-                if len(parts) < 2:
-                    ErrorHandler.log_warning(f"Line {i+1} has fewer than 2 columns, skipping", f"Parsing {filename}")
-                    continue
-                x_val = float(parts[0].strip())
-                y_val = float(parts[1].split())
-                x_vals.append(x_val)
-                y_vals.append(y_val)
-            except (ValueError, IndexError) as e:
-                ErrorHandler.log_warning(f"Could not parse line {i+1}: {line}", f"Parsing {filename}")
-                continue
-            if len(x_vals) < 10:    #==Need minimum points for interpolation==
-                raise ValueError(f"Insufficient data points ({len(x_vals)}). Need at least 10 points.")
-        return np.array(x_vals), np.array(y_vals)
     except Exception as e:
         raise ValueError(f"Failed to parse spectrum data: {str(e)}")
 def process_single_file(
     filename: str,
     text_content: str,
@@ -81,7 +95,7 @@ def process_single_file(
 ) -> Optional[Dict[str, Any]]:
     """
     Process a single spectrum file
     Args:
         filename: Name of the file
         text_content: Raw text content
@@ -89,15 +103,15 @@ def process_single_file(
         load_model_func: Function to load the model
         run_inference_func: Function to run inference
         label_file_func: Function to extract ground truth label
     Returns:
         Dictionary with processing results or None if failed
     """
     start_time = time.time()
     try:
-        #==Parse spectrum data==
-        x_raw, y_raw, success = safe_execute(
             parse_spectrum_data,
             text_content,
             filename,
@@ -105,11 +119,13 @@ def process_single_file(
             show_error=False
         )
-        if not success:
             return None
-        #==Resample spectrum==
-        x_resampled, y_resampled, success = safe_execute(
             resample_spectrum,
             x_raw,
             y_raw,
@@ -118,11 +134,13 @@ def process_single_file(
             show_error=False
         )
-        if not success:
             return None
-        #==Run inference==
-        prediction, logits_list, probs, inference_time, logits, success = safe_execute(
             run_inference_func,
             y_resampled,
             model_choice,
@@ -130,27 +148,31 @@ def process_single_file(
             show_error=False
         )
-        if not success or prediction is None:
-            ErrorHandler.log_error(Exception("Inference failed"), f"processing {filename}")
             return None
-        #==Calculate confidence==
         if logits is not None:
-            probs_np, max_confidence, confidence_level, confidence_emoji = calculate_softmax_confidence(logits)
         else:
             probs_np = np.array([])
             max_confidence = 0.0
             confidence_level = "LOW"
             confidence_emoji = "🔴"
-        #==Get ground truth==
         try:
             ground_truth = label_file_func(filename)
             ground_truth = ground_truth if ground_truth >= 0 else None
         except Exception:
             ground_truth = None
-        #==Get predicted class==
         label_map = {0: "Stable (Unweathered)", 1: "Weathered (Degraded)"}
         predicted_class = label_map.get(prediction, f"Unknown ({prediction})")
@@ -183,6 +205,7 @@ def process_single_file(
             "processing_time": time.time() - start_time
         }
 def process_multiple_files(
     uploaded_files: List,
     model_choice: str,
@@ -193,7 +216,7 @@ def process_multiple_files(
 ) -> List[Dict[str, Any]]:
     """
     Process multiple uploaded files
     Args:
         uploaded_files: List of uploaded file objects
         model_choice: Selected model name
@@ -201,7 +224,7 @@ def process_multiple_files(
         run_inference_func: Function to run inference
         label_file_func: Function to extract ground truth label
         progress_callback: Optional callback to update progress
     Returns:
         List of processing results
     """
@@ -215,11 +238,12 @@ def process_multiple_files(
             progress_callback(i, total_files, uploaded_file.name)
         try:
-            #==Read file content==
             raw = uploaded_file.read()
-            text_content = raw.decode('utf-8') if isinstance(raw, bytes) else raw
-            #==Process the file==
             result = process_single_file(
                 uploaded_file.name,
                 text_content,
@@ -232,7 +256,7 @@ def process_multiple_files(
             if result:
                 results.append(result)
-                #==Add successful results to the results manager==
                 if result.get("success", False):
                     ResultsManager.add_results(
                         filename=result["filename"],
@@ -260,14 +284,16 @@ def process_multiple_files(
     if progress_callback:
         progress_callback(total_files, total_files, "Complete")
-    ErrorHandler.log_info(f"Completed batch processing: {sum(1 for r in results if r.get('success', False))}/{total_files} successful")
     return results
 def display_batch_results(results: List[Dict[str, Any]]) -> None:
     """
     Display batch processing results in the UI
     Args:
         results: List of processing results
     """
@@ -278,16 +304,18 @@ def display_batch_results(results: List[Dict[str, Any]]) -> None:
     successful = [r for r in results if r.get("success", False)]
     failed = [r for r in results if not r.get("success", False)]
-    #==Summary==
     col1, col2, col3 = st.columns(3)
     with col1:
         st.metric("Total Files", len(results))
     with col2:
-        st.metric("Successful", len(successful), delta=f"{len(successful)/len(results)*100:.1f}%")
     with col3:
-        st.metric("Failed", len(failed), delta=f"-{len(failed)/len(results)*100:.1f}%" if failed else "0%")
-    #==Results tabs==
     tab1, tab2 = st.tabs(["✅Successful", "❌ Failed"])
     with tab1:
@@ -296,12 +324,16 @@ def display_batch_results(results: List[Dict[str, Any]]) -> None:
                 with st.expander(f"{result['filename']}", expanded=False):
                     col1, col2 = st.columns(2)
                     with col1:
-                        st.write(f"**Prediction:** {result['predicted_class']}")
-                        st.write(f"**Confidence:** {result['confidence_emoji']} {result['confidence_level']} ({result['confidence']:.3f})")
                     with col2:
-                        st.write(f"**Processing Time:** {result['processing_time']:.3f}s")
                         if result['ground_truth'] is not None:
-                            gt_label = {0: "Stable", 1: "Weathered"}.get(result['ground_truth'], "Unknown")
                             correct = "✅" if result['prediction'] == result['ground_truth'] else "❌"
                             st.write(f"**Ground Truth:** {gt_label} {correct}")
         else:
@@ -315,10 +347,11 @@ def display_batch_results(results: List[Dict[str, Any]]) -> None:
         else:
             st.success("No failed files!")
 def create_batch_uploader() -> List:
     """
     Create multi-file uploader widget
     Returns:
         List of uploaded files
     """
@@ -330,4 +363,4 @@ def create_batch_uploader() -> List:
         key="batch_uploader"
     )
-    return uploaded_files if uploaded_files else []

 from typing import List, Dict, Any, Tuple, Optional
 import time
+import streamlit as st
+import numpy as np
 from .preprocessing import resample_spectrum
 from .errors import ErrorHandler, safe_execute
 from .results_manager import ResultsManager
 from .confidence import calculate_softmax_confidence
 def parse_spectrum_data(text_content: str, filename: str = "unknown") -> Tuple[np.ndarray, np.ndarray]:
     """
     Parse spectrum data from text content
     Args:
         text_content: Raw text content of the spectrum file
         filename: Name of the file for error reporting
     Returns:
         Tuple of (x_values, y_values) as numpy arrays
     Raises:
         ValueError: If the data cannot be parsed
     """
     try:
         lines = text_content.strip().split('\n')
+        # ==Remove empty lines and comments==
         data_lines = []
         for line in lines:
             line = line.strip()
         if not data_lines:
             raise ValueError("No data lines found in file")
+        # ==Try to parse==
         x_vals, y_vals = [], []
         for i, line in enumerate(data_lines):
             try:
+                # Handle different separators
+                parts = line.replace(",", " ").split()
+                numbers = [p for p in parts if p.replace('.', '', 1).replace(
+                    '-', '', 1).replace('+', '', 1).isdigit()]
+                if len(numbers) >= 2:
+                    x_val = float(numbers[0])
+                    y_val = float(numbers[1])
+                    x_vals.append(x_val)
+                    y_vals.append(y_val)
+            except ValueError:
+                ErrorHandler.log_warning(
+                    f"Could not parse line {i+1}: {line}", f"Parsing {filename}")
+                continue
+        if len(x_vals) < 10:  # ==Need minimum points for interpolation==
+            raise ValueError(
+                f"Insufficient data points ({len(x_vals)}). Need at least 10 points.")
+        x = np.array(x_vals)
+        y = np.array(y_vals)
+        # Check for NaNs
+        if np.any(np.isnan(x)) or np.any(np.isnan(y)):
+            raise ValueError("Input data contains NaN values")
+        # Check monotonic increasing x
+        if not np.all(np.diff(x) > 0):
+            raise ValueError("Wavenumbers must be strictly increasing")
+        # Check reasonable range for Raman spectroscopy
+        if min(x) < 0 or max(x) > 10000 or (max(x) - min(x)) < 100:
+            raise ValueError(
+                f"Invalid wavenumber range: {min(x)} - {max(x)}. Expected ~400-4000 cm⁻¹ with span >100")
+        return x, y
     except Exception as e:
         raise ValueError(f"Failed to parse spectrum data: {str(e)}")
 def process_single_file(
     filename: str,
     text_content: str,
 ) -> Optional[Dict[str, Any]]:
     """
     Process a single spectrum file
     Args:
         filename: Name of the file
         text_content: Raw text content
         load_model_func: Function to load the model
         run_inference_func: Function to run inference
         label_file_func: Function to extract ground truth label
     Returns:
         Dictionary with processing results or None if failed
     """
     start_time = time.time()
     try:
+        # ==Parse spectrum data==
+        result, success = safe_execute(
             parse_spectrum_data,
             text_content,
             filename,
             show_error=False
         )
+        if not success or result is None:
             return None
+        x_raw, y_raw = result
+        # ==Resample spectrum==
+        result, success = safe_execute(
             resample_spectrum,
             x_raw,
             y_raw,
             show_error=False
         )
+        if not success or result is None:
             return None
+        x_resampled, y_resampled = result
+        # ==Run inference==
+        result, success = safe_execute(
             run_inference_func,
             y_resampled,
             model_choice,
             show_error=False
         )
+        if not success or result is None:
+            ErrorHandler.log_error(
+                Exception("Inference failed"), f"processing {filename}")
             return None
+        prediction, logits_list, probs, inference_time, logits = result
+        # ==Calculate confidence==
         if logits is not None:
+            probs_np, max_confidence, confidence_level, confidence_emoji = calculate_softmax_confidence(
+                logits)
         else:
             probs_np = np.array([])
             max_confidence = 0.0
             confidence_level = "LOW"
             confidence_emoji = "🔴"
+        # ==Get ground truth==
         try:
             ground_truth = label_file_func(filename)
             ground_truth = ground_truth if ground_truth >= 0 else None
         except Exception:
             ground_truth = None
+        # ==Get predicted class==
         label_map = {0: "Stable (Unweathered)", 1: "Weathered (Degraded)"}
         predicted_class = label_map.get(prediction, f"Unknown ({prediction})")
             "processing_time": time.time() - start_time
         }
 def process_multiple_files(
     uploaded_files: List,
     model_choice: str,
 ) -> List[Dict[str, Any]]:
     """
     Process multiple uploaded files
     Args:
         uploaded_files: List of uploaded file objects
         model_choice: Selected model name
         run_inference_func: Function to run inference
         label_file_func: Function to extract ground truth label
         progress_callback: Optional callback to update progress
     Returns:
         List of processing results
     """
             progress_callback(i, total_files, uploaded_file.name)
         try:
+            # ==Read file content==
             raw = uploaded_file.read()
+            text_content = raw.decode(
+                'utf-8') if isinstance(raw, bytes) else raw
+            # ==Process the file==
             result = process_single_file(
                 uploaded_file.name,
                 text_content,
             if result:
                 results.append(result)
+                # ==Add successful results to the results manager==
                 if result.get("success", False):
                     ResultsManager.add_results(
                         filename=result["filename"],
     if progress_callback:
         progress_callback(total_files, total_files, "Complete")
+    ErrorHandler.log_info(
+        f"Completed batch processing: {sum(1 for r in results if r.get('success', False))}/{total_files} successful")
     return results
 def display_batch_results(results: List[Dict[str, Any]]) -> None:
     """
     Display batch processing results in the UI
     Args:
         results: List of processing results
     """
     successful = [r for r in results if r.get("success", False)]
     failed = [r for r in results if not r.get("success", False)]
+    # ==Summary==
     col1, col2, col3 = st.columns(3)
     with col1:
         st.metric("Total Files", len(results))
     with col2:
+        st.metric("Successful", len(successful),
+                  delta=f"{len(successful)/len(results)*100:.1f}%")
     with col3:
+        st.metric("Failed", len(
+            failed), delta=f"-{len(failed)/len(results)*100:.1f}%" if failed else "0%")
+    # ==Results tabs==
     tab1, tab2 = st.tabs(["✅Successful", "❌ Failed"])
     with tab1:
                 with st.expander(f"{result['filename']}", expanded=False):
                     col1, col2 = st.columns(2)
                     with col1:
+                        st.write(
+                            f"**Prediction:** {result['predicted_class']}")
+                        st.write(
+                            f"**Confidence:** {result['confidence_emoji']} {result['confidence_level']} ({result['confidence']:.3f})")
                     with col2:
+                        st.write(
+                            f"**Processing Time:** {result['processing_time']:.3f}s")
                         if result['ground_truth'] is not None:
+                            gt_label = {0: "Stable", 1: "Weathered"}.get(
+                                result['ground_truth'], "Unknown")
                             correct = "✅" if result['prediction'] == result['ground_truth'] else "❌"
                             st.write(f"**Ground Truth:** {gt_label} {correct}")
         else:
         else:
             st.success("No failed files!")
 def create_batch_uploader() -> List:
     """
     Create multi-file uploader widget
     Returns:
         List of uploaded files
     """
         key="batch_uploader"
     )
+    return uploaded_files if uploaded_files else []