MogensR committed on
Commit
d7cf15f
·
1 Parent(s): 76d53eb

Update utils/utils.py

Browse files
Files changed (1) hide show
  1. utils/utils.py +1100 -202
utils/utils.py CHANGED
@@ -1,9 +1,9 @@
1
  """
2
- Utility classes for BackgroundFX Pro
3
- Includes FileManager, VideoUtils, and ImageUtils
4
  """
5
 
6
- # Set OMP_NUM_THREADS at the very beginning of utils module too
7
  import os
8
  if 'OMP_NUM_THREADS' not in os.environ:
9
  os.environ['OMP_NUM_THREADS'] = '4'
@@ -16,29 +16,113 @@
16
  from typing import Optional, List, Union, Tuple, Dict, Any
17
  from datetime import datetime
18
  import subprocess
 
 
19
  import cv2
20
  import numpy as np
21
- from PIL import Image, ImageEnhance, ImageFilter
 
22
 
23
  logger = logging.getLogger(__name__)
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  class FileManager:
27
  """Manages file operations for BackgroundFX Pro"""
28
 
29
  def __init__(self, base_dir: Optional[str] = None):
30
- """
31
- Initialize FileManager
32
-
33
- Args:
34
- base_dir: Base directory for file operations (defaults to temp dir)
35
- """
36
  if base_dir:
37
  self.base_dir = Path(base_dir)
38
  else:
39
  self.base_dir = Path(tempfile.gettempdir()) / "backgroundfx_pro"
40
 
41
- # Create base directory if it doesn't exist
42
  self.base_dir.mkdir(parents=True, exist_ok=True)
43
 
44
  # Create subdirectories
@@ -53,49 +137,27 @@ def __init__(self, base_dir: Optional[str] = None):
53
  logger.info(f"FileManager initialized with base directory: {self.base_dir}")
54
 
55
  def save_upload(self, file_path: Union[str, Path], filename: Optional[str] = None) -> Path:
56
- """
57
- Save an uploaded file to the uploads directory
58
-
59
- Args:
60
- file_path: Path to the uploaded file
61
- filename: Optional custom filename
62
-
63
- Returns:
64
- Path to the saved file
65
- """
66
  file_path = Path(file_path)
67
 
68
  if filename:
69
  dest_path = self.uploads_dir / filename
70
  else:
71
- # Generate unique filename with timestamp
72
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
73
  dest_path = self.uploads_dir / f"{timestamp}_{file_path.name}"
74
 
75
- # Copy file to uploads directory
76
  shutil.copy2(file_path, dest_path)
77
  logger.info(f"Saved upload: {dest_path}")
78
-
79
  return dest_path
80
 
81
  def create_output_path(self, filename: str, subfolder: Optional[str] = None) -> Path:
82
- """
83
- Create a path for an output file
84
-
85
- Args:
86
- filename: Name of the output file
87
- subfolder: Optional subfolder within outputs
88
-
89
- Returns:
90
- Path for the output file
91
- """
92
  if subfolder:
93
  output_dir = self.outputs_dir / subfolder
94
  output_dir.mkdir(parents=True, exist_ok=True)
95
  else:
96
  output_dir = self.outputs_dir
97
 
98
- # Add timestamp to filename
99
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
100
  name_parts = filename.rsplit('.', 1)
101
  if len(name_parts) == 2:
@@ -106,16 +168,7 @@ def create_output_path(self, filename: str, subfolder: Optional[str] = None) ->
106
  return output_path
107
 
108
  def get_temp_path(self, filename: Optional[str] = None, extension: str = ".tmp") -> Path:
109
- """
110
- Get a temporary file path
111
-
112
- Args:
113
- filename: Optional filename (will be made unique)
114
- extension: File extension
115
-
116
- Returns:
117
- Path for temporary file
118
- """
119
  if filename:
120
  temp_path = self.temp_dir / filename
121
  else:
@@ -125,12 +178,7 @@ def get_temp_path(self, filename: Optional[str] = None, extension: str = ".tmp")
125
  return temp_path
126
 
127
  def cleanup_temp(self, max_age_hours: int = 24):
128
- """
129
- Clean up old temporary files
130
-
131
- Args:
132
- max_age_hours: Maximum age of temp files in hours
133
- """
134
  try:
135
  current_time = datetime.now().timestamp()
136
  max_age_seconds = max_age_hours * 3600
@@ -147,31 +195,12 @@ def cleanup_temp(self, max_age_hours: int = 24):
147
  logger.warning(f"Error during temp cleanup: {e}")
148
 
149
  def get_cache_path(self, key: str, extension: str = ".cache") -> Path:
150
- """
151
- Get a cache file path based on a key
152
-
153
- Args:
154
- key: Cache key
155
- extension: File extension
156
-
157
- Returns:
158
- Path for cache file
159
- """
160
- # Create a safe filename from the key
161
  safe_key = "".join(c if c.isalnum() or c in '-_' else '_' for c in key)
162
  return self.cache_dir / f"{safe_key}{extension}"
163
 
164
  def list_outputs(self, subfolder: Optional[str] = None, extension: Optional[str] = None) -> List[Path]:
165
- """
166
- List output files
167
-
168
- Args:
169
- subfolder: Optional subfolder to list from
170
- extension: Optional file extension filter
171
-
172
- Returns:
173
- List of output file paths
174
- """
175
  if subfolder:
176
  search_dir = self.outputs_dir / subfolder
177
  else:
@@ -188,15 +217,7 @@ def list_outputs(self, subfolder: Optional[str] = None, extension: Optional[str]
188
  return sorted(search_dir.glob(pattern), key=lambda p: p.stat().st_mtime, reverse=True)
189
 
190
  def delete_file(self, file_path: Union[str, Path]) -> bool:
191
- """
192
- Safely delete a file
193
-
194
- Args:
195
- file_path: Path to file to delete
196
-
197
- Returns:
198
- True if successful, False otherwise
199
- """
200
  try:
201
  file_path = Path(file_path)
202
  if file_path.exists() and file_path.is_file():
@@ -209,15 +230,7 @@ def delete_file(self, file_path: Union[str, Path]) -> bool:
209
  return False
210
 
211
  def get_file_info(self, file_path: Union[str, Path]) -> dict:
212
- """
213
- Get information about a file
214
-
215
- Args:
216
- file_path: Path to file
217
-
218
- Returns:
219
- Dictionary with file information
220
- """
221
  file_path = Path(file_path)
222
 
223
  if not file_path.exists():
@@ -235,21 +248,16 @@ def get_file_info(self, file_path: Union[str, Path]) -> dict:
235
  "path": str(file_path.absolute())
236
  }
237
 
 
 
 
238
 
239
  class VideoUtils:
240
  """Utilities for video processing"""
241
 
242
  @staticmethod
243
  def get_video_info(video_path: Union[str, Path]) -> Dict[str, Any]:
244
- """
245
- Get detailed video information
246
-
247
- Args:
248
- video_path: Path to video file
249
-
250
- Returns:
251
- Dictionary with video metadata
252
- """
253
  video_path = str(video_path)
254
  cap = cv2.VideoCapture(video_path)
255
 
@@ -267,7 +275,6 @@ def get_video_info(video_path: Union[str, Path]) -> Dict[str, Any]:
267
  "duration": cap.get(cv2.CAP_PROP_FRAME_COUNT) / cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 0
268
  }
269
 
270
- # Get file size
271
  path = Path(video_path)
272
  if path.exists():
273
  info["file_size_mb"] = path.stat().st_size / (1024 * 1024)
@@ -287,18 +294,7 @@ def extract_frames(video_path: Union[str, Path],
287
  output_dir: Union[str, Path],
288
  frame_interval: int = 1,
289
  max_frames: Optional[int] = None) -> List[Path]:
290
- """
291
- Extract frames from video
292
-
293
- Args:
294
- video_path: Path to video file
295
- output_dir: Directory to save frames
296
- frame_interval: Extract every nth frame
297
- max_frames: Maximum number of frames to extract
298
-
299
- Returns:
300
- List of extracted frame paths
301
- """
302
  video_path = str(video_path)
303
  output_dir = Path(output_dir)
304
  output_dir.mkdir(parents=True, exist_ok=True)
@@ -340,23 +336,11 @@ def create_video_from_frames(frame_paths: List[Union[str, Path]],
340
  output_path: Union[str, Path],
341
  fps: float = 30.0,
342
  codec: str = 'mp4v') -> bool:
343
- """
344
- Create video from frame images
345
-
346
- Args:
347
- frame_paths: List of frame image paths
348
- output_path: Output video path
349
- fps: Frames per second
350
- codec: Video codec (fourcc)
351
-
352
- Returns:
353
- True if successful
354
- """
355
  if not frame_paths:
356
  logger.error("No frames provided")
357
  return False
358
 
359
- # Read first frame to get dimensions
360
  first_frame = cv2.imread(str(frame_paths[0]))
361
  if first_frame is None:
362
  logger.error(f"Failed to read first frame: {frame_paths[0]}")
@@ -364,7 +348,6 @@ def create_video_from_frames(frame_paths: List[Union[str, Path]],
364
 
365
  height, width, layers = first_frame.shape
366
 
367
- # Create video writer
368
  fourcc = cv2.VideoWriter_fourcc(*codec)
369
  out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
370
 
@@ -392,31 +375,17 @@ def resize_video(input_path: Union[str, Path],
392
  target_width: Optional[int] = None,
393
  target_height: Optional[int] = None,
394
  maintain_aspect: bool = True) -> bool:
395
- """
396
- Resize video to target dimensions
397
-
398
- Args:
399
- input_path: Input video path
400
- output_path: Output video path
401
- target_width: Target width (None to auto-calculate)
402
- target_height: Target height (None to auto-calculate)
403
- maintain_aspect: Maintain aspect ratio
404
-
405
- Returns:
406
- True if successful
407
- """
408
  cap = cv2.VideoCapture(str(input_path))
409
  if not cap.isOpened():
410
  logger.error(f"Failed to open video: {input_path}")
411
  return False
412
 
413
- # Get original dimensions
414
  orig_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
415
  orig_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
416
  fps = cap.get(cv2.CAP_PROP_FPS)
417
  fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
418
 
419
- # Calculate target dimensions
420
  if maintain_aspect:
421
  if target_width and not target_height:
422
  aspect = orig_width / orig_height
@@ -430,7 +399,6 @@ def resize_video(input_path: Union[str, Path],
430
  if not target_height:
431
  target_height = orig_height
432
 
433
- # Create video writer
434
  out = cv2.VideoWriter(str(output_path), fourcc, fps, (target_width, target_height))
435
 
436
  try:
@@ -456,16 +424,7 @@ def resize_video(input_path: Union[str, Path],
456
  @staticmethod
457
  def extract_audio(video_path: Union[str, Path],
458
  audio_path: Union[str, Path]) -> bool:
459
- """
460
- Extract audio from video using ffmpeg
461
-
462
- Args:
463
- video_path: Input video path
464
- audio_path: Output audio path
465
-
466
- Returns:
467
- True if successful
468
- """
469
  try:
470
  cmd = [
471
  'ffmpeg', '-i', str(video_path),
@@ -493,17 +452,7 @@ def extract_audio(video_path: Union[str, Path],
493
  def add_audio_to_video(video_path: Union[str, Path],
494
  audio_path: Union[str, Path],
495
  output_path: Union[str, Path]) -> bool:
496
- """
497
- Add audio track to video using ffmpeg
498
-
499
- Args:
500
- video_path: Input video path
501
- audio_path: Input audio path
502
- output_path: Output video path with audio
503
-
504
- Returns:
505
- True if successful
506
- """
507
  try:
508
  cmd = [
509
  'ffmpeg', '-i', str(video_path),
@@ -528,57 +477,1006 @@ def add_audio_to_video(video_path: Union[str, Path],
528
  except Exception as e:
529
  logger.error(f"Error adding audio: {e}")
530
  return False
 
 
 
 
 
 
 
531
 
532
  @staticmethod
533
- def get_frame_at_time(video_path: Union[str, Path],
534
- time_seconds: float) -> Optional[np.ndarray]:
535
- """
536
- Get frame at specific time in video
537
-
538
- Args:
539
- video_path: Path to video
540
- time_seconds: Time in seconds
541
-
542
- Returns:
543
- Frame as numpy array or None
544
- """
545
- cap = cv2.VideoCapture(str(video_path))
546
- if not cap.isOpened():
547
- logger.error(f"Failed to open video: {video_path}")
548
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
549
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
  try:
551
- fps = cap.get(cv2.CAP_PROP_FPS)
552
- frame_number = int(fps * time_seconds)
553
 
554
- cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
555
- ret, frame = cap.read()
556
 
557
- if ret:
558
- return frame
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
  else:
560
- logger.warning(f"Could not read frame at time {time_seconds}s")
561
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
 
563
- finally:
564
- cap.release()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
 
567
- # Create default instances for convenience
568
- _default_file_manager = None
 
569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
 
571
- def get_file_manager(base_dir: Optional[str] = None) -> FileManager:
572
- """
573
- Get or create the default FileManager instance
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
 
575
- Args:
576
- base_dir: Optional base directory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
 
578
- Returns:
579
- FileManager instance
580
- """
581
- global _default_file_manager
582
- if _default_file_manager is None or base_dir is not None:
583
- _default_file_manager = FileManager(base_dir)
584
- return _default_file_manager
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Unified Utilities Module for BackgroundFX Pro
3
+ Combines FileManager, VideoUtils, ImageUtils, and CV utilities
4
  """
5
 
6
+ # Set OMP_NUM_THREADS at the very beginning to prevent libgomp errors
7
  import os
8
  if 'OMP_NUM_THREADS' not in os.environ:
9
  os.environ['OMP_NUM_THREADS'] = '4'
 
16
  from typing import Optional, List, Union, Tuple, Dict, Any
17
  from datetime import datetime
18
  import subprocess
19
+ import time
20
+
21
  import cv2
22
  import numpy as np
23
+ import torch
24
+ from PIL import Image, ImageEnhance, ImageFilter, ImageDraw
25
 
26
  logger = logging.getLogger(__name__)
27
 
28
+ # ============================================================================
29
+ # CONFIGURATION AND CONSTANTS
30
+ # ============================================================================
31
+
32
+ # Version control flags for CV functions
33
+ USE_ENHANCED_SEGMENTATION = True
34
+ USE_AUTO_TEMPORAL_CONSISTENCY = True
35
+ USE_INTELLIGENT_PROMPTING = True
36
+ USE_ITERATIVE_REFINEMENT = True
37
+
38
+ # Professional background templates
39
+ PROFESSIONAL_BACKGROUNDS = {
40
+ "office_modern": {
41
+ "name": "Modern Office",
42
+ "type": "gradient",
43
+ "colors": ["#f8f9fa", "#e9ecef", "#dee2e6"],
44
+ "direction": "diagonal",
45
+ "description": "Clean, contemporary office environment",
46
+ "brightness": 0.95,
47
+ "contrast": 1.1
48
+ },
49
+ "studio_blue": {
50
+ "name": "Professional Blue",
51
+ "type": "gradient",
52
+ "colors": ["#1e3c72", "#2a5298", "#3498db"],
53
+ "direction": "radial",
54
+ "description": "Broadcast-quality blue studio",
55
+ "brightness": 0.9,
56
+ "contrast": 1.2
57
+ },
58
+ "studio_green": {
59
+ "name": "Broadcast Green",
60
+ "type": "color",
61
+ "colors": ["#00b894"],
62
+ "chroma_key": True,
63
+ "description": "Professional green screen replacement",
64
+ "brightness": 1.0,
65
+ "contrast": 1.0
66
+ },
67
+ "minimalist": {
68
+ "name": "Minimalist White",
69
+ "type": "gradient",
70
+ "colors": ["#ffffff", "#f1f2f6", "#ddd"],
71
+ "direction": "soft_radial",
72
+ "description": "Clean, minimal background",
73
+ "brightness": 0.98,
74
+ "contrast": 0.9
75
+ },
76
+ "warm_gradient": {
77
+ "name": "Warm Sunset",
78
+ "type": "gradient",
79
+ "colors": ["#ff7675", "#fd79a8", "#fdcb6e"],
80
+ "direction": "diagonal",
81
+ "description": "Warm, inviting atmosphere",
82
+ "brightness": 0.85,
83
+ "contrast": 1.15
84
+ },
85
+ "tech_dark": {
86
+ "name": "Tech Dark",
87
+ "type": "gradient",
88
+ "colors": ["#0c0c0c", "#2d3748", "#4a5568"],
89
+ "direction": "vertical",
90
+ "description": "Modern tech/gaming setup",
91
+ "brightness": 0.7,
92
+ "contrast": 1.3
93
+ }
94
+ }
95
+
96
+ # ============================================================================
97
+ # CUSTOM EXCEPTIONS
98
+ # ============================================================================
99
+
100
class SegmentationError(Exception):
    """Custom exception for segmentation failures"""
103
+
104
class MaskRefinementError(Exception):
    """Custom exception for mask refinement failures"""
107
+
108
class BackgroundReplacementError(Exception):
    """Custom exception for background replacement failures"""
111
+
112
+ # ============================================================================
113
+ # FILE MANAGER CLASS
114
+ # ============================================================================
115
 
116
  class FileManager:
117
  """Manages file operations for BackgroundFX Pro"""
118
 
119
  def __init__(self, base_dir: Optional[str] = None):
120
+ """Initialize FileManager"""
 
 
 
 
 
121
  if base_dir:
122
  self.base_dir = Path(base_dir)
123
  else:
124
  self.base_dir = Path(tempfile.gettempdir()) / "backgroundfx_pro"
125
 
 
126
  self.base_dir.mkdir(parents=True, exist_ok=True)
127
 
128
  # Create subdirectories
 
137
  logger.info(f"FileManager initialized with base directory: {self.base_dir}")
138
 
139
  def save_upload(self, file_path: Union[str, Path], filename: Optional[str] = None) -> Path:
140
+ """Save an uploaded file to the uploads directory"""
 
 
 
 
 
 
 
 
 
141
  file_path = Path(file_path)
142
 
143
  if filename:
144
  dest_path = self.uploads_dir / filename
145
  else:
 
146
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
147
  dest_path = self.uploads_dir / f"{timestamp}_{file_path.name}"
148
 
 
149
  shutil.copy2(file_path, dest_path)
150
  logger.info(f"Saved upload: {dest_path}")
 
151
  return dest_path
152
 
153
  def create_output_path(self, filename: str, subfolder: Optional[str] = None) -> Path:
154
+ """Create a path for an output file"""
 
 
 
 
 
 
 
 
 
155
  if subfolder:
156
  output_dir = self.outputs_dir / subfolder
157
  output_dir.mkdir(parents=True, exist_ok=True)
158
  else:
159
  output_dir = self.outputs_dir
160
 
 
161
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
162
  name_parts = filename.rsplit('.', 1)
163
  if len(name_parts) == 2:
 
168
  return output_path
169
 
170
  def get_temp_path(self, filename: Optional[str] = None, extension: str = ".tmp") -> Path:
171
+ """Get a temporary file path"""
 
 
 
 
 
 
 
 
 
172
  if filename:
173
  temp_path = self.temp_dir / filename
174
  else:
 
178
  return temp_path
179
 
180
  def cleanup_temp(self, max_age_hours: int = 24):
181
+ """Clean up old temporary files"""
 
 
 
 
 
182
  try:
183
  current_time = datetime.now().timestamp()
184
  max_age_seconds = max_age_hours * 3600
 
195
  logger.warning(f"Error during temp cleanup: {e}")
196
 
197
  def get_cache_path(self, key: str, extension: str = ".cache") -> Path:
198
+ """Get a cache file path based on a key"""
 
 
 
 
 
 
 
 
 
 
199
  safe_key = "".join(c if c.isalnum() or c in '-_' else '_' for c in key)
200
  return self.cache_dir / f"{safe_key}{extension}"
201
 
202
  def list_outputs(self, subfolder: Optional[str] = None, extension: Optional[str] = None) -> List[Path]:
203
+ """List output files"""
 
 
 
 
 
 
 
 
 
204
  if subfolder:
205
  search_dir = self.outputs_dir / subfolder
206
  else:
 
217
  return sorted(search_dir.glob(pattern), key=lambda p: p.stat().st_mtime, reverse=True)
218
 
219
  def delete_file(self, file_path: Union[str, Path]) -> bool:
220
+ """Safely delete a file"""
 
 
 
 
 
 
 
 
221
  try:
222
  file_path = Path(file_path)
223
  if file_path.exists() and file_path.is_file():
 
230
  return False
231
 
232
  def get_file_info(self, file_path: Union[str, Path]) -> dict:
233
+ """Get information about a file"""
 
 
 
 
 
 
 
 
234
  file_path = Path(file_path)
235
 
236
  if not file_path.exists():
 
248
  "path": str(file_path.absolute())
249
  }
250
 
251
+ # ============================================================================
252
+ # VIDEO UTILS CLASS
253
+ # ============================================================================
254
 
255
  class VideoUtils:
256
  """Utilities for video processing"""
257
 
258
  @staticmethod
259
  def get_video_info(video_path: Union[str, Path]) -> Dict[str, Any]:
260
+ """Get detailed video information"""
 
 
 
 
 
 
 
 
261
  video_path = str(video_path)
262
  cap = cv2.VideoCapture(video_path)
263
 
 
275
  "duration": cap.get(cv2.CAP_PROP_FRAME_COUNT) / cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 0
276
  }
277
 
 
278
  path = Path(video_path)
279
  if path.exists():
280
  info["file_size_mb"] = path.stat().st_size / (1024 * 1024)
 
294
  output_dir: Union[str, Path],
295
  frame_interval: int = 1,
296
  max_frames: Optional[int] = None) -> List[Path]:
297
+ """Extract frames from video"""
 
 
 
 
 
 
 
 
 
 
 
298
  video_path = str(video_path)
299
  output_dir = Path(output_dir)
300
  output_dir.mkdir(parents=True, exist_ok=True)
 
336
  output_path: Union[str, Path],
337
  fps: float = 30.0,
338
  codec: str = 'mp4v') -> bool:
339
+ """Create video from frame images"""
 
 
 
 
 
 
 
 
 
 
 
340
  if not frame_paths:
341
  logger.error("No frames provided")
342
  return False
343
 
 
344
  first_frame = cv2.imread(str(frame_paths[0]))
345
  if first_frame is None:
346
  logger.error(f"Failed to read first frame: {frame_paths[0]}")
 
348
 
349
  height, width, layers = first_frame.shape
350
 
 
351
  fourcc = cv2.VideoWriter_fourcc(*codec)
352
  out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
353
 
 
375
  target_width: Optional[int] = None,
376
  target_height: Optional[int] = None,
377
  maintain_aspect: bool = True) -> bool:
378
+ """Resize video to target dimensions"""
 
 
 
 
 
 
 
 
 
 
 
 
379
  cap = cv2.VideoCapture(str(input_path))
380
  if not cap.isOpened():
381
  logger.error(f"Failed to open video: {input_path}")
382
  return False
383
 
 
384
  orig_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
385
  orig_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
386
  fps = cap.get(cv2.CAP_PROP_FPS)
387
  fourcc = int(cap.get(cv2.CAP_PROP_FOURCC))
388
 
 
389
  if maintain_aspect:
390
  if target_width and not target_height:
391
  aspect = orig_width / orig_height
 
399
  if not target_height:
400
  target_height = orig_height
401
 
 
402
  out = cv2.VideoWriter(str(output_path), fourcc, fps, (target_width, target_height))
403
 
404
  try:
 
424
  @staticmethod
425
  def extract_audio(video_path: Union[str, Path],
426
  audio_path: Union[str, Path]) -> bool:
427
+ """Extract audio from video using ffmpeg"""
 
 
 
 
 
 
 
 
 
428
  try:
429
  cmd = [
430
  'ffmpeg', '-i', str(video_path),
 
452
  def add_audio_to_video(video_path: Union[str, Path],
453
  audio_path: Union[str, Path],
454
  output_path: Union[str, Path]) -> bool:
455
+ """Add audio track to video using ffmpeg"""
 
 
 
 
 
 
 
 
 
 
456
  try:
457
  cmd = [
458
  'ffmpeg', '-i', str(video_path),
 
477
  except Exception as e:
478
  logger.error(f"Error adding audio: {e}")
479
  return False
480
+
481
+ # ============================================================================
482
+ # IMAGE UTILS CLASS
483
+ # ============================================================================
484
+
485
class ImageUtils:
    """Utilities for image processing and manipulation.

    All methods are static and operate on PIL images (with converters to and
    from OpenCV BGR arrays).  Methods return new images; callers' inputs are
    not mutated.
    """

    @staticmethod
    def load_image(image_path: Union[str, Path]) -> Optional[Image.Image]:
        """Load an image using PIL.

        Returns:
            The opened image, or None on failure (the error is logged).
        """
        try:
            return Image.open(str(image_path))
        except Exception as e:
            logger.error(f"Failed to load image {image_path}: {e}")
            return None

    @staticmethod
    def resize_image(image: Image.Image,
                     max_width: Optional[int] = None,
                     max_height: Optional[int] = None,
                     maintain_aspect: bool = True) -> Image.Image:
        """Resize image to fit within max dimensions.

        Args:
            image: Source image.
            max_width: Maximum width (None for no width limit).
            max_height: Maximum height (None for no height limit).
            maintain_aspect: Keep the aspect ratio (shrink-only).

        Returns:
            The resized image (or the original if no limits were given).
        """
        if not max_width and not max_height:
            return image

        width, height = image.size

        if maintain_aspect:
            # Scale starts at 1.0, so images are only shrunk, never enlarged.
            scale = 1.0
            if max_width:
                scale = min(scale, max_width / width)
            if max_height:
                scale = min(scale, max_height / height)

            new_width = int(width * scale)
            new_height = int(height * scale)
        else:
            new_width = max_width or width
            new_height = max_height or height

        return image.resize((new_width, new_height), Image.Resampling.LANCZOS)

    @staticmethod
    def convert_to_cv2(pil_image: Image.Image) -> np.ndarray:
        """Convert PIL Image to OpenCV format (BGR numpy array)."""
        if pil_image.mode != 'RGB':
            pil_image = pil_image.convert('RGB')

        np_image = np.array(pil_image)
        return cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)

    @staticmethod
    def convert_from_cv2(cv2_image: np.ndarray) -> Image.Image:
        """Convert OpenCV BGR image to PIL RGB format."""
        rgb_image = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB)
        return Image.fromarray(rgb_image)

    @staticmethod
    def apply_blur(image: Image.Image, radius: float = 5.0) -> Image.Image:
        """Apply Gaussian blur to image (returns a new image)."""
        return image.filter(ImageFilter.GaussianBlur(radius=radius))

    @staticmethod
    def adjust_brightness(image: Image.Image, factor: float = 1.0) -> Image.Image:
        """Adjust image brightness (1.0 = unchanged)."""
        enhancer = ImageEnhance.Brightness(image)
        return enhancer.enhance(factor)

    @staticmethod
    def adjust_contrast(image: Image.Image, factor: float = 1.0) -> Image.Image:
        """Adjust image contrast (1.0 = unchanged)."""
        enhancer = ImageEnhance.Contrast(image)
        return enhancer.enhance(factor)

    @staticmethod
    def adjust_saturation(image: Image.Image, factor: float = 1.0) -> Image.Image:
        """Adjust image saturation (1.0 = unchanged)."""
        enhancer = ImageEnhance.Color(image)
        return enhancer.enhance(factor)

    @staticmethod
    def crop_center(image: Image.Image, crop_width: int, crop_height: int) -> Image.Image:
        """Crop image from center.

        NOTE(review): if the crop is larger than the image, PIL pads the
        out-of-bounds area rather than raising — confirm callers expect that.
        """
        width, height = image.size

        left = (width - crop_width) // 2
        top = (height - crop_height) // 2
        right = left + crop_width
        bottom = top + crop_height

        return image.crop((left, top, right, bottom))

    @staticmethod
    def create_thumbnail(image: Image.Image, size: Tuple[int, int] = (128, 128)) -> Image.Image:
        """Create thumbnail preserving aspect ratio (original is untouched)."""
        # thumbnail() works in place, so operate on a copy.
        img_copy = image.copy()
        img_copy.thumbnail(size, Image.Resampling.LANCZOS)
        return img_copy

    @staticmethod
    def apply_mask(image: Image.Image, mask: Image.Image, alpha: float = 1.0) -> Image.Image:
        """Apply mask to image.

        Args:
            image: Source image (converted to RGBA).
            mask: Alpha mask (converted to 'L', resized to match if needed).
            alpha: Global opacity multiplier in [0, 1].

        Returns:
            A new RGBA image whose alpha channel is the mask.  The input is
            never mutated (previously it was modified in place when it was
            already RGBA).
        """
        # Always work on a copy so the caller's image is never mutated;
        # convert() already copies, so only the already-RGBA case needs one.
        if image.mode != 'RGBA':
            image = image.convert('RGBA')
        else:
            image = image.copy()

        if mask.mode != 'L':
            mask = mask.convert('L')

        if mask.size != image.size:
            mask = mask.resize(image.size, Image.Resampling.LANCZOS)

        if alpha < 1.0:
            # Scaling the brightness of an 'L' mask scales the final opacity.
            mask = ImageEnhance.Brightness(mask).enhance(alpha)

        image.putalpha(mask)
        return image

    @staticmethod
    def composite_images(foreground: Image.Image,
                         background: Image.Image,
                         position: Tuple[int, int] = (0, 0),
                         alpha: float = 1.0) -> Image.Image:
        """Composite foreground image over background.

        Args:
            foreground: Image pasted on top (its alpha channel is honored).
            background: Base image.
            position: (x, y) offset of the foreground's top-left corner.
            alpha: Extra opacity multiplier applied to the foreground.

        Returns:
            A new RGBA image; neither input is mutated.
        """
        if foreground.mode != 'RGBA':
            foreground = foreground.convert('RGBA')
        if background.mode != 'RGBA':
            background = background.convert('RGBA')

        if alpha < 1.0:
            # Copy before putalpha so the caller's foreground is untouched.
            foreground = foreground.copy()
            foreground.putalpha(
                ImageEnhance.Brightness(foreground.split()[3]).enhance(alpha)
            )

        output = background.copy()
        # Using the foreground as its own paste mask respects its alpha.
        output.paste(foreground, position, foreground)

        return output

    @staticmethod
    def get_image_info(image_path: Union[str, Path]) -> Dict[str, Any]:
        """Get image file information.

        Returns:
            Dict with format/mode/size/file size, ``{"exists": False}`` when
            the file is missing, or ``{"exists": False, "error": ...}`` on
            read failure.
        """
        try:
            image_path = Path(image_path)

            if not image_path.exists():
                return {"exists": False}

            with Image.open(str(image_path)) as img:
                info = {
                    "exists": True,
                    "filename": image_path.name,
                    "format": img.format,
                    "mode": img.mode,
                    "size": img.size,
                    "width": img.width,
                    "height": img.height,
                    "file_size_mb": image_path.stat().st_size / (1024 * 1024)
                }

                # _getexif is a legacy Pillow API (JPEG only); the hasattr
                # guard keeps non-JPEG formats from raising.
                if hasattr(img, '_getexif') and img._getexif():
                    info["has_exif"] = True
                else:
                    info["has_exif"] = False

                return info

        except Exception as e:
            logger.error(f"Error getting image info for {image_path}: {e}")
            return {"exists": False, "error": str(e)}

    @staticmethod
    def save_image(image: Image.Image,
                   output_path: Union[str, Path],
                   quality: int = 95,
                   optimize: bool = True) -> bool:
        """Save image with specified quality.

        Args:
            image: Image to save.
            output_path: Destination path (parent dirs are created).
            quality: JPEG quality (ignored for other formats).
            optimize: Enable encoder optimization for JPEG/PNG.

        Returns:
            True on success, False on failure (the error is logged).
        """
        try:
            output_path = Path(output_path)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            save_kwargs = {}
            ext = output_path.suffix.lower()

            if ext in ['.jpg', '.jpeg']:
                # JPEG cannot store alpha/palette; flatten to RGB instead of
                # letting the save call fail for RGBA/LA/P images.
                if image.mode in ('RGBA', 'LA', 'P'):
                    image = image.convert('RGB')
                save_kwargs['quality'] = quality
                save_kwargs['optimize'] = optimize
            elif ext == '.png':
                save_kwargs['optimize'] = optimize

            image.save(str(output_path), **save_kwargs)
            logger.info(f"Saved image to: {output_path}")
            return True

        except Exception as e:
            logger.error(f"Failed to save image to {output_path}: {e}")
            return False
678
+
679
+ # ============================================================================
680
+ # COMPUTER VISION FUNCTIONS (from utilities.py)
681
+ # ============================================================================
682
+
683
def segment_person_hq(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
    """High-quality person segmentation with intelligent automation.

    Dispatches to the original implementation when the enhanced path is
    disabled; otherwise runs SAM2-based prompting with optional iterative
    refinement and quality validation.  On any failure, falls back to
    ``_fallback_segmentation`` when ``fallback_enabled`` is True, or raises
    ``SegmentationError`` otherwise.
    """
    # Feature flag: route to the legacy implementation for rollback.
    if not USE_ENHANCED_SEGMENTATION:
        return segment_person_hq_original(image, predictor, fallback_enabled)

    logger.debug("Using ENHANCED segmentation with intelligent automation")

    if image is None or image.size == 0:
        raise SegmentationError("Invalid input image")

    try:
        # Guard: no predictor available at all.
        if predictor is None:
            if not fallback_enabled:
                raise SegmentationError("SAM2 predictor not available")
            logger.warning("SAM2 predictor not available, using fallback")
            return _fallback_segmentation(image)

        # Guard: predictor exists but cannot ingest this image.
        try:
            predictor.set_image(image)
        except Exception as e:
            logger.error(f"Failed to set image in predictor: {e}")
            if not fallback_enabled:
                raise SegmentationError(f"Predictor setup failed: {e}")
            return _fallback_segmentation(image)

        # Pick the prompting strategy via the module-level flag.
        prompt_fn = (_segment_with_intelligent_prompts
                     if USE_INTELLIGENT_PROMPTING
                     else _segment_with_basic_prompts)
        mask = prompt_fn(image, predictor)

        if USE_ITERATIVE_REFINEMENT and mask is not None:
            mask = _auto_refine_mask_iteratively(image, mask, predictor)

        # Reject low-quality masks rather than propagating them downstream.
        if not _validate_mask_quality(mask, image.shape[:2]):
            logger.warning("Mask quality validation failed")
            if not fallback_enabled:
                raise SegmentationError("Poor mask quality")
            return _fallback_segmentation(image)

        logger.debug(f"Enhanced segmentation successful - mask range: {mask.min()}-{mask.max()}")
        return mask

    except SegmentationError:
        # Deliberate failures raised above pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"Unexpected segmentation error: {e}")
        if fallback_enabled:
            return _fallback_segmentation(image)
        raise SegmentationError(f"Unexpected error: {e}")
736
+
737
def segment_person_hq_original(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
    """Original version of person segmentation for rollback.

    Preserved verbatim so USE_ENHANCED_SEGMENTATION=False reproduces the
    pre-enhancement pipeline: a fixed grid of positive points, SAM2
    multimask prediction, best-score selection, then post-processing.

    Args:
        image: Input frame as a numpy array; must be non-empty.
        predictor: SAM2-style predictor exposing set_image()/predict(),
            or None.
        fallback_enabled: When True, failures degrade to
            _fallback_segmentation() instead of raising.

    Returns:
        Single-channel uint8 mask (values 0/255).

    Raises:
        SegmentationError: For invalid input, or on failure when
            fallback_enabled is False.
    """
    if image is None or image.size == 0:
        raise SegmentationError("Invalid input image")

    try:
        if predictor is None:
            if fallback_enabled:
                logger.warning("SAM2 predictor not available, using fallback")
                return _fallback_segmentation(image)
            else:
                raise SegmentationError("SAM2 predictor not available")

        try:
            predictor.set_image(image)
        except Exception as e:
            logger.error(f"Failed to set image in predictor: {e}")
            if fallback_enabled:
                return _fallback_segmentation(image)
            else:
                raise SegmentationError(f"Predictor setup failed: {e}")

        h, w = image.shape[:2]

        # Fixed prompt grid: vertical spine, shoulders-height sides, head
        # area, and lower-body flanks — all assumed to land on the person.
        points = np.array([
            [w//2, h//4],
            [w//2, h//2],
            [w//2, 3*h//4],
            [w//3, h//2],
            [2*w//3, h//2],
            [w//2, h//6],
            [w//4, 2*h//3],
            [3*w//4, 2*h//3],
        ], dtype=np.float32)

        # All prompts are positive (label 1); no background points here.
        labels = np.ones(len(points), dtype=np.int32)

        try:
            with torch.no_grad():
                masks, scores, _ = predictor.predict(
                    point_coords=points,
                    point_labels=labels,
                    multimask_output=True
                )
        except Exception as e:
            logger.error(f"SAM2 prediction failed: {e}")
            if fallback_enabled:
                return _fallback_segmentation(image)
            else:
                raise SegmentationError(f"Prediction failed: {e}")

        if masks is None or len(masks) == 0:
            logger.warning("SAM2 returned no masks")
            if fallback_enabled:
                return _fallback_segmentation(image)
            else:
                raise SegmentationError("No masks generated")

        if scores is None or len(scores) == 0:
            logger.warning("SAM2 returned no scores")
            best_mask = masks[0]
        else:
            # Keep the candidate SAM2 itself scored highest.
            best_idx = np.argmax(scores)
            best_mask = masks[best_idx]
            logger.debug(f"Selected mask {best_idx} with score {scores[best_idx]:.3f}")

        mask = _process_mask(best_mask)

        if not _validate_mask_quality(mask, image.shape[:2]):
            logger.warning("Mask quality validation failed")
            if fallback_enabled:
                return _fallback_segmentation(image)
            else:
                raise SegmentationError("Poor mask quality")

        logger.debug(f"Segmentation successful - mask range: {mask.min()}-{mask.max()}")
        return mask

    except SegmentationError:
        raise
    except Exception as e:
        logger.error(f"Unexpected segmentation error: {e}")
        if fallback_enabled:
            return _fallback_segmentation(image)
        else:
            raise SegmentationError(f"Unexpected error: {e}")
def refine_mask_hq(image: np.ndarray, mask: np.ndarray, matanyone_processor: Any,
                   fallback_enabled: bool = True) -> np.ndarray:
    """Enhanced mask refinement with MatAnyone and robust fallbacks.

    Tries MatAnyone first; if it is unavailable, fails, or yields a poor
    mask, falls back to the classical OpenCV refinement pipeline (when
    fallback_enabled) or raises MaskRefinementError.
    """
    if image is None or mask is None:
        raise MaskRefinementError("Invalid input image or mask")

    try:
        # Normalize the incoming mask to single-channel binary uint8.
        mask = _process_mask(mask)

        if matanyone_processor is not None:
            try:
                logger.debug("Attempting MatAnyone refinement")
                candidate = _matanyone_refine(image, mask, matanyone_processor)

                usable = (candidate is not None
                          and _validate_mask_quality(candidate, image.shape[:2]))
                if usable:
                    logger.debug("MatAnyone refinement successful")
                    return candidate
                logger.warning("MatAnyone produced poor quality mask")
            except Exception as e:
                # MatAnyone problems are recoverable; log and fall through.
                logger.warning(f"MatAnyone refinement failed: {e}")

        if not fallback_enabled:
            raise MaskRefinementError("MatAnyone failed and fallback disabled")

        logger.debug("Using enhanced OpenCV refinement")
        return enhance_mask_opencv_advanced(image, mask)

    except MaskRefinementError:
        # Propagate our own error type untouched.
        raise
    except Exception as e:
        logger.error(f"Unexpected mask refinement error: {e}")
        if not fallback_enabled:
            raise MaskRefinementError(f"Unexpected error: {e}")
        return enhance_mask_opencv_advanced(image, mask)
def enhance_mask_opencv_advanced(image: np.ndarray, mask: np.ndarray) -> np.ndarray:
    """Advanced OpenCV-based mask enhancement with multiple techniques.

    Pipeline: normalize -> bilateral smoothing -> guided-filter pass ->
    close/open morphology -> light blur -> hard re-threshold.  On any
    failure, returns a plainly blurred copy of the mask.
    """
    try:
        # Normalize to a single-channel mask in the 0-255 range.
        if len(mask.shape) == 3:
            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
        if mask.max() <= 1.0:
            mask = (mask * 255).astype(np.uint8)

        # Edge-preserving smoothing, then an edge-aware guided pass that
        # uses the original image as guidance.
        refined = cv2.bilateralFilter(mask, 9, 75, 75)
        refined = _guided_filter_approx(image, refined, radius=8, eps=0.2)

        # Close small holes, then remove isolated speckles.
        refined = cv2.morphologyEx(
            refined, cv2.MORPH_CLOSE,
            cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))
        refined = cv2.morphologyEx(
            refined, cv2.MORPH_OPEN,
            cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)))

        # Slight edge softening before the final hard binarization.
        refined = cv2.GaussianBlur(refined, (3, 3), 0.8)
        return cv2.threshold(refined, 127, 255, cv2.THRESH_BINARY)[1]

    except Exception as e:
        logger.warning(f"Enhanced OpenCV refinement failed: {e}")
        return cv2.GaussianBlur(mask, (5, 5), 1.0)
def replace_background_hq(frame: np.ndarray, mask: np.ndarray, background: np.ndarray,
                          fallback_enabled: bool = True) -> np.ndarray:
    """Enhanced background replacement with comprehensive error handling.

    Args:
        frame: Foreground frame.
        mask: Person mask; 3-channel, non-uint8, and normalized 0-1 masks
            are all normalized here to single-channel uint8 0-255.
        background: Replacement background; resized to the frame size.
        fallback_enabled: When True, compositing failures degrade to
            _simple_compositing() instead of raising.

    Returns:
        Composited frame (same shape/dtype as `frame`).

    Raises:
        BackgroundReplacementError: On missing inputs, or on failure when
            fallback_enabled is False.
    """
    if frame is None or mask is None or background is None:
        raise BackgroundReplacementError("Invalid input frame, mask, or background")

    try:
        # Lanczos resampling keeps the background sharp at the frame size.
        background = cv2.resize(background, (frame.shape[1], frame.shape[0]),
                                interpolation=cv2.INTER_LANCZOS4)

        if len(mask.shape) == 3:
            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

        # NOTE(review): casting to uint8 *before* the 0-1 scaling check
        # truncates a float 0-1 mask to {0,1}, discarding soft alpha —
        # confirm whether binary masks are intended here.
        if mask.dtype != np.uint8:
            mask = mask.astype(np.uint8)

        if mask.max() <= 1.0:
            logger.debug("Converting normalized mask to 0-255 range")
            mask = (mask * 255).astype(np.uint8)

        try:
            result = _advanced_compositing(frame, mask, background)
            logger.debug("Advanced compositing successful")
            return result

        except Exception as e:
            logger.warning(f"Advanced compositing failed: {e}")
            if fallback_enabled:
                return _simple_compositing(frame, mask, background)
            else:
                raise BackgroundReplacementError(f"Advanced compositing failed: {e}")

    except BackgroundReplacementError:
        raise
    except Exception as e:
        logger.error(f"Unexpected background replacement error: {e}")
        if fallback_enabled:
            return _simple_compositing(frame, mask, background)
        else:
            raise BackgroundReplacementError(f"Unexpected error: {e}")
def create_professional_background(bg_config: Dict[str, Any], width: int, height: int) -> np.ndarray:
    """Enhanced professional background creation with quality improvements.

    Args:
        bg_config: Background description; recognized "type" values are
            "color" and "gradient".  A missing or unknown type yields a
            neutral gray canvas.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        uint8 image of shape (height, width, 3).  Never raises — any
        failure falls back to a solid gray background.
    """
    try:
        # .get() instead of bg_config["type"]: a config without a "type"
        # key should take the intended gray-default branch below, not
        # raise KeyError into the error handler.
        bg_type = bg_config.get("type")
        if bg_type == "color":
            background = _create_solid_background(bg_config, width, height)
        elif bg_type == "gradient":
            background = _create_gradient_background_enhanced(bg_config, width, height)
        else:
            # Unknown/missing type: neutral mid-gray canvas.
            background = np.full((height, width, 3), (128, 128, 128), dtype=np.uint8)

        # Shared post-adjustments (per bg_config) for every background type.
        background = _apply_background_adjustments(background, bg_config)

        return background

    except Exception as e:
        logger.error(f"Background creation error: {e}")
        return np.full((height, width, 3), (128, 128, 128), dtype=np.uint8)
def validate_video_file(video_path: str) -> Tuple[bool, str]:
    """Enhanced video file validation with detailed checks.

    Verifies existence, size limits (non-empty, <=2GB), that OpenCV can
    open the file, and sane stream properties (frame count, fps,
    resolution <=4096x4096, duration <=300s).

    Args:
        video_path: Path to the video file on disk.

    Returns:
        (ok, message) — ok is True only when every check passes; message
        describes the first failure, or summarizes the valid video.
    """
    if not video_path or not os.path.exists(video_path):
        return False, "Video file not found"

    try:
        file_size = os.path.getsize(video_path)
        if file_size == 0:
            return False, "Video file is empty"

        if file_size > 2 * 1024 * 1024 * 1024:
            return False, "Video file too large (>2GB)"

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                return False, "Cannot open video file"

            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        finally:
            # Release the capture handle even on the early "cannot open"
            # return or if a property read raises (was leaked before).
            cap.release()

        if frame_count == 0:
            return False, "Video appears to be empty (0 frames)"

        if fps <= 0 or fps > 120:
            return False, f"Invalid frame rate: {fps}"

        if width <= 0 or height <= 0:
            return False, f"Invalid resolution: {width}x{height}"

        if width > 4096 or height > 4096:
            return False, f"Resolution too high: {width}x{height} (max 4096x4096)"

        # fps validated > 0 above, so this division is safe.
        duration = frame_count / fps
        if duration > 300:
            return False, f"Video too long: {duration:.1f}s (max 300s)"

        return True, f"Valid video: {width}x{height}, {fps:.1f}fps, {duration:.1f}s"

    except Exception as e:
        return False, f"Error validating video: {str(e)}"
+ # ============================================================================
995
+ # HELPER FUNCTIONS (from utilities.py)
996
+ # ============================================================================
997
 
998
def _segment_with_intelligent_prompts(image: np.ndarray, predictor: Any) -> np.ndarray:
    """Intelligent automatic prompt generation for segmentation.

    Builds positive/negative point prompts from image saliency (via
    _generate_smart_prompts), runs SAM2 multimask prediction, and returns
    the best-scoring processed mask.

    Args:
        image: Input frame; predictor.set_image() must already have been
            called by the caller.
        predictor: SAM2-style predictor exposing predict().

    Returns:
        Processed binary uint8 mask (0/255).

    Raises:
        SegmentationError: If SAM2 returns no masks; any other failure is
            logged and re-raised for the caller's fallback handling.
    """
    try:
        h, w = image.shape[:2]
        pos_points, neg_points = _generate_smart_prompts(image)

        # Guarantee at least one positive prompt (frame center).
        if len(pos_points) == 0:
            pos_points = np.array([[w//2, h//2]], dtype=np.float32)

        points = np.vstack([pos_points, neg_points])
        # Labels derived from point counts: 1 = foreground, 0 = background.
        labels = np.hstack([
            np.ones(len(pos_points), dtype=np.int32),
            np.zeros(len(neg_points), dtype=np.int32)
        ])

        logger.debug(f"Using {len(pos_points)} positive, {len(neg_points)} negative points")

        with torch.no_grad():
            masks, scores, _ = predictor.predict(
                point_coords=points,
                point_labels=labels,
                multimask_output=True
            )

        if masks is None or len(masks) == 0:
            raise SegmentationError("No masks generated")

        if scores is not None and len(scores) > 0:
            best_idx = np.argmax(scores)
            best_mask = masks[best_idx]
            logger.debug(f"Selected mask {best_idx} with score {scores[best_idx]:.3f}")
        else:
            best_mask = masks[0]

        return _process_mask(best_mask)

    except Exception as e:
        logger.error(f"Intelligent prompting failed: {e}")
        raise
def _segment_with_basic_prompts(image: np.ndarray, predictor: Any) -> np.ndarray:
    """Basic prompting method for segmentation.

    Seeds SAM2 with a fixed vertical strip of positive points (likely
    person locations) and the four near-corner points as negatives.

    Args:
        image: Input frame; predictor.set_image() must already have been
            called by the caller.
        predictor: SAM2-style predictor exposing predict().

    Returns:
        Processed binary uint8 mask (0/255).

    Raises:
        SegmentationError: If SAM2 returns no masks.
    """
    h, w = image.shape[:2]

    # Vertical strip down the frame center — likely person locations.
    positive_points = np.array([
        [w//2, h//3],
        [w//2, h//2],
        [w//2, 2*h//3],
    ], dtype=np.float32)

    # Near-corner points — almost certainly background.
    negative_points = np.array([
        [w//10, h//10],
        [9*w//10, h//10],
        [w//10, 9*h//10],
        [9*w//10, 9*h//10],
    ], dtype=np.float32)

    points = np.vstack([positive_points, negative_points])
    # Derive labels from the point arrays instead of a hard-coded
    # [1,1,1,0,0,0,0]: stays correct if the prompt sets change, and
    # matches how _segment_with_intelligent_prompts builds labels.
    labels = np.hstack([
        np.ones(len(positive_points), dtype=np.int32),
        np.zeros(len(negative_points), dtype=np.int32),
    ])

    with torch.no_grad():
        masks, scores, _ = predictor.predict(
            point_coords=points,
            point_labels=labels,
            multimask_output=True
        )

    if masks is None or len(masks) == 0:
        raise SegmentationError("No masks generated")

    # Prefer SAM2's highest-scoring candidate; fall back to the first.
    best_idx = np.argmax(scores) if scores is not None and len(scores) > 0 else 0
    best_mask = masks[best_idx]

    return _process_mask(best_mask)
def _generate_smart_prompts(image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Generate optimal positive/negative points automatically.

    Tries OpenCV spectral-residual saliency to place positive prompts on
    the centroids of the most salient regions; if that fails, falls back
    to a fixed center-strip.  Negative prompts are fixed border points.

    Args:
        image: Input frame (at least 2D; shape read as h, w).

    Returns:
        (positive_points, negative_points) as float32 Nx2 arrays of
        (x, y) coordinates; never empty (a minimal pair is returned on
        total failure).
    """
    try:
        h, w = image.shape[:2]

        try:
            # Spectral-residual saliency: requires opencv-contrib.
            saliency = cv2.saliency.StaticSaliencySpectralResidual_create()
            success, saliency_map = saliency.computeSaliency(image)

            if success:
                # Keep only strongly salient pixels (>0.7).
                saliency_thresh = cv2.threshold(saliency_map, 0.7, 1, cv2.THRESH_BINARY)[1]
                contours, _ = cv2.findContours((saliency_thresh * 255).astype(np.uint8),
                                               cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                positive_points = []
                if contours:
                    # Use the centroids of the 3 largest salient regions.
                    for contour in sorted(contours, key=cv2.contourArea, reverse=True)[:3]:
                        M = cv2.moments(contour)
                        if M["m00"] != 0:
                            cx = int(M["m10"] / M["m00"])
                            cy = int(M["m01"] / M["m00"])
                            if 0 < cx < w and 0 < cy < h:
                                positive_points.append([cx, cy])

                if positive_points:
                    logger.debug(f"Generated {len(positive_points)} saliency-based points")
                    positive_points = np.array(positive_points, dtype=np.float32)
                else:
                    # Force the fallback branch below.
                    raise Exception("No valid saliency points found")

        except Exception as e:
            # Saliency unavailable or unusable: fixed center-strip prompts.
            logger.debug(f"Saliency method failed: {e}, using fallback")
            positive_points = np.array([
                [w//2, h//3],
                [w//2, h//2],
                [w//2, 2*h//3],
            ], dtype=np.float32)

        # Corners plus top/bottom mid-edge — assumed background.
        negative_points = np.array([
            [10, 10],
            [w-10, 10],
            [10, h-10],
            [w-10, h-10],
            [w//2, 5],
            [w//2, h-5],
        ], dtype=np.float32)

        return positive_points, negative_points

    except Exception as e:
        # Last-resort minimal prompt set.
        logger.warning(f"Smart prompt generation failed: {e}")
        h, w = image.shape[:2]
        positive_points = np.array([[w//2, h//2]], dtype=np.float32)
        negative_points = np.array([[10, 10], [w-10, 10]], dtype=np.float32)
        return positive_points, negative_points
def _auto_refine_mask_iteratively(image: np.ndarray, initial_mask: np.ndarray,
                                  predictor: Any, max_iterations: int = 2) -> np.ndarray:
    """Automatically refine mask based on quality assessment.

    Loop: score the current mask; if below 0.85, locate edge-mismatch
    regions, turn them into corrective SAM2 point prompts, and re-predict
    with the current mask as guidance.  Keeps a refinement only if it
    scores higher than the mask it replaces.

    Args:
        image: Input frame; predictor.set_image() must already have been
            called by the caller.
        initial_mask: Starting binary uint8 mask.
        predictor: SAM2-style predictor exposing predict().
        max_iterations: Upper bound on refinement rounds.

    Returns:
        The best mask found; on any unexpected failure, `initial_mask`
        unchanged.
    """
    try:
        current_mask = initial_mask.copy()

        for iteration in range(max_iterations):
            quality_score = _assess_mask_quality(current_mask, image)
            logger.debug(f"Iteration {iteration}: quality score = {quality_score:.3f}")

            if quality_score > 0.85:
                logger.debug(f"Quality sufficient after {iteration} iterations")
                break

            problem_areas = _find_mask_errors(current_mask, image)

            if np.any(problem_areas):
                corrective_points, corrective_labels = _generate_corrective_prompts(
                    image, current_mask, problem_areas
                )

                if len(corrective_points) > 0:
                    try:
                        with torch.no_grad():
                            # NOTE(review): SAM2's mask_input normally takes
                            # low-res logits; passing the 0-255 uint8 mask
                            # directly may be rescaled or ignored — confirm.
                            masks, scores, _ = predictor.predict(
                                point_coords=corrective_points,
                                point_labels=corrective_labels,
                                mask_input=current_mask[None, :, :],
                                multimask_output=False
                            )

                        if masks is not None and len(masks) > 0:
                            refined_mask = _process_mask(masks[0])

                            # Only accept the refinement if it measurably helps.
                            if _assess_mask_quality(refined_mask, image) > quality_score:
                                current_mask = refined_mask
                                logger.debug(f"Improved mask in iteration {iteration}")
                            else:
                                logger.debug(f"Refinement didn't improve quality in iteration {iteration}")
                                break

                    except Exception as e:
                        logger.debug(f"Refinement iteration {iteration} failed: {e}")
                        break
            else:
                logger.debug("No problem areas detected")
                break

        return current_mask

    except Exception as e:
        logger.warning(f"Iterative refinement failed: {e}")
        return initial_mask
def _assess_mask_quality(mask: np.ndarray, image: np.ndarray) -> float:
    """Assess mask quality automatically.

    Weighted blend of four heuristics — coverage, centering, boundary
    smoothness, and connectivity — in [0, 1].  Returns 0.5 on failure.
    """
    try:
        h, w = image.shape[:2]
        scores = []

        # 1) Coverage: a person typically fills 5%-80% of the frame.
        total_area = h * w
        area_ratio = np.sum(mask > 127) / total_area
        if 0.05 <= area_ratio <= 0.8:
            scores.append(1.0)
        elif area_ratio < 0.05:
            scores.append(area_ratio / 0.05)
        else:
            scores.append(max(0, 1.0 - (area_ratio - 0.8) / 0.2))

        # 2) Centering: reward a centroid near the frame middle.
        foreground = mask > 127
        if np.any(foreground):
            ys, xs = np.where(foreground)
            cy = np.mean(ys) / h
            cx = np.mean(xs) / w
            scores.append(1.0 - min(abs(cx - 0.5), abs(cy - 0.5)))
        else:
            scores.append(0.0)

        # 3) Smoothness: dense mask edges suggest a noisy boundary.
        edge_density = np.sum(cv2.Canny(mask, 50, 150) > 0) / total_area
        scores.append(max(0, 1.0 - edge_density * 10))

        # 4) Connectivity: penalize fragmented masks (2 labels = bg + one blob).
        num_labels, _ = cv2.connectedComponents(mask)
        scores.append(max(0, 1.0 - (num_labels - 2) * 0.2))

        return np.average(scores, weights=[0.3, 0.2, 0.3, 0.2])

    except Exception as e:
        logger.warning(f"Quality assessment failed: {e}")
        return 0.5
def _find_mask_errors(mask: np.ndarray, image: np.ndarray) -> np.ndarray:
    """Identify problematic areas in mask.

    Compares Canny edges of the image against Canny edges of the mask;
    where they disagree, the mask boundary is likely wrong.  Returns a
    boolean map of (dilated) mismatch regions; all-False on failure.
    """
    try:
        image_edges = cv2.Canny(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 50, 150)
        boundary_edges = cv2.Canny(mask, 50, 150)

        # XOR: pixels where exactly one of the two edge maps fired.
        mismatch = cv2.bitwise_xor(image_edges, boundary_edges)

        # Grow thin mismatch lines into workable regions.
        grown = cv2.dilate(
            mismatch,
            cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)),
            iterations=1,
        )
        return grown > 0

    except Exception as e:
        logger.warning(f"Error detection failed: {e}")
        return np.zeros_like(mask, dtype=bool)
def _generate_corrective_prompts(image: np.ndarray, mask: np.ndarray,
                                 problem_areas: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Generate corrective prompts based on problem areas.

    For each sizable problem region, emits its centroid as a SAM2 point:
    positive (label 1) when the current mask excludes it, negative
    (label 0) when the mask includes it.  Returns empty arrays on failure
    or when no regions qualify.
    """
    try:
        contours, _ = cv2.findContours(problem_areas.astype(np.uint8),
                                       cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        point_list = []
        label_list = []

        for contour in contours:
            # Skip tiny mismatch blobs — likely noise.
            if cv2.contourArea(contour) <= 100:
                continue
            moments = cv2.moments(contour)
            if moments["m00"] == 0:
                continue

            px = int(moments["m10"] / moments["m00"])
            py = int(moments["m01"] / moments["m00"])
            point_list.append([px, py])
            # Outside the mask -> include (1); inside -> exclude (0).
            label_list.append(1 if mask[py, px] < 127 else 0)

        if point_list:
            return (np.array(point_list, dtype=np.float32),
                    np.array(label_list, dtype=np.int32))
        return np.array([]).reshape(0, 2), np.array([], dtype=np.int32)

    except Exception as e:
        logger.warning(f"Corrective prompt generation failed: {e}")
        return np.array([]).reshape(0, 2), np.array([], dtype=np.int32)
def _process_mask(mask: np.ndarray) -> np.ndarray:
    """Process raw mask to ensure correct format and range.

    Accepts masks in any of the shapes/dtypes the upstream models emit
    (extra singleton dims, multi-channel, bool, float 0-1 or 0-255) and
    normalizes them to a single-channel strictly-binary uint8 mask
    (values exactly 0 or 255), lightly cleaned with close/open morphology.

    Args:
        mask: Raw mask array.

    Returns:
        2D uint8 mask (0/255).  On processing failure, a centered
        rectangular fallback mask of matching (or 256x256) size.
    """
    try:
        # Drop singleton dimensions first (e.g. 1xHxW model output).
        if len(mask.shape) > 2:
            mask = mask.squeeze()

        # Still multi-channel after squeeze: keep channel 0 (or collapse).
        if len(mask.shape) > 2:
            mask = mask[:, :, 0] if mask.shape[2] > 0 else mask.sum(axis=2)

        if mask.dtype == bool:
            mask = mask.astype(np.uint8) * 255
        elif mask.dtype == np.float32 or mask.dtype == np.float64:
            # Distinguish normalized 0-1 floats from 0-255 floats.
            if mask.max() <= 1.0:
                mask = (mask * 255).astype(np.uint8)
            else:
                mask = np.clip(mask, 0, 255).astype(np.uint8)
        else:
            mask = mask.astype(np.uint8)

        # Close small holes, then remove speckles.
        kernel = np.ones((3, 3), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

        # Force a strictly binary 0/255 result.
        _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

        return mask

    except Exception as e:
        logger.error(f"Mask processing failed: {e}")
        # Fallback: centered rectangle covering the middle of the frame.
        h, w = mask.shape[:2] if len(mask.shape) >= 2 else (256, 256)
        fallback = np.zeros((h, w), dtype=np.uint8)
        fallback[h//4:3*h//4, w//4:3*w//4] = 255
        return fallback
+ def _validate_mask_quality(mask: np.ndarray, image_shape: Tuple[int, int]) -> bool:
1312
+ """Validate that the mask meets quality criteria"""
1313
+ try:
1314
+ h, w = image_shape
1315
+ mask_area = np.sum(mask > 127)
1316
+ total_area = h * w
1317
+
1318
+ area_ratio = mask_area / total_area
1319
+ if area_ratio < 0.05 or area_ratio > 0.8:
1320
+ logger.warning(f"Suspicious mask area ratio: {area_ratio:.3f}")
1321
+ return False
1322
+
1323
+ mask_binary = mask > 127
1324
+ mask_center_y, mask_center_x = np.where(mask_binary)
1325
+
1326
+ if len(mask_center_y) == 0:
1327
+ logger.warning("Empty mask")
1328
+ return False
1329
+
1330
+ center_y = np.mean(mask_center_y)
1331
+ center_x = np.mean(mask_center_x)
1332
+
1333
+ if center_y < h * 0.2 or center_y > h * 0.9:
1334
+ logger.warning(f"Mask center too far from expected person location: y={center_y/h:.2f}")
1335
+ return False
1336
+
1337
+ return True
1338
+
1339
+ except Exception as e:
1340
+ logger.warning(f"Mask validation error: {e}")
1341
+ return True
1342
+
1343
def _fallback_segmentation(image: np.ndarray) -> np.ndarray:
    """Fallback segmentation when AI models fail.

    Strategy ladder: (1) crude background subtraction assuming the border
    pixels are background, validated with _validate_mask_quality;
    (2) a centered ellipse; (3) as a last resort, a centered rectangle.

    Args:
        image: Input frame.

    Returns:
        Single-channel uint8 mask (0/255); never raises.
    """
    try:
        logger.info("Using fallback segmentation strategy")
        h, w = image.shape[:2]

        try:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Estimate background intensity from the frame border.
            edge_pixels = np.concatenate([
                gray[0, :], gray[-1, :], gray[:, 0], gray[:, -1]
            ])
            bg_color = np.median(edge_pixels)

            # Foreground = pixels that differ notably from the border tone.
            diff = np.abs(gray.astype(float) - bg_color)
            mask = (diff > 30).astype(np.uint8) * 255

            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
            mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
            mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

            if _validate_mask_quality(mask, image.shape[:2]):
                logger.info("Background subtraction fallback successful")
                return mask

        except Exception as e:
            logger.warning(f"Background subtraction fallback failed: {e}")

        # Strategy 2: centered ellipse roughly covering a standing person.
        mask = np.zeros((h, w), dtype=np.uint8)

        center_x, center_y = w // 2, h // 2
        # NOTE: h // 2.5 yields a float (floor-divided); the ellipse math
        # below works with it, but it is not an integer radius.
        radius_x, radius_y = w // 3, h // 2.5

        y, x = np.ogrid[:h, :w]
        mask_ellipse = ((x - center_x) / radius_x) ** 2 + ((y - center_y) / radius_y) ** 2 <= 1
        mask[mask_ellipse] = 255

        logger.info("Using geometric fallback mask")
        return mask

    except Exception as e:
        # Strategy 3: centered rectangle — cannot fail.
        logger.error(f"All fallback strategies failed: {e}")
        h, w = image.shape[:2]
        mask = np.zeros((h, w), dtype=np.uint8)
        mask[h//6:5*h//6, w//4:3*w//4] = 255
        return mask
+ def _matanyone_refine(image: np.ndarray, mask: np.ndarray, processor: Any) -> Optional[np.ndarray]:
1391
+ """Attempt MatAnyone mask refinement"""
1392
+ try:
1393
+ if hasattr(processor, 'infer'):
1394
+ refined_mask = processor.infer(image, mask)
1395
+ elif hasattr(processor, 'process'):
1396
+ refined_mask = processor.process(image, mask)
1397
+ elif callable(processor):
1398
+ refined_mask = processor(image, mask)
1399
+ else:
1400
+ logger.warning("Unknown MatAnyone interface")
1401
+ return None
1402
+
1403
+ if refined_mask is None:
1404
+ return None
1405
+
1406
+ refined_mask = _process_mask(refined_mask)
1407
+ logger.debug("MatAnyone refinement successful")
1408
+ return refined_mask
1409
+
1410
+ except Exception as e:
1411
+ logger.warning(f"MatAnyone processing error: {e}")
1412
+ return None
1413
+
1414
def _guided_filter_approx(guide: np.ndarray, mask: np.ndarray, radius: int = 8, eps: float = 0.2) -> np.ndarray:
    """Approximation of guided filter for edge-aware smoothing.

    Implements the classic guided-filter linear model using box filters:
    per-window coefficients a, b of the local model q = a*I + b, averaged
    and applied to the guide.  Smooths the mask while following edges in
    `guide`.

    Args:
        guide: Guidance image (BGR or grayscale); converted internally to
            grayscale float in [0, 1].
        mask: uint8 mask (0-255) to be smoothed.
        radius: Box-filter half-window; window side = 2*radius + 1.
        eps: Regularization — larger values smooth more / follow edges less.

    Returns:
        Smoothed uint8 mask (0-255); the input mask unchanged on failure.
    """
    try:
        guide_gray = cv2.cvtColor(guide, cv2.COLOR_BGR2GRAY) if len(guide.shape) == 3 else guide
        guide_gray = guide_gray.astype(np.float32) / 255.0
        mask_float = mask.astype(np.float32) / 255.0

        kernel_size = 2 * radius + 1

        # Local means of guide, mask, and their product.
        mean_guide = cv2.boxFilter(guide_gray, -1, (kernel_size, kernel_size))
        mean_mask = cv2.boxFilter(mask_float, -1, (kernel_size, kernel_size))
        corr_guide_mask = cv2.boxFilter(guide_gray * mask_float, -1, (kernel_size, kernel_size))

        # Local covariance(guide, mask) and variance(guide).
        cov_guide_mask = corr_guide_mask - mean_guide * mean_mask
        mean_guide_sq = cv2.boxFilter(guide_gray * guide_gray, -1, (kernel_size, kernel_size))
        var_guide = mean_guide_sq - mean_guide * mean_guide

        # Per-window linear coefficients: q = a * guide + b.
        a = cov_guide_mask / (var_guide + eps)
        b = mean_mask - a * mean_guide

        # Average the coefficients over each pixel's windows.
        mean_a = cv2.boxFilter(a, -1, (kernel_size, kernel_size))
        mean_b = cv2.boxFilter(b, -1, (kernel_size, kernel_size))

        output = mean_a * guide_gray + mean_b
        output = np.clip(output * 255, 0, 255).astype(np.uint8)

        return output

    except Exception as e:
        logger.warning(f"Guided filter approximation failed: {e}")
        return mask
def _advanced_compositing(frame: np.ndarray, mask: np.ndarray, background: np.ndarray) -> np.ndarray:
    """Advanced compositing with edge feathering and color correction.

    Hardens the mask, cleans it morphologically, feathers it into a soft
    alpha, shapes the alpha curve, applies edge color matching via
    _color_match_edges, then alpha-blends frame over background.

    Args:
        frame: Foreground frame (uint8).
        mask: Single-channel uint8 mask (0-255).
        background: Background image already resized to the frame size.

    Returns:
        Composited uint8 image.

    Raises:
        Exception: Propagates any failure (caller handles the fallback).
    """
    try:
        # Harden at 100 rather than 127 — keeps slightly-soft person
        # pixels in the foreground.
        threshold = 100
        _, mask_binary = cv2.threshold(mask, threshold, 255, cv2.THRESH_BINARY)

        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
        mask_binary = cv2.morphologyEx(mask_binary, cv2.MORPH_CLOSE, kernel)
        mask_binary = cv2.morphologyEx(mask_binary, cv2.MORPH_OPEN, kernel)

        # Feather the hard edge into a soft 0-1 alpha.
        mask_smooth = cv2.GaussianBlur(mask_binary.astype(np.float32), (5, 5), 1.0)
        mask_smooth = mask_smooth / 255.0

        # Gamma < 1 lifts mid-alpha values (slightly wider foreground).
        mask_smooth = np.power(mask_smooth, 0.8)

        # Push alpha toward the extremes to reduce halo around the edge.
        mask_smooth = np.where(mask_smooth > 0.5,
                               np.minimum(mask_smooth * 1.1, 1.0),
                               mask_smooth * 0.9)

        # Blend foreground edge colors toward the background tones.
        frame_adjusted = _color_match_edges(frame, background, mask_smooth)

        alpha_3ch = np.stack([mask_smooth] * 3, axis=2)

        frame_float = frame_adjusted.astype(np.float32)
        background_float = background.astype(np.float32)

        # Standard alpha blend: fg*a + bg*(1-a).
        result = frame_float * alpha_3ch + background_float * (1 - alpha_3ch)
        result = np.clip(result, 0, 255).astype(np.uint8)

        return result

    except Exception as e:
        logger.error(f"Advanced compositing error: {e}")
        raise
+ def _color_match_edges(frame: np.ndarray, background: np.ndarray, alpha: np.ndarray) -> np.ndarray:
1482
+ """