MogensR committed on
Commit
04848bf
·
1 Parent(s): a647170

Update utils/cv_processing.py

Browse files
Files changed (1) hide show
  1. utils/cv_processing.py +76 -16
utils/cv_processing.py CHANGED
@@ -8,6 +8,7 @@
8
  - refine_mask_hq(frame, mask, matanyone=None, fallback_enabled=True, **compat)
9
  - replace_background_hq(frame, mask, background, fallback_enabled=True)
10
  - create_professional_background(key_or_cfg, width, height)
 
11
  - validate_video_file(video_path) -> (bool, reason)
12
 
13
  Design:
@@ -59,7 +60,7 @@ def _to_mask01(m: np.ndarray) -> np.ndarray:
59
  return None
60
  if m.ndim == 3:
61
  m = m[..., 0]
62
- m = m.astype(np.float32)
63
  if m.max() > 1.0:
64
  m = m / 255.0
65
  return np.clip(m, 0.0, 1.0)
@@ -82,6 +83,13 @@ def _vertical_gradient(top: Tuple[int,int,int], bottom: Tuple[int,int,int], widt
82
  bg[y, :] = (r, g, b)
83
  return bg
84
 
 
 
 
 
 
 
 
85
  def _looks_like_mask(x: Any) -> bool:
86
  return (
87
  isinstance(x, np.ndarray)
@@ -116,6 +124,36 @@ def create_professional_background(key_or_cfg: Any, width: int, height: int) ->
116
  dark = (int(color[0]*0.7), int(color[1]*0.7), int(color[2]*0.7))
117
  return _vertical_gradient(dark, color, width, height)
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  # ----------------------------------------------------------------------------
120
  # Segmentation
121
  # ----------------------------------------------------------------------------
@@ -162,12 +200,19 @@ def segment_person_hq(
162
  h, w = rgb.shape[:2]
163
  center = np.array([[w // 2, h // 2]])
164
  labels = np.array([1])
165
- masks, scores, _ = predictor.predict(
 
166
  point_coords=center,
167
  point_labels=labels,
168
  multimask_output=True
169
  )
170
 
 
 
 
 
 
 
171
  m = np.array(masks)
172
  if m.ndim == 3: # (N,H,W)
173
  idx = int(np.argmax(scores)) if scores is not None else 0
@@ -225,11 +270,16 @@ def refine_mask_hq(
225
  Backward-compat:
226
  - accepts use_matanyone (False → skip model)
227
  - tolerates legacy arg order refine_mask_hq(mask, frame, ...)
 
228
  """
229
  # tolerate legacy order: refine_mask_hq(mask, frame, ...)
230
  if _looks_like_mask(frame) and isinstance(mask, np.ndarray) and mask.ndim == 3 and mask.shape[2] == 3:
231
  frame, mask = mask, frame
232
 
 
 
 
 
233
  mask01 = _to_mask01(mask)
234
 
235
  try:
@@ -246,23 +296,32 @@ def refine_mask_hq(
246
  img_t = img_t.to(device)
247
  mask_t = mask_t.to(device)
248
 
 
249
  if hasattr(matanyone, "step"):
250
- with torch.inference_mode():
251
- out = matanyone.step(
252
- image_tensor=img_t,
253
- mask_tensor=mask_t,
254
- objects=None,
255
- first_frame_pred=True
256
- )
257
- if hasattr(matanyone, "output_prob_to_mask"):
258
- out = matanyone.output_prob_to_mask(out)
259
- return _tensor_to_mask01(out)
260
-
 
 
 
 
261
  if hasattr(matanyone, "process"):
262
- refined = matanyone.process(frame, mask01)
263
- return _to_mask01(np.asarray(refined))
 
 
 
 
264
 
265
- logger.warning("MatAnyOne provided but neither 'step' nor 'process' found.")
266
 
267
  except Exception as e:
268
  logger.warning("MatAnyOne refinement failed: %s", e)
@@ -358,6 +417,7 @@ def validate_video_file(video_path: str) -> Tuple[bool, str]:
358
  "refine_mask_hq",
359
  "replace_background_hq",
360
  "create_professional_background",
 
361
  "validate_video_file",
362
  "PROFESSIONAL_BACKGROUNDS",
363
  ]
 
8
  - refine_mask_hq(frame, mask, matanyone=None, fallback_enabled=True, **compat)
9
  - replace_background_hq(frame, mask, background, fallback_enabled=True)
10
  - create_professional_background(key_or_cfg, width, height)
11
+ - create_gradient_background(spec, width, height)
12
  - validate_video_file(video_path) -> (bool, reason)
13
 
14
  Design:
 
60
  return None
61
  if m.ndim == 3:
62
  m = m[..., 0]
63
+ m = m.astype(np.float32, copy=False)
64
  if m.max() > 1.0:
65
  m = m / 255.0
66
  return np.clip(m, 0.0, 1.0)
 
83
  bg[y, :] = (r, g, b)
84
  return bg
85
 
86
+ def _rotate_image(img: np.ndarray, angle_deg: float) -> np.ndarray:
87
+ if float(angle_deg) % 360 == 0:
88
+ return img
89
+ h, w = img.shape[:2]
90
+ M = cv2.getRotationMatrix2D((w/2, h/2), float(angle_deg), 1.0)
91
+ return cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
92
+
93
  def _looks_like_mask(x: Any) -> bool:
94
  return (
95
  isinstance(x, np.ndarray)
 
124
  dark = (int(color[0]*0.7), int(color[1]*0.7), int(color[2]*0.7))
125
  return _vertical_gradient(dark, color, width, height)
126
 
127
def create_gradient_background(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
    """
    Build a two-colour gradient canvas.

    spec: {
        "type": "linear" | "radial",   # any value other than "radial" is treated as linear
        "start": (r,g,b),              # default (34,34,34)
        "end": (r,g,b),                # default (200,200,200)
        "angle_deg": float             # for linear only, default 0.0
    }
    A missing/None spec uses all defaults. Colours with more than three
    components (e.g. RGBA) are truncated to RGB; shorter ones are zero-padded.

    Linear gradients run from "start" to "end" along the direction obtained by
    rotating "top -> bottom" counter-clockwise by angle_deg (90 gives
    left -> right, 180 gives bottom -> top). The ramp always spans the full
    canvas along that direction.

    Radial gradients go from "start" near the canvas centre to "end" at the
    farthest corner.

    Returns RGB uint8 (H,W,3). A non-positive width or height yields an empty
    array instead of raising.
    """
    spec = spec or {}
    gtype = str(spec.get("type", "linear")).lower()
    # Normalise both colours to exactly three integer channels.
    start = (tuple(int(c) for c in spec.get("start", (34, 34, 34))) + (0, 0, 0))[:3]
    end = (tuple(int(c) for c in spec.get("end", (200, 200, 200))) + (0, 0, 0))[:3]

    if width <= 0 or height <= 0:
        return np.zeros((max(int(height), 0), max(int(width), 0), 3), dtype=np.uint8)

    if gtype == "radial":
        yy, xx = np.mgrid[0:height, 0:width]
        cx, cy = width / 2.0, height / 2.0
        dist = np.sqrt((xx - cx) ** 2 + (yy - cy) ** 2)
        # The epsilon keeps the division defined when every distance is zero.
        dist = dist / (dist.max() + 1e-6)
        dist = np.clip(dist, 0.0, 1.0).astype(np.float32)
        bg = np.zeros((height, width, 3), dtype=np.uint8)
        for i, (s, e) in enumerate(zip(start, end)):
            channel = (s * (1.0 - dist) + e * dist).astype(np.float32)
            bg[..., i] = np.clip(channel, 0, 255).astype(np.uint8)
        return bg

    angle = float(spec.get("angle_deg", 0.0))
    if not np.isfinite(angle):
        angle = 0.0  # NaN/inf cannot define a direction; fall back to vertical
    if angle % 360 == 0:
        # Unrotated case: delegate so the output matches the module's other
        # vertical gradients exactly.
        return _vertical_gradient(start, end, width, height)

    # Compute the angled ramp directly instead of rotating a fixed-size
    # vertical gradient: rotation leaves parts of a non-square canvas
    # uncovered, and filling them by border reflection makes the gradient
    # fold back toward the start colour.
    theta = np.deg2rad(angle)
    # Rounding snaps axis-aligned angles (90, 180, ...) to exact 0/±1 components.
    dx = round(float(np.sin(theta)), 12)
    dy = round(float(np.cos(theta)), 12)
    yy, xx = np.mgrid[0:height, 0:width]
    proj = (xx - (width - 1) / 2.0) * dx + (yy - (height - 1) / 2.0) * dy
    lo = proj.min()
    span = proj.max() - lo
    # A 1x1 canvas has zero span; it simply takes the start colour.
    t = (proj - lo) / span if span > 0 else np.zeros_like(proj)
    first = np.asarray(start, dtype=np.float64)
    last = np.asarray(end, dtype=np.float64)
    blended = first + (last - first) * t[..., None]
    return np.clip(np.rint(blended), 0, 255).astype(np.uint8)
156
+
157
  # ----------------------------------------------------------------------------
158
  # Segmentation
159
  # ----------------------------------------------------------------------------
 
200
  h, w = rgb.shape[:2]
201
  center = np.array([[w // 2, h // 2]])
202
  labels = np.array([1])
203
+
204
+ res = predictor.predict(
205
  point_coords=center,
206
  point_labels=labels,
207
  multimask_output=True
208
  )
209
 
210
+ # SAM2 predictors often return (masks, scores, logits)
211
+ if isinstance(res, tuple) and len(res) >= 1:
212
+ masks, scores = res[0], (res[1] if len(res) > 1 else None)
213
+ else:
214
+ masks, scores = res, None
215
+
216
  m = np.array(masks)
217
  if m.ndim == 3: # (N,H,W)
218
  idx = int(np.argmax(scores)) if scores is not None else 0
 
270
  Backward-compat:
271
  - accepts use_matanyone (False → skip model)
272
  - tolerates legacy arg order refine_mask_hq(mask, frame, ...)
273
+ - accepts mat_core=<processor> in kwargs
274
  """
275
  # tolerate legacy order: refine_mask_hq(mask, frame, ...)
276
  if _looks_like_mask(frame) and isinstance(mask, np.ndarray) and mask.ndim == 3 and mask.shape[2] == 3:
277
  frame, mask = mask, frame
278
 
279
+ # prefer explicitly passed matanyone, else legacy kw
280
+ if matanyone is None and "mat_core" in _compat_kwargs:
281
+ matanyone = _compat_kwargs.get("mat_core")
282
+
283
  mask01 = _to_mask01(mask)
284
 
285
  try:
 
296
  img_t = img_t.to(device)
297
  mask_t = mask_t.to(device)
298
 
299
+ # Preferred path
300
  if hasattr(matanyone, "step"):
301
+ try:
302
+ with torch.inference_mode():
303
+ out = matanyone.step(
304
+ image_tensor=img_t,
305
+ mask_tensor=mask_t,
306
+ objects=None,
307
+ first_frame_pred=True
308
+ )
309
+ if hasattr(matanyone, "output_prob_to_mask"):
310
+ out = matanyone.output_prob_to_mask(out)
311
+ return _tensor_to_mask01(out)
312
+ except Exception as e:
313
+ logger.warning("MatAnyOne .step path failed: %s ; trying .process fallback if available", e)
314
+
315
+ # Generic fallback
316
  if hasattr(matanyone, "process"):
317
+ try:
318
+ refined = matanyone.process(frame, mask01) # accepts numpy/PIL in many builds
319
+ refined = np.asarray(refined).astype(np.float32)
320
+ return _to_mask01(refined)
321
+ except Exception as e:
322
+ logger.warning("MatAnyOne .process path also failed: %s", e)
323
 
324
+ logger.warning("MatAnyOne provided but neither 'step' nor 'process' usable.")
325
 
326
  except Exception as e:
327
  logger.warning("MatAnyOne refinement failed: %s", e)
 
417
  "refine_mask_hq",
418
  "replace_background_hq",
419
  "create_professional_background",
420
+ "create_gradient_background",
421
  "validate_video_file",
422
  "PROFESSIONAL_BACKGROUNDS",
423
  ]