aducsdr committed
Commit 46ec8e0 · verified · 1 Parent(s): 04f69f6

Update aduc_framework/managers/seedvr_manager.py

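For context, the sketch below shows how a caller would invoke the pool manager after this commit: process_video no longer takes a model_version argument, since the worker always loads SeedVR2-3B. The import path comes from the changed file; the constructor call, instance name, and argument values are illustrative assumptions, not part of this diff.

from aduc_framework.managers.seedvr_manager import SeedVrPoolManager

# Hypothetical caller: constructor arguments (if any) are not shown in this diff.
pool = SeedVrPoolManager()

# Model selection is gone: SeedVR2-3B is always used under the hood.
enhanced_path = pool.process_video(
    input_video_path="/tmp/input.mp4",     # illustrative paths
    output_video_path="/tmp/output.mp4",
    prompt="cinematic, high detail",
    steps=100,   # defaults from the new signature
    seed=666,
)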
aduc_framework/managers/seedvr_manager.py CHANGED
@@ -2,13 +2,10 @@
 #
 # Copyright (C) 2025 Carlos Rodrigues dos Santos
 #
-# Version: 6.0.0 (GPU Isolation Fix)
+# Version: 6.1.0 (Force 3B Model)
 #
-# This version implements the definitive fix for the device errors by using
-# the CUDA_VISIBLE_DEVICES environment variable. Each worker now operates in
-# a completely isolated environment, seeing only its own GPU. This forces all
-# third-party code to use the correct device and eliminates the need to
-# manage torch.distributed manually.
+# This version removes model selection and forces exclusive use of SeedVR 3B.
+# It also streamlines the setup so the 7B model checkpoint is not downloaded.
 
 import torch
 import os
@@ -26,12 +23,10 @@ import shutil
 from omegaconf import OmegaConf
 import yaml
 
-# Relative import for the hardware_manager
 from ..tools.hardware_manager import hardware_manager
 
 logger = logging.getLogger(__name__)
 
-# --- Global Paths ---
 APP_ROOT = Path("/home/user/app")
 DEPS_DIR = APP_ROOT / "deps"
 SEEDVR_SPACE_DIR = DEPS_DIR / "SeedVR_Space"
@@ -43,7 +38,6 @@ class SeedVrWorker:
         self.global_device_id = device_id
         self.local_device_name = 'cuda:0'
         self.gpu_index = self.global_device_id.split(':')[-1]
-
         self.runner = None
         self.is_initialized = False
         self.setup_complete = self._check_and_run_global_setup()
@@ -55,22 +49,17 @@ class SeedVrWorker:
         setup_flag = DEPS_DIR / "seedvr.setup.complete"
         if str(APP_ROOT) not in sys.path:
             sys.path.insert(0, str(APP_ROOT))
-
-        if setup_flag.exists():
-            return True
+        if setup_flag.exists(): return True
 
         logger.info("--- Starting SeedVR Global Setup (first run) ---")
-
         if not SEEDVR_SPACE_DIR.exists():
             DEPS_DIR.mkdir(exist_ok=True, parents=True)
             subprocess.run(["git", "clone", "--depth", "1", SEEDVR_SPACE_URL, str(SEEDVR_SPACE_DIR)], check=True)
 
         required_dirs = ["projects", "common", "models", "configs_3b", "configs_7b", "data"]
         for dirname in required_dirs:
-            source = SEEDVR_SPACE_DIR / dirname
-            target = APP_ROOT / dirname
-            if not target.exists():
-                shutil.copytree(source, target)
+            source, target = SEEDVR_SPACE_DIR / dirname, APP_ROOT / dirname
+            if not target.exists(): shutil.copytree(source, target)
 
         try:
             import apex
@@ -81,10 +70,11 @@ class SeedVrWorker:
 
         ckpt_dir = APP_ROOT / 'ckpts'
         ckpt_dir.mkdir(exist_ok=True)
+
+        # <<< MODIFICATION: Removed the 7B model download >>>
         model_urls = {
             'vae': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/ema_vae.pth',
             'dit_3b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/seedvr2_ema_3b.pth',
-            #'dit_7b': 'https://huggingface.co/ByteDance-Seed/SeedVR2-7B/resolve/main/seedvr2_ema_7b.pth',
             'pos_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/pos_emb.pt',
             'neg_emb': 'https://huggingface.co/ByteDance-Seed/SeedVR2-3B/resolve/main/neg_emb.pt'
         }
@@ -95,8 +85,8 @@ class SeedVrWorker:
         logger.info("--- SeedVR Global Setup Complete ---")
         return True
 
-    def initialize_runner(self, model_version: str):
-        """Loads the models into the device's VRAM, using an isolated GPU environment."""
+    def initialize_runner(self):
+        """Loads the 3B model into the device's VRAM."""
         if self.runner is not None: return
 
         os.environ['CUDA_VISIBLE_DEVICES'] = self.gpu_index
@@ -104,13 +94,11 @@ class SeedVrWorker:
         from projects.video_diffusion_sr.infer import VideoDiffusionInfer
         from common.config import load_config
 
-        logger.info(f"Worker {self.global_device_id}: Initializing runner... (process sees only {self.local_device_name})")
+        logger.info(f"Worker {self.global_device_id}: Initializing SeedVR 3B runner... (process sees only {self.local_device_name})")
 
-        config_path_str = f'configs_{model_version.lower()}'
-        checkpoint_path_str = f'seedvr2_ema_{model_version.lower()}.pth'
-
-        config_path = APP_ROOT / config_path_str / 'main.yaml'
-        checkpoint_path = APP_ROOT / 'ckpts' / checkpoint_path_str
+        # <<< MODIFICATION: Paths pinned to the 3B model >>>
+        config_path = APP_ROOT / 'configs_3b' / 'main.yaml'
+        checkpoint_path = APP_ROOT / 'ckpts' / 'seedvr2_ema_3b.pth'
 
         config = load_config(str(config_path))
         self.runner = VideoDiffusionInfer(config)
@@ -120,7 +108,7 @@ class SeedVrWorker:
         self.runner.configure_vae_model()
 
         self.is_initialized = True
-        logger.info(f"Worker {self.global_device_id}: Runner ready in VRAM.")
+        logger.info(f"Worker {self.global_device_id}: 3B runner ready in VRAM.")
 
     def unload_runner(self):
         """Unloads the models from VRAM and cleans up the environment."""
@@ -135,7 +123,7 @@ class SeedVrWorker:
         if 'CUDA_VISIBLE_DEVICES' in os.environ:
             del os.environ['CUDA_VISIBLE_DEVICES']
 
-    def process_video_internal(self, input_video_path, output_video_path, prompt, model_version, steps, seed):
+    def process_video_internal(self, input_video_path, output_video_path, prompt, steps, seed):
         """Runs inference in an isolated GPU environment."""
         os.environ['CUDA_VISIBLE_DEVICES'] = self.gpu_index
         device = torch.device(self.local_device_name)
@@ -182,7 +170,7 @@ class SeedVrWorker:
         self.runner.vae.to(device)
         samples = self.runner.vae_decode(video_tensors)
         final_sample = samples[0]
-        input_video_sample = cond_latents[0] # Use the condition latent as the base
+        input_video_sample = cond_latents[0]
         if final_sample.shape[1] < input_video_sample.shape[1]:
             input_video_sample = input_video_sample[:, :final_sample.shape[1]]
 
@@ -214,8 +202,9 @@ class SeedVrPoolManager:
         logger.info(f"SEEDVR CLEANUP THREAD: Starting cleanup of {worker.global_device_id} in the background...")
         worker.unload_runner()
 
+    # <<< MODIFICATION: Removed the 'model_version' argument from the public signature >>>
     def process_video(self, input_video_path: str, output_video_path: str, prompt: str,
-                      model_version: str = '3B', steps: int = 100, seed: int = 666) -> str:
+                      steps: int = 100, seed: int = 666) -> str:
         worker_to_use = None
         try:
             with self.lock:
@@ -230,13 +219,15 @@ class SeedVrPoolManager:
                 cleanup_thread.start()
                 self.last_cleanup_thread = cleanup_thread
 
-                worker_to_use.initialize_runner(model_version)
+                # Call initialize_runner with no arguments; it now always uses the 3B model
+                worker_to_use.initialize_runner()
 
                 self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
 
             logger.info(f"SEEDVR POOL MANAGER: Processing video on GPU {worker_to_use.global_device_id}...")
+            # Pass the arguments to the internal function, without 'model_version'
            return worker_to_use.process_video_internal(
-                input_video_path, output_video_path, prompt, model_version, steps, seed
+                input_video_path, output_video_path, prompt, steps, seed
             )
         except Exception as e:
             logger.error(f"SEEDVR POOL MANAGER: Error during video processing: {e}", exc_info=True)