EuuIia committed (verified)
Commit cb3f487 · 1 Parent(s): 31d7902

Update video_service.py

Files changed (1)
  1. video_service.py +104 -101
video_service.py CHANGED
@@ -17,6 +17,110 @@ import subprocess
 
 # --- 2. DEPENDENCY MANAGEMENT AND SETUP ---
 
+def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
+    try:
+        import psutil
+        import pynvml as nvml
+        nvml.nvmlInit()
+        handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
+        # Try the v3 call first, then fall back to the generic name if the binding differs
+        try:
+            procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
+        except Exception:
+            procs = nvml.nvmlDeviceGetComputeRunningProcesses(handle)
+        results = []
+        for p in procs:
+            pid = int(p.pid)
+            used_mb = None
+            try:
+                # NVML returns bytes; some bindings may use NVML_VALUE_NOT_AVAILABLE
+                if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
+                    used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
+            except Exception:
+                used_mb = None
+            name = "unknown"
+            user = "unknown"
+            try:
+                pr = psutil.Process(pid)
+                name = pr.name()
+                user = pr.username()
+            except Exception:
+                pass
+            results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
+        nvml.nvmlShutdown()
+        return results
+    except Exception:
+        return []
+
+def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
+    # CSV, no header, no units gives lines: "PID,process_name,used_memory"
+    cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
+    try:
+        out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
+    except Exception:
+        return []
+    results = []
+    for line in out.strip().splitlines():
+        parts = [p.strip() for p in line.split(",")]
+        if len(parts) >= 3:
+            try:
+                pid = int(parts[0])
+                name = parts[1]
+                used_mb = int(parts[2])
+                user = "unknown"
+                try:
+                    import psutil
+                    pr = psutil.Process(pid)
+                    user = pr.username()
+                except Exception:
+                    pass
+                results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
+            except Exception:
+                continue
+    return results
+
+def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
+    if not processes:
+        return " - Processos ativos: (nenhum)\n"
+    # Sort by used_mb, descending
+    processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
+    lines = [" - Processos ativos (PID | USER | NAME | VRAM MB):"]
+    for p in processes:
+        star = "*" if p["pid"] == current_pid else " "
+        used_str = str(p["used_mb"]) if p.get("used_mb") is not None else "N/A"
+        lines.append(f" {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
+    return "\n".join(lines) + "\n"
+
+# Integration into the existing method:
+def _log_gpu_memory(self, stage_name: str):
+    import torch
+    if self.device != "cuda":
+        return
+    device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
+    current_reserved_b = torch.cuda.memory_reserved(device_index)
+    current_reserved_mb = current_reserved_b / (1024 ** 2)
+    total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
+    total_memory_mb = total_memory_b / (1024 ** 2)
+    peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
+    delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
+
+    # Process collection: try NVML first, then fall back to nvidia-smi
+    processes = _query_gpu_processes_via_nvml(device_index)
+    if not processes:
+        processes = _query_gpu_processes_via_nvidiasmi(device_index)
+
+    print(f"\n--- [LOG DE MEMÓRIA GPU] - {stage_name} (cuda:{device_index}) ---")
+    print(f" - Uso Atual (Reservado): {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB")
+    print(f" - Variação desde o último log: {delta_mb:+.2f} MB")
+    if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
+        print(f" - Pico de Uso (nesta operação): {peak_reserved_mb:.2f} MB")
+    # Print the per-process table
+    print(_gpu_process_table(processes, os.getpid()), end="")
+    print("--------------------------------------------------\n")
+    self.last_memory_reserved_mb = current_reserved_mb
+
+
+
 def run_setup():
     """Executes the setup.py script to clone the required dependencies."""
     setup_script_path = "setup.py"
@@ -150,107 +254,6 @@ class VideoService:
         except Exception:
             pass
 
-    def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
-        try:
-            import psutil
-            import pynvml as nvml
-            nvml.nvmlInit()
-            handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
-            # Try the v3 call first, then fall back to the generic name if the binding differs
-            try:
-                procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
-            except Exception:
-                procs = nvml.nvmlDeviceGetComputeRunningProcesses(handle)
-            results = []
-            for p in procs:
-                pid = int(p.pid)
-                used_mb = None
-                try:
-                    # NVML returns bytes; some bindings may use NVML_VALUE_NOT_AVAILABLE
-                    if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
-                        used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
-                except Exception:
-                    used_mb = None
-                name = "unknown"
-                user = "unknown"
-                try:
-                    pr = psutil.Process(pid)
-                    name = pr.name()
-                    user = pr.username()
-                except Exception:
-                    pass
-                results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
-            nvml.nvmlShutdown()
-            return results
-        except Exception:
-            return []
-
-    def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
-        # CSV, no header, no units gives lines: "PID,process_name,used_memory"
-        cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
-        try:
-            out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
-        except Exception:
-            return []
-        results = []
-        for line in out.strip().splitlines():
-            parts = [p.strip() for p in line.split(",")]
-            if len(parts) >= 3:
-                try:
-                    pid = int(parts[0])
-                    name = parts[1]
-                    used_mb = int(parts[2])
-                    user = "unknown"
-                    try:
-                        import psutil
-                        pr = psutil.Process(pid)
-                        user = pr.username()
-                    except Exception:
-                        pass
-                    results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
-                except Exception:
-                    continue
-        return results
-
-    def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
-        if not processes:
-            return " - Processos ativos: (nenhum)\n"
-        # Sort by used_mb, descending
-        processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
-        lines = [" - Processos ativos (PID | USER | NAME | VRAM MB):"]
-        for p in processes:
-            star = "*" if p["pid"] == current_pid else " "
-            used_str = str(p["used_mb"]) if p.get("used_mb") is not None else "N/A"
-            lines.append(f" {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
-        return "\n".join(lines) + "\n"
-
-    # Integration into the existing method:
-    def _log_gpu_memory(self, stage_name: str):
-        import torch
-        if self.device != "cuda":
-            return
-        device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
-        current_reserved_b = torch.cuda.memory_reserved(device_index)
-        current_reserved_mb = current_reserved_b / (1024 ** 2)
-        total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
-        total_memory_mb = total_memory_b / (1024 ** 2)
-        peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
-        delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
-
-        # Process collection: try NVML first, then fall back to nvidia-smi
-        processes = _query_gpu_processes_via_nvml(device_index)
-        if not processes:
-            processes = _query_gpu_processes_via_nvidiasmi(device_index)
-
-        print(f"\n--- [LOG DE MEMÓRIA GPU] - {stage_name} (cuda:{device_index}) ---")
-        print(f" - Uso Atual (Reservado): {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB")
-        print(f" - Variação desde o último log: {delta_mb:+.2f} MB")
-        if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
-            print(f" - Pico de Uso (nesta operação): {peak_reserved_mb:.2f} MB")
-        # Print the per-process table
-        print(_gpu_process_table(processes, os.getpid()), end="")
-        print("--------------------------------------------------\n")
-        self.last_memory_reserved_mb = current_reserved_mb
 
     def _load_config(self):
         config_file_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled.yaml"
 
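For reference, a minimal usage sketch of the module-level helpers added above, assuming video_service.py imports without side effects and that GPU 0 is the target device; the print_gpu_processes wrapper is hypothetical and only mirrors the NVML-then-nvidia-smi fallback order used by _log_gpu_memory:

import os

from video_service import (
    _query_gpu_processes_via_nvml,
    _query_gpu_processes_via_nvidiasmi,
    _gpu_process_table,
)

def print_gpu_processes(device_index: int = 0) -> None:
    # Same fallback chain as _log_gpu_memory: NVML first, then nvidia-smi;
    # both helpers return an empty list when the query is unavailable.
    processes = _query_gpu_processes_via_nvml(device_index)
    if not processes:
        processes = _query_gpu_processes_via_nvidiasmi(device_index)
    # The current PID is marked with "*" in the rendered table.
    print(_gpu_process_table(processes, os.getpid()), end="")

if __name__ == "__main__":
    print_gpu_processes(0)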