Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,8 +3,10 @@ import gradio as gr
|
|
| 3 |
import spaces
|
| 4 |
import torch
|
| 5 |
import os
|
|
|
|
| 6 |
import gc
|
| 7 |
import librosa
|
|
|
|
| 8 |
from PIL import Image, ImageSequence
|
| 9 |
from decord import VideoReader, cpu
|
| 10 |
from moviepy.editor import VideoFileClip
|
|
@@ -64,19 +66,22 @@ def frames_from_video(path):
|
|
| 64 |
|
| 65 |
def audio_from_video(path):
|
| 66 |
clip = VideoFileClip(path)
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
| 68 |
clip.close()
|
| 69 |
-
return
|
| 70 |
|
| 71 |
def load_audio(path):
|
| 72 |
audio_np, _ = librosa.load(path, sr = AUDIO_SR, mono = True)
|
| 73 |
return audio_np
|
| 74 |
|
| 75 |
def build_video_omni(path, prefix, instruction):
|
| 76 |
-
frames
|
| 77 |
-
audio
|
| 78 |
contents = [prefix + instruction]
|
| 79 |
-
total
|
| 80 |
for i in range(total):
|
| 81 |
frame = frames[i] if i < len(frames) else frames[-1]
|
| 82 |
chunk = audio[AUDIO_SR * i : AUDIO_SR * (i + 1)]
|
|
|
|
| 3 |
import spaces
|
| 4 |
import torch
|
| 5 |
import os
|
| 6 |
+
import math
|
| 7 |
import gc
|
| 8 |
import librosa
|
| 9 |
+
import tempfile
|
| 10 |
from PIL import Image, ImageSequence
|
| 11 |
from decord import VideoReader, cpu
|
| 12 |
from moviepy.editor import VideoFileClip
|
|
|
|
| 66 |
|
| 67 |
def audio_from_video(path):
    """Extract the audio track of the video at *path* as a mono waveform.

    The track is written with moviepy to a temporary 16-bit PCM WAV file
    at ``AUDIO_SR`` and then read back with ``librosa.load``.

    Args:
        path: Location of the video file handed to ``VideoFileClip``.

    Returns:
        The sample array returned by ``librosa.load`` (mono, ``AUDIO_SR``).

    Raises:
        ValueError: If the video has no audio track.
    """
    clip = VideoFileClip(path)
    try:
        # moviepy sets ``audio`` to None for clips without an audio stream;
        # fail with a clear message instead of an AttributeError.
        if clip.audio is None:
            raise ValueError(f"No audio track found in video: {path}")
        # Use a temporary directory rather than an open NamedTemporaryFile:
        # re-opening a still-open temp file by name is not portable.
        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "audio.wav")
            clip.audio.write_audiofile(wav_path, codec = "pcm_s16le",
                                       fps = AUDIO_SR, verbose = False, logger = None)
            audio_np, _ = librosa.load(wav_path, sr = AUDIO_SR, mono = True)
    finally:
        # Always release the clip's reader, even when extraction fails.
        clip.close()
    return audio_np
|
| 75 |
|
| 76 |
def load_audio(path):
    """Load the audio file at *path* as mono samples resampled to ``AUDIO_SR``.

    Only the sample array from ``librosa.load`` is returned; the sample
    rate it reports alongside is discarded.
    """
    return librosa.load(path, sr = AUDIO_SR, mono = True)[0]
|
| 79 |
|
| 80 |
def build_video_omni(path, prefix, instruction):
|
| 81 |
+
frames = frames_from_video(path)
|
| 82 |
+
audio = audio_from_video(path)
|
| 83 |
contents = [prefix + instruction]
|
| 84 |
+
total = max(len(frames), math.ceil(len(audio) / AUDIO_SR))
|
| 85 |
for i in range(total):
|
| 86 |
frame = frames[i] if i < len(frames) else frames[-1]
|
| 87 |
chunk = audio[AUDIO_SR * i : AUDIO_SR * (i + 1)]
|