Added stuff
app.py CHANGED
@@ -48,8 +48,8 @@ def get_audio(audio_path, duration=10, target_sr=16000):
 
 def captioning(model,audio_path):
     audio_tensor = get_audio(audio_path = audio_path)
-
-
+    if device is not None:
+        audio_tensor = audio_tensor.to(device)
     with torch.no_grad():
         output = model.generate(
             samples=audio_tensor,
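
The guard added above assumes a module-level `device` variable that can be `None`; its definition is not part of this diff. Moving `audio_tensor` onto the model's device avoids the device-mismatch RuntimeError PyTorch raises when `model.generate` receives a CPU tensor while the model weights sit on the GPU. A minimal sketch consistent with the `is not None` check (an assumption, not code from app.py):

import torch

# Assumed definition elsewhere in app.py: prefer the GPU when available,
# otherwise leave device as None and keep tensors on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else None
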
@@ -102,11 +102,11 @@ if st.session_state.audio_input:
     st.text(captions)
     if st.session_state.captions:
         if st.button("Generate Image and video from text prompt"):
-            st.session_state.image = image_service(captions).images[0]
+            st.session_state.image = image_service(st.session_state.captions).images[0]
             image = st.session_state.image
             video = video_model(
-                prompt = captions,
-                image=image,
+                prompt = st.session_state.captions,
+                image=st.session_state.image,
                 num_inference_steps=50
             ).frames[0]
             st.session_state.video = video
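
Switching from the local `captions` to `st.session_state.captions` matters because Streamlit reruns the entire script on every interaction, including the button click that enters this branch, so a local variable computed on an earlier run is not guaranteed to exist. A minimal sketch of the session-state pattern this change relies on (the widget label and placeholder caption are illustrative, not taken from app.py):

import streamlit as st

# st.session_state persists across Streamlit's script reruns; plain locals do not.
if "captions" not in st.session_state:
    st.session_state.captions = None

if st.button("Caption audio"):
    # The script reruns on every click; only session_state survives the rerun.
    st.session_state.captions = "a drum loop with a steady beat"  # placeholder

if st.session_state.captions:
    st.text(st.session_state.captions)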
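
The `image_service(...).images[0]` and `video_model(...).frames[0]` calling conventions match Hugging Face diffusers text-to-image and image-to-video pipelines. A hedged sketch of how such pipelines could be wired up, assuming diffusers is the backing library; the pipeline classes and checkpoint IDs below are plausible stand-ins, not taken from app.py:

import torch
from diffusers import AutoPipelineForText2Image, I2VGenXLPipeline

# Illustrative checkpoints; app.py may load different models.
image_service = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo", torch_dtype=torch.float16
).to("cuda")
video_model = I2VGenXLPipeline.from_pretrained(
    "ali-vilab/i2vgen-xl", torch_dtype=torch.float16
).to("cuda")

caption = "a drum loop with a steady beat"
image = image_service(caption).images[0]   # text -> first generated PIL image
video = video_model(
    prompt=caption,
    image=image,
    num_inference_steps=50,
).frames[0]                                # image -> frames of the first video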
|