Spaces:

Staticaliza
/

Sense

Paused

App Files Files Community

Staticaliza committed on May 28

Commit

7820541

verified ·

1 Parent(s): 4cab0f7

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -11

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import spaces
 import torch
 import os
 import librosa
-from PIL import Image
 from decord import VideoReader, cpu
 from transformers import AutoModel, AutoTokenizer, AutoProcessor
@@ -31,9 +31,16 @@ footer {
 }
 '''
 filetypes = {
     "Image": [".jpg", ".jpeg", ".png", ".bmp"],
-    "Gif": [".gif"],
     "Video": [".mp4", ".mov", ".avi", ".mkv"],
     "Audio": [".wav", ".mp3", ".flac", ".aac"],
 }
@@ -60,14 +67,13 @@ def encode_gif(path):
 @spaces.GPU(duration=60)
 def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
     print(input)
     print(instruction)
-    if not input_file:
         return "No input provided."
-    extension = os.path.splitext(input_file)[1].lower()
     filetype = None
     for category, extensions in filetypes.items():
         if extension in extensions:
@@ -76,20 +82,22 @@ def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7,
     content = []
     if filetype == "Image":
-        image = Image.open(input_file).convert("RGB")
         content.append(image)
-    elif filetype in ["Video", "Gif"]:
-        frames = encode_gif(input_file) if filetype == "Gif" else encode_video(input_file)
         content.extend(frames)
-        audio = librosa.load(input_file, sr=16000, mono=True)
         content.append(audio)
     elif filetype == "Audio":
-        audio = librosa.load(input_file, sr=16000, mono=True)
         content.append(audio)
     else:
         return "Unsupported file type."
-    content.append(instruction)
     inputs_payload = [{"role": "user", "content": content}]
     params = {

 import torch
 import os
 import librosa
+from PIL import Image, ImageSequence
 from decord import VideoReader, cpu
 from transformers import AutoModel, AutoTokenizer, AutoProcessor
 }
 '''
+input_prefixes = {
+    "Image": "(A image file called █ has been attached) ",
+    "GIF": "(A GIF file called █ has been attached) ",
+    "Video": "(A video with audio file called █ has been attached) ",
+    "Audio": "(A audio file called █ has been attached) ",
+}
 filetypes = {
     "Image": [".jpg", ".jpeg", ".png", ".bmp"],
+    "GIF": [".gif"],
     "Video": [".mp4", ".mov", ".avi", ".mkv"],
     "Audio": [".wav", ".mp3", ".flac", ".aac"],
 }
 @spaces.GPU(duration=60)
 def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
     print(input)
     print(instruction)
+    if not input:
         return "No input provided."
+    extension = os.path.splitext(input)[1].lower()
     filetype = None
     for category, extensions in filetypes.items():
         if extension in extensions:
     content = []
     if filetype == "Image":
+        image = Image.open(input).convert("RGB")
         content.append(image)
+    elif filetype in ["Video", "GIF"]:
+        frames = encode_gif(input) if filetype == "GIF" else encode_video(input_file)
         content.extend(frames)
+        audio = librosa.load(input, sr=16000, mono=True)
         content.append(audio)
     elif filetype == "Audio":
+        audio = librosa.load(input, sr=16000, mono=True)
         content.append(audio)
     else:
         return "Unsupported file type."
+    filename = os.path.basename(input_file)
+    prefix = input_prefixes[filetype].replace("█", filename)
+    content.append(prefix + instruction)
     inputs_payload = [{"role": "user", "content": content}]
     params = {