Update app.py
Browse files
app.py
CHANGED
|
@@ -358,7 +358,69 @@ def save_video(video_file):
|
|
| 358 |
f.write(video_file.getbuffer())
|
| 359 |
return video_file.name
|
| 360 |
|
| 361 |
-
def process_video(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
base64Frames = []
|
| 363 |
base_video_path, _ = os.path.splitext(video_path)
|
| 364 |
video = cv2.VideoCapture(video_path)
|
|
|
|
| 358 |
f.write(video_file.getbuffer())
|
| 359 |
return video_file.name
|
| 360 |
|
| 361 |
def process_video(video_input, user_prompt):
    """Summarize a video (sampled frames + audio transcript) with the vision model.

    Accepts either a path to an existing video file (str) or an uploaded
    file-like object (must expose ``.name`` and ``.read()``).  Renders the
    summary via Streamlit, persists it as markdown (and optionally re-saves
    the video under an auto-generated name), and returns the summary text.

    Args:
        video_input: str path to a video file on disk, or an uploaded
            file-like object from the Streamlit uploader.
        user_prompt: extra instruction text appended to the model request.

    Returns:
        str: the model-generated markdown summary of the video.
    """
    # A plain path means the file already exists on disk, so this is just
    # prompt inference; an uploaded object still needs to be written out.
    if isinstance(video_input, str):
        video_file_name = video_input
        with open(video_input, "rb") as video_file:
            video_input = video_file.read()
        save_new_file = False  # file is there and this is just prompt inference
    else:
        video_file_name = video_input.name
        video_input = video_input.read()
        save_new_file = True

    st.markdown('Processing video: ' + video_file_name)

    # BUG FIX: the original called process_video(...) here — i.e. itself —
    # which raises TypeError (this signature has no seconds_per_frame).
    # The frame/audio extractor was renamed process_video_old in this change.
    base64Frames, audio_path = process_video_old(video_file_name, seconds_per_frame=1)

    # Get the transcript for the video model call
    transcript = process_audio_for_video(video_input)

    # Generate a summary using both the visual frames and the audio transcript.
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": """You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown"""},
            {"role": "user", "content": [
                "These are the frames from the video.",
                *map(lambda x: {"type": "image_url",
                                "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames),
                {"type": "text", "text": f"The audio transcription is: {transcript}"},
                {"type": "text", "text": user_prompt}
            ]},
        ],
        temperature=0,
    )
    video_response = response.choices[0].message.content
    st.markdown(video_response)

    # Save markdown of the video AI output from gpt4o.
    # (Removed unused local filename_mp4 — it was computed but never used.)
    filename_md = generate_filename(video_file_name + '- ' + video_response, "md")

    create_file(filename_md, video_response, '', True)

    with open(filename_md, "w", encoding="utf-8") as f:
        f.write(video_response)

    # Extract boldface terms from video_response then autoname the saved file.
    boldface_terms = extract_title(video_response).replace(':', '')
    filename_stem, extension = os.path.splitext(video_file_name)
    filename_video = f"{filename_stem} {''.join(boldface_terms)}{extension}"
    if save_new_file:
        # NOTE(review): save_video is invoked with two arguments here, but the
        # visible definition is save_video(video_file) taking one — confirm the
        # helper's signature accepts (bytes, target_name) before relying on it.
        newfilename = save_video(video_input, filename_video)
        filename_md = newfilename.replace('.mp4', '.md')
        create_file(filename_md, '', video_response, True)
    else:
        filename = generate_filename(filename_md, "md")
        create_file(filename, video_file_name, video_response, should_save)

    return video_response

| 423 |
+
def process_video_old(video_path, seconds_per_frame=2):
|
| 424 |
base64Frames = []
|
| 425 |
base_video_path, _ = os.path.splitext(video_path)
|
| 426 |
video = cv2.VideoCapture(video_path)
|