import os
import gradio as gr
import openai
import requests
import csv
import argparse
from models.vlog import Vlogger
parser = argparse.ArgumentParser()
parser.add_argument('--video_path', default='examples/huaqiang.mp4')
parser.add_argument('--alpha', default=10, type=int, help='Maximum segment number for the KTS algorithm; the larger the value, the fewer segments.')
parser.add_argument('--beta', default=1, type=int, help='Smallest time gap between successive clips, in seconds.')
parser.add_argument('--data_dir', default='./examples', type=str, help='Directory for saving videos and logs.')
parser.add_argument('--tmp_dir', default='./tmp', type=str, help='Directory for saving intermediate files.')

# * Model settings *
parser.add_argument('--openai_api_key', default='xxx', type=str, help='OpenAI API key')
# Note: store_true combined with default=True means these two flags are always on.
parser.add_argument('--image_caption', action='store_true', dest='image_caption', default=True, help='Set this flag to use BLIP image captioning')
parser.add_argument('--dense_caption', action='store_true', dest='dense_caption', default=True, help='Set this flag to use dense captioning')
parser.add_argument('--feature_extractor', default='openai/clip-vit-base-patch32', help='Feature extractor model for video segmentation')
parser.add_argument('--feature_extractor_device', choices=['cuda', 'cpu'], default='cuda', help='Device for the feature extractor: cuda or cpu')
parser.add_argument('--image_captioner', choices=['blip', 'blip2'], dest='captioner_base_model', default='blip2', help='blip2 requires 15 GB of GPU memory, blip requires 6 GB')
parser.add_argument('--image_captioner_device', choices=['cuda', 'cpu'], default='cuda', help='Device for the image captioner; more than 14 GB of GPU memory is recommended')
parser.add_argument('--dense_captioner_device', choices=['cuda', 'cpu'], default='cuda', help='Device for the dense captioner; less than 6 GB of GPU memory is not recommended')
parser.add_argument('--audio_translator', default='large')
parser.add_argument('--audio_translator_device', choices=['cuda', 'cpu'], default='cuda')
parser.add_argument('--gpt_version', choices=['gpt-3.5-turbo'], default='gpt-3.5-turbo')
args = parser.parse_args()
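# Example invocation (a sketch; the flag values below are illustrative, not
# requirements of the app):
#   python app.py --openai_api_key sk-... \
#       --video_path examples/huaqiang.mp4 \
#       --image_captioner blip --image_captioner_device cuda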
def get_empty_state():
    return {"total_tokens": 0, "messages": []}
def submit_api_key_fn(api_key, vlogger):
    try:
        vlogger.init_llm_with_api_key(api_key)
        return gr.update(value="OpenAI API key submitted successfully"), True, vlogger
    except Exception as e:
        return gr.update(value=f"Error: {e}"), False, vlogger
def submit_message(prompt, state, vlogger, api_key_submitted, vlog_loaded):
    if not api_key_submitted:
        return gr.update(value=''), [("", "Please enter your OpenAI API key first.")], state, vlogger
    if not vlog_loaded:
        return gr.update(value=''), [("", "Please follow the instructions to select a video and generate its document before chatting.")], state, vlogger

    history = state['messages']
    if not prompt:
        return gr.update(value=''), [(history[i]['content'], history[i+1]['content']) for i in range(0, len(history)-1, 2)], state, vlogger

    prompt_msg = {"role": "user", "content": prompt}
    history.append(prompt_msg)  # append once, whether the call succeeds or fails
    try:
        answer = vlogger.chat2video(prompt)
        history.append({"role": "system", "content": answer})
    except Exception as e:
        history.append({"role": "system", "content": f"Error: {e}"})

    chat_messages = [(history[i]['content'], history[i+1]['content']) for i in range(0, len(history)-1, 2)]
    return '', chat_messages, state, vlogger
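# The chatbot widget expects (user, assistant) tuples, so the flat history
# list of alternating user/system messages is re-paired in steps of two.
# For example (illustrative values):
#   history = [{"content": "Who appears?"}, {"content": "A basketball player."}]
#   -> chat_messages = [("Who appears?", "A basketball player.")]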
def clear_conversation(vlogger):
    vlogger.clean_history()
    # outputs: input_message, video_inp, chatbot, vlog_outp, state, vlogger, vlog_loaded
    return gr.update(value=None, visible=True), gr.update(value=None, interactive=False), None, gr.update(value=None, visible=True), get_empty_state(), vlogger, False
def vlog_fn(vid_path, vlogger, api_key_submitted):
    if not api_key_submitted:
        log_text = "====== Please enter your OpenAI API key first ======"
        return gr.update(value=log_text, visible=True), False, vlogger
    print(vid_path)
    if vid_path is None:
        log_text = "====== Please select a video from the examples first ======"
        vlog_loaded_flag = False
    else:
        log_list = vlogger.video2log(vid_path)
        log_text = "\n".join(log_list)
        vlog_loaded_flag = True
    return gr.update(value=log_text, visible=True), vlog_loaded_flag, vlogger
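# As above, the return values map onto [vlog_outp, vlog_loaded, vlogger] in
# the click handler below: the generated video document fills the textbox,
# and the loaded flag gates subsequent chat requests.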
css = """
#col-container {max-width: 90%; margin-left: auto; margin-right: auto;}
#video_inp {min-height: 300px}
#chatbox {min-height: 100px;}
#header {text-align: center;}
#hint {font-size: 0.9em; padding: 0.5em; margin: 0;}
.message {font-size: 1.2em;}
"""
with gr.Blocks(css=css) as demo:
    state = gr.State(get_empty_state())
    vlogger = gr.State(Vlogger(args))
    vlog_loaded = gr.State(False)
    api_key_submitted = gr.State(False)

    with gr.Column(elem_id="col-container"):
        gr.Markdown(
            """## 🎞️ VLog Demo
            Powered by BLIP2, GRIT, Whisper, ChatGPT and LangChain
            Github: [https://github.com/showlab/VLog](https://github.com/showlab/VLog)""",
            elem_id="header")
        gr.Markdown("*Instruction*: For the current demo, please enter your OpenAI API key, select an example video, click the button to generate a document, and then try chatting over the video.", elem_id="hint")

        with gr.Row():
            with gr.Column(scale=6):
                video_inp = gr.Video(label="video_input", interactive=False, elem_id="video_inp")
                chatbot = gr.Chatbot(elem_id="chatbox")
                input_message = gr.Textbox(show_label=False, placeholder="Enter text and press enter", visible=True).style(container=False)
                btn_submit = gr.Button("Submit")
                btn_clear_conversation = gr.Button("Start New Conversation")
            with gr.Column(scale=6):
                vlog_btn = gr.Button("Generate Video Document")
                vlog_outp = gr.Textbox(label="Document output", lines=30)
            with gr.Column(scale=1):
                openai_api_key = gr.Textbox(
                    placeholder="Input OpenAI API key and press Enter",
                    show_label=False,
                    label="OpenAI API Key",
                    lines=1,
                    type="password",
                )
                examples = gr.Examples(
                    examples=[
                        ["examples/basketball_vlog.mp4"],
                        ["examples/travel_in_roman.mp4"],
                        ["examples/C8lMW0MODFs.mp4"],
                        ["examples/outcGtbnMuQ.mp4"],
                        ["examples/huaqiang.mp4"],
                    ],
                    inputs=[video_inp],
                )

        gr.HTML('''<br><br><br><center>You can duplicate this Space to skip the queue: <a href="https://huggingface.co/spaces/TencentARC/VLog?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a><br></center>''')

    # Event wiring: each handler's return values fill the listed output components.
    btn_submit.click(submit_message, [input_message, state, vlogger, api_key_submitted, vlog_loaded], [input_message, chatbot, state, vlogger])
    input_message.submit(submit_message, [input_message, state, vlogger, api_key_submitted, vlog_loaded], [input_message, chatbot, state, vlogger])
    btn_clear_conversation.click(clear_conversation, [vlogger], [input_message, video_inp, chatbot, vlog_outp, state, vlogger, vlog_loaded])
    vlog_btn.click(vlog_fn, [video_inp, vlogger, api_key_submitted], [vlog_outp, vlog_loaded, vlogger])
    openai_api_key.submit(submit_api_key_fn, [openai_api_key, vlogger], [vlog_outp, api_key_submitted, vlogger])
    demo.load(queue=False)

# Up to five requests are processed concurrently; the queue is needed for
# long-running jobs like document generation.
demo.queue(concurrency_count=5)
demo.launch(height='800px')