Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	update
Browse files
- app.py +17 -6
- constants.py +5 -5
- file/result.csv +22 -22
- src/compute.py +8 -3
    	
        app.py
    CHANGED
    
    | @@ -34,6 +34,7 @@ def add_new_eval( | |
| 34 | 
             
                model_link: str,
         | 
| 35 | 
             
                model_type: str,
         | 
| 36 | 
             
                model_size: str,
         | 
|  | |
| 37 | 
             
                notes: str,
         | 
| 38 | 
             
            ):
         | 
| 39 | 
             
                if input_file is None:
         | 
| @@ -72,6 +73,7 @@ def add_new_eval( | |
| 72 | 
             
                        model_name,
         | 
| 73 | 
             
                        model_type,
         | 
| 74 | 
             
                        model_size,
         | 
|  | |
| 75 | 
             
                        input_data[0],
         | 
| 76 | 
             
                        input_data[1],
         | 
| 77 | 
             
                        input_data[2],
         | 
| @@ -97,6 +99,11 @@ def add_new_eval( | |
| 97 | 
             
                        input_data[22],
         | 
| 98 | 
             
                        input_data[23],
         | 
| 99 | 
             
                        input_data[24],
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 100 | 
             
                        notes,
         | 
| 101 | 
             
                        ]
         | 
| 102 | 
             
                    # print(len(new_data), col)
         | 
| @@ -108,12 +115,12 @@ def add_new_eval( | |
| 108 | 
             
                    csv_data.to_csv(CSV_DIR, index=False)
         | 
| 109 |  | 
| 110 | 
             
                    # push newly added result
         | 
| 111 | 
            -
                    api.upload_file(
         | 
| 112 | 
            -
             | 
| 113 | 
            -
             | 
| 114 | 
            -
             | 
| 115 | 
            -
             | 
| 116 | 
            -
                    )
         | 
| 117 | 
             
                return 0
         | 
| 118 |  | 
| 119 | 
             
            def get_baseline_df():
         | 
| @@ -227,6 +234,9 @@ with block: | |
| 227 | 
             
                                model_size = gr.Textbox(
         | 
| 228 | 
             
                                    label="Model size", placeholder="7B(Input content format must be 'number+B' or '-', default is '-')"
         | 
| 229 | 
             
                                )
         | 
|  | |
|  | |
|  | |
| 230 | 
             
                                notes = gr.Textbox(
         | 
| 231 | 
             
                                    label="Notes", placeholder="Other details of the model or evaluation, e.g., which answer prompt is used."
         | 
| 232 | 
             
                                )
         | 
| @@ -246,6 +256,7 @@ with block: | |
| 246 | 
             
                                    model_link,
         | 
| 247 | 
             
                                    model_type,
         | 
| 248 | 
             
                                    model_size,
         | 
|  | |
| 249 | 
             
                                    notes,
         | 
| 250 | 
             
                                ],
         | 
| 251 | 
             
                                # outputs = submission_result,
         | 
|  | |
| 34 | 
             
                model_link: str,
         | 
| 35 | 
             
                model_type: str,
         | 
| 36 | 
             
                model_size: str,
         | 
| 37 | 
            +
                num_frame: str,
         | 
| 38 | 
             
                notes: str,
         | 
| 39 | 
             
            ):
         | 
| 40 | 
             
                if input_file is None:
         | 
|  | |
| 73 | 
             
                        model_name,
         | 
| 74 | 
             
                        model_type,
         | 
| 75 | 
             
                        model_size,
         | 
| 76 | 
            +
                        num_frame,
         | 
| 77 | 
             
                        input_data[0],
         | 
| 78 | 
             
                        input_data[1],
         | 
| 79 | 
             
                        input_data[2],
         | 
|  | |
| 99 | 
             
                        input_data[22],
         | 
| 100 | 
             
                        input_data[23],
         | 
| 101 | 
             
                        input_data[24],
         | 
| 102 | 
            +
                        input_data[25],
         | 
| 103 | 
            +
                        input_data[26],
         | 
| 104 | 
            +
                        input_data[27],
         | 
| 105 | 
            +
                        input_data[28],
         | 
| 106 | 
            +
                        input_data[29],
         | 
| 107 | 
             
                        notes,
         | 
| 108 | 
             
                        ]
         | 
| 109 | 
             
                    # print(len(new_data), col)
         | 
|  | |
| 115 | 
             
                    csv_data.to_csv(CSV_DIR, index=False)
         | 
| 116 |  | 
| 117 | 
             
                    # push newly added result
         | 
| 118 | 
            +
                    # api.upload_file(
         | 
| 119 | 
            +
                    #     path_or_fileobj=CSV_DIR,
         | 
| 120 | 
            +
                    #     path_in_repo=CSV_DIR,
         | 
| 121 | 
            +
                    #     repo_id="lyx97/TempCompass",
         | 
| 122 | 
            +
                    #     repo_type="space",
         | 
| 123 | 
            +
                    # )
         | 
| 124 | 
             
                return 0
         | 
| 125 |  | 
| 126 | 
             
            def get_baseline_df():
         | 
|  | |
| 234 | 
             
                                model_size = gr.Textbox(
         | 
| 235 | 
             
                                    label="Model size", placeholder="7B(Input content format must be 'number+B' or '-', default is '-')"
         | 
| 236 | 
             
                                )
         | 
| 237 | 
            +
                                num_frame = gr.Textbox(
         | 
| 238 | 
            +
                                    label="Frames", placeholder="The number of frames sampled from video, default is '-')"
         | 
| 239 | 
            +
                                )
         | 
| 240 | 
             
                                notes = gr.Textbox(
         | 
| 241 | 
             
                                    label="Notes", placeholder="Other details of the model or evaluation, e.g., which answer prompt is used."
         | 
| 242 | 
             
                                )
         | 
|  | |
| 256 | 
             
                                    model_link,
         | 
| 257 | 
             
                                    model_type,
         | 
| 258 | 
             
                                    model_size,
         | 
| 259 | 
            +
                                    num_frame,
         | 
| 260 | 
             
                                    notes,
         | 
| 261 | 
             
                                ],
         | 
| 262 | 
             
                                # outputs = submission_result,
         | 
    	
        constants.py
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 | 
             
            # this is .py for store constants 
         | 
| 2 | 
            -
            MODEL_INFO = ["Model", "Model Type", "Model Size"]
         | 
| 3 |  | 
| 4 | 
            -
            TASK_INFO = ["Avg. All", "Avg. Multi-Choice", "Avg. Yes/No", "Avg. Caption Matching", "Avg. Caption Generation",
         | 
| 5 | 
             
                            "Action. Multi-Choice", "Action. Yes/No", "Action. Caption Matching", "Action. Caption Generation", 
         | 
| 6 | 
             
                            "Direction. Multi-Choice", "Direction. Yes/No", "Direction. Caption Matching", "Direction. Caption Generation",
         | 
| 7 | 
             
                            "Speed. Multi-Choice", "Speed. Yes/No", "Speed. Caption Matching", "Speed. Caption Generation",
         | 
| @@ -9,9 +9,9 @@ TASK_INFO = ["Avg. All", "Avg. Multi-Choice", "Avg. Yes/No", "Avg. Caption Match | |
| 9 | 
             
                            "Attribute Change. Multi-Choice", "Attribute Change. Yes/No", "Attribute Change. Caption Matching", "Attribute Change. Caption Generation",
         | 
| 10 | 
             
                            "Notes"]
         | 
| 11 |  | 
| 12 | 
            -
            AVG_INFO = ["Avg. All", "Avg. Multi-Choice", "Avg. Yes/No", "Avg. Caption Matching", "Avg. Caption Generation"]
         | 
| 13 | 
            -
            DATA_TITILE_TYPE = ["markdown", "markdown", "markdown",
         | 
| 14 | 
            -
                                "number", "number", "number", "number", "number",
         | 
| 15 | 
             
                                "number", "number", "number", "number",
         | 
| 16 | 
             
                                "number", "number", "number", "number",
         | 
| 17 | 
             
                                "number", "number", "number", "number",
         | 
|  | |
| 1 | 
             
            # this is .py for store constants 
         | 
| 2 | 
            +
            MODEL_INFO = ["Model", "Model Type", "Model Size", "Frames"]
         | 
| 3 |  | 
| 4 | 
            +
            TASK_INFO = ["Avg. All", "Avg. Action", "Avg. Direction", "Avg. Speed", "Avg. Event Order", "Avg. Attribute Change", "Avg. Multi-Choice", "Avg. Yes/No", "Avg. Caption Matching", "Avg. Caption Generation",
         | 
| 5 | 
             
                            "Action. Multi-Choice", "Action. Yes/No", "Action. Caption Matching", "Action. Caption Generation", 
         | 
| 6 | 
             
                            "Direction. Multi-Choice", "Direction. Yes/No", "Direction. Caption Matching", "Direction. Caption Generation",
         | 
| 7 | 
             
                            "Speed. Multi-Choice", "Speed. Yes/No", "Speed. Caption Matching", "Speed. Caption Generation",
         | 
|  | |
| 9 | 
             
                            "Attribute Change. Multi-Choice", "Attribute Change. Yes/No", "Attribute Change. Caption Matching", "Attribute Change. Caption Generation",
         | 
| 10 | 
             
                            "Notes"]
         | 
| 11 |  | 
| 12 | 
            +
            AVG_INFO = ["Avg. All", "Avg. Action", "Avg. Direction", "Avg. Speed", "Avg. Event Order", "Avg. Attribute Change", "Avg. Multi-Choice", "Avg. Yes/No", "Avg. Caption Matching", "Avg. Caption Generation"]
         | 
| 13 | 
            +
            DATA_TITILE_TYPE = ["markdown", "markdown", "markdown", "markdown",
         | 
| 14 | 
            +
                                "number", "number", "number", "number", "number", "number", "number", "number", "number", "number",
         | 
| 15 | 
             
                                "number", "number", "number", "number",
         | 
| 16 | 
             
                                "number", "number", "number", "number",
         | 
| 17 | 
             
                                "number", "number", "number", "number",
         | 
    	
        file/result.csv
    CHANGED
    
    | @@ -1,22 +1,22 @@ | |
| 1 | 
            -
            Model,Model Type,Model Size,Avg. All,Avg. Multi-Choice,Avg. Yes/No,Avg. Caption Matching,Avg. Caption Generation,Action. Multi-Choice,Action. Yes/No,Action. Caption Matching,Action. Caption Generation,Direction. Multi-Choice,Direction. Yes/No,Direction. Caption Matching,Direction. Caption Generation,Speed. Multi-Choice,Speed. Yes/No,Speed. Caption Matching,Speed. Caption Generation,Event Order. Multi-Choice,Event Order. Yes/No,Event Order. Caption Matching,Event Order. Caption Generation,Attribute Change. Multi-Choice,Attribute Change. Yes/No,Attribute Change. Caption Matching,Attribute Change. Caption Generation,Notes
         | 
| 2 | 
            -
            [Video-LLaVA-7B](https://huggingface.co/LanguageBind/Video-LLaVA-7B),VideoLLM,7B,49.77,45.57,56.38,63.34,34.83,76.04,74.32,87.88,50.76,35.22,51.82,53.82,28.67,35.65,50.28,58.42,23.2,37.75,49.21,59.0,38.25,40.97,51.12,58.33,33.59, | 
| 3 | 
            -
            [VideoChat2-vicuna-stage3](https://github.com/OpenGVLab/Ask-Anything/blob/main/video_chat2/scripts/videochat_vicuna/config_7b_stage3.py),VideoLLM,7B,48.81,42.91,58.01,53.69,38.52,76.92,72.8,73.4,54.04,29.55,53.85,51.68,30.96,33.44,53.82,49.48,32.73,35.43,51.31,48.0,34.25,36.81,53.79,45.83,41.41, | 
| 4 | 
            -
            [LLaMA-VID-7B-short-video](https://huggingface.co/YanweiLi/llama-vid-7b-full-224-video-fps-1),VideoLLM,7B,45.61,38.04,52.96,56.02,34.78,61.24,63.01,73.4,53.03,29.85,48.79,50.46,27.98,29.65,49.16,51.2,21.91,33.77,48.43,53.67,35.5,34.03,52.68,51.74,35.94, | 
| 5 | 
            -
            [mPLUG-Owl-video](https://huggingface.co/MAGAer13/mplug-owl-llama-7b-video),VideoLLM,7B,44.15,36.39,54.42,48.5,34.43,55.92,64.36,57.24,46.46,32.24,50.61,41.9,28.21,31.55,51.21,43.64,30.41,27.48,51.31,48.67,31.25,32.99,52.01,51.74,36.46, | 
| 6 | 
            -
            [PandaGPT-13B-maxlen-400](https://huggingface.co/openllmplayground/pandagpt_13b_max_len_400),VideoLLM,13B,41.64,34.37,51.81,51.56,27.5,36.69,53.04,49.16,23.74,34.03,49.6,55.96,25.69,33.75,50.84,49.48,26.03,32.12,53.66,54.33,29.75,35.07,52.23,48.26,32.55, | 
| 7 | 
            -
            [Valley2-7B](https://huggingface.co/luoruipu1/Valley2-7b),VideoLLM,7B,37.49,29.56,53.49,34.6,26.35,43.49,58.11,36.7,24.75,29.55,52.02,33.64,20.41,24.92,52.51,36.77,21.91,18.54,50.26,36.0,35.75,29.86,52.9,29.86,29.43, | 
| 8 | 
            -
            [Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),VideoLLM,7B,43.54,37.66,50.67,53.69,31.84,61.54,52.53,69.02,40.91,28.96,50.0,49.24,28.44,29.02,49.53,47.42,24.48,36.09,51.05,52.67,31.75,30.9,50.0,50.35,33.85, | 
| 9 | 
            -
            [Video-LLaMA-2-13B](https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Finetuned),VideoLLM,13B,43.4,31.27,53.73,54.16,32.24,52.07,68.07,70.71,54.29,25.07,45.95,45.26,21.33,28.08,48.79,56.01,13.92,26.82,51.83,52.0,38.5,22.22,50.89,47.57,33.85, | 
| 10 | 
            -
            Gemini-1.5-pro,VideoLLM,-,67.1,63.92,70.32,77.45,57.88,96.75,88.18,97.98,87.88,43.28,54.45,63.0,36.7,55.21,63.87,65.64,38.14,55.63,68.85,81.0,55.25,67.71,73.21,80.9,73.7, | 
| 11 | 
            -
            [LLaVA-1.5-13B](https://huggingface.co/liuhaotian/llava-v1.5-13b),ImageLLM,13B,51.3,47.41,56.38,64.27,38.42,85.5,74.66,92.59,67.42,31.04,48.79,57.49,31.88,40.06,48.98,58.76,24.74,35.43,49.48,59.0,33.0,42.36,55.36,53.82,35.42, | 
| 12 | 
            -
            [SPHINX-v2](https://huggingface.co/Alpha-VLLM/LLaMA2-Accessory/tree/main/finetune/mm/SPHINX/SPHINX-v2-1k),ImageLLM,13B,51.92,50.63,59.07,64.34,34.88,89.94,79.05,93.27,67.93,36.72,51.21,55.96,19.04,43.53,54.75,54.3,20.36,37.42,54.45,59.67,37.25,42.36,50.45,59.03,30.99, | 
| 13 | 
            -
            [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),ImageLLM,7B,52.9,51.08,60.01,64.07,37.28,88.46,81.42,91.58,62.63,37.01,51.62,54.13,27.75,41.32,59.78,54.98,29.64,41.39,50.79,63.67,34.75,44.44,49.11,56.6,32.29, | 
| 14 | 
            -
            [Reka-Core-20240501](https://docs.reka.ai/image-video-and-audio-chat),VideoLLM | 
| 15 | 
            -
            [LLaVA-NeXT-Video-32B-Qwen](https://huggingface.co/lmms-lab/LLaVA-NeXT-Video-32B-Qwen),VideoLLM,32B,66.74,68.67,69.38,76.51,54.64,95.86,86.82,96.3,85.86,42.09,54.05,59.02,30.28,49.53,57.73,62.54,27.32,78.48,74.35,89.0,65.75,78.47,72.99,77.08,66.15, | 
| 16 | 
            -
            [LLaVA-NeXT-Video-7B-DPO](https://huggingface.co/lmms-lab/LLaVA-NeXT-Video-7B-DPO),VideoLLM,7B,53.75,50.57,61.19,63.01,40.22,87.57,83.78,92.59,63.38,35.82,51.42,54.74,32.8,41.32,56.42,55.33,27.06,39.74,54.97,56.0,39.5,45.83,53.12,56.94,38.8, | 
| 17 | 
            -
            [LongVA-7B](https://huggingface.co/lmms-lab/LongVA-7B),VideoLLM,7B,56.95,56.14,62.13,65.67,44.71,92.31,86.15,94.61,75.76,36.72,50.4,54.43,35.32,43.22,53.07,53.26,32.22,54.3,61.78,64.33,35.25,52.43,54.46,62.5,45.83, | 
| 18 | 
            -
            [Llama-3-VILA1.5-8B](https://huggingface.co/Efficient-Large-Model/Llama-3-VILA1.5-8B),VideoLLM,8B,58.79,56.39,63.64,68.93,47.16,92.9,84.8,94.95,74.75,33.73,52.23,58.72,36.24,44.16,54.38,60.48,31.7,50.0,61.26,66.0,46.75,60.07,61.38,65.28,47.14, | 
| 19 | 
            -
            [InternVL2-8B](https://huggingface.co/OpenGVLab/InternVL2-8B),VideoLLM,8B,66.03,65.57,68.24,77.11,55.39,93.79,84.8,96.63,84.6,43.88,53.24,59.94,38.76,51.1,61.27,67.01,31.19,67.22,70.68,84.0,60.75,71.88,69.2,79.51,63.02,
         | 
| 20 | 
            -
            [InternLM-XComposer-2.5](https://huggingface.co/internlm/internlm-xcomposer2d5-7b),VideoLLM,7B,62.1,61.33,64.49,72.39,52.05,94.97,84.29,97.98,88.13,42.69,52.02,57.19,33.49,41.64,56.24,60.48,38.14,59.93,64.14,75.33,46.5,66.67,62.28,72.22,55.73,
         | 
| 21 | 
            -
            [LLaVA-OneVision-Qwen-2-7B](https://huggingface.co/lmms-lab/llava-onevision-qwen2-7b-ov),VideoLLM,7B,64.22,64.81,69.67,73.79,49.9,96.45,85.98,95.96,79.29,40.6,55.26,56.88,30.73,45.43,57.36,61.86,25.26,69.54,76.18,81.33,56.75,72.22,73.21,74.31,59.11,
         | 
| 22 | 
            -
            [MiniCPM-V-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6),VideoLLM,8B,66.26,63.04,68.49,76.38,58.48,96.75,87.67,98.99,90.66,42.09,55.67,60.55,39.45,45.11,58.29,64.26,40.72,59.6,65.97,86.33,64.25,71.18,71.65,72.92,58.85,
         | 
|  | |
| 1 | 
            +
            Model,Model Type,Model Size,Frames,Avg. All,Avg. Action,Avg. Direction,Avg. Speed,Avg. Event Order,Avg. Attribute Change,Avg. Multi-Choice,Avg. Yes/No,Avg. Caption Matching,Avg. Caption Generation,Action. Multi-Choice,Action. Yes/No,Action. Caption Matching,Action. Caption Generation,Direction. Multi-Choice,Direction. Yes/No,Direction. Caption Matching,Direction. Caption Generation,Speed. Multi-Choice,Speed. Yes/No,Speed. Caption Matching,Speed. Caption Generation,Event Order. Multi-Choice,Event Order. Yes/No,Event Order. Caption Matching,Event Order. Caption Generation,Attribute Change. Multi-Choice,Attribute Change. Yes/No,Attribute Change. Caption Matching,Attribute Change. Caption Generation,Notes
         | 
| 2 | 
            +
            [Video-LLaVA-7B](https://huggingface.co/LanguageBind/Video-LLaVA-7B),VideoLLM,7B,8,49.77,71.41,42.4,41.94,45.66,45.74,45.57,56.38,63.34,34.83,76.04,74.32,87.88,50.76,35.22,51.82,53.82,28.67,35.65,50.28,58.42,23.2,37.75,49.21,59.0,38.25,40.97,51.12,58.33,33.59,
         | 
| 3 | 
            +
            [VideoChat2-vicuna-stage3](https://github.com/OpenGVLab/Ask-Anything/blob/main/video_chat2/scripts/videochat_vicuna/config_7b_stage3.py),VideoLLM,7B,16,48.81,69.19,42.02,43.44,42.2,45.31,42.91,58.01,53.69,38.52,76.92,72.8,73.4,54.04,29.55,53.85,51.68,30.96,33.44,53.82,49.48,32.73,35.43,51.31,48.0,34.25,36.81,53.79,45.83,41.41,
         | 
| 4 | 
            +
            [LLaMA-VID-7B-short-video](https://huggingface.co/YanweiLi/llama-vid-7b-full-224-video-fps-1),VideoLLM,7B,1fps,45.61,62.11,39.45,38.62,42.63,44.11,38.04,52.96,56.02,34.78,61.24,63.01,73.4,53.03,29.85,48.79,50.46,27.98,29.65,49.16,51.2,21.91,33.77,48.43,53.67,35.5,34.03,52.68,51.74,35.94,
         | 
| 5 | 
            +
            [mPLUG-Owl-video](https://huggingface.co/MAGAer13/mplug-owl-llama-7b-video),VideoLLM,7B,8,44.15,56.93,38.82,40.44,39.74,43.82,36.39,54.42,48.5,34.43,55.92,64.36,57.24,46.46,32.24,50.61,41.9,28.21,31.55,51.21,43.64,30.41,27.48,51.31,48.67,31.25,32.99,52.01,51.74,36.46,
         | 
| 6 | 
            +
            [PandaGPT-13B-maxlen-400](https://huggingface.co/openllmplayground/pandagpt_13b_max_len_400),VideoLLM,13B,10,41.64,41.77,41.08,40.77,42.2,42.54,34.37,51.81,51.56,27.5,36.69,53.04,49.16,23.74,34.03,49.6,55.96,25.69,33.75,50.84,49.48,26.03,32.12,53.66,54.33,29.75,35.07,52.23,48.26,32.55,
         | 
| 7 | 
            +
            [Valley2-7B](https://huggingface.co/luoruipu1/Valley2-7b),VideoLLM,7B,8,37.49,43.01,34.86,36.07,36.05,37.07,29.56,53.49,34.6,26.35,43.49,58.11,36.7,24.75,29.55,52.02,33.64,20.41,24.92,52.51,36.77,21.91,18.54,50.26,36.0,35.75,29.86,52.9,29.86,29.43,
         | 
| 8 | 
            +
            [Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),VideoLLM,7B,100,43.54,54.59,39.51,38.55,42.56,41.76,37.66,50.67,53.69,31.84,61.54,52.53,69.02,40.91,28.96,50.0,49.24,28.44,29.02,49.53,47.42,24.48,36.09,51.05,52.67,31.75,30.9,50.0,50.35,33.85,
         | 
| 9 | 
            +
            [Video-LLaMA-2-13B](https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Finetuned),VideoLLM,13B,8,43.4,61.86,34.67,37.05,42.56,39.7,31.27,53.73,54.16,32.24,52.07,68.07,70.71,54.29,25.07,45.95,45.26,21.33,28.08,48.79,56.01,13.92,26.82,51.83,52.0,38.5,22.22,50.89,47.57,33.85,
         | 
| 10 | 
            +
            Gemini-1.5-pro,VideoLLM,-,1fps,67.1,91.68,48.99,55.9,64.67,73.79,63.92,70.32,77.45,57.88,96.75,88.18,97.98,87.88,43.28,54.45,63.0,36.7,55.21,63.87,65.64,38.14,55.63,68.85,81.0,55.25,67.71,73.21,80.9,73.7,
         | 
| 11 | 
            +
            [LLaVA-1.5-13B](https://huggingface.co/liuhaotian/llava-v1.5-13b),ImageLLM,13B,1,51.3,78.43,42.21,42.86,43.71,46.95,47.41,56.38,64.27,38.42,85.5,74.66,92.59,67.42,31.04,48.79,57.49,31.88,40.06,48.98,58.76,24.74,35.43,49.48,59.0,33.0,42.36,55.36,53.82,35.42,
         | 
| 12 | 
            +
            [SPHINX-v2](https://huggingface.co/Alpha-VLLM/LLaMA2-Accessory/tree/main/finetune/mm/SPHINX/SPHINX-v2-1k),ImageLLM,13B,1,51.92,81.21,40.33,43.64,46.89,45.24,50.63,59.07,64.34,34.88,89.94,79.05,93.27,67.93,36.72,51.21,55.96,19.04,43.53,54.75,54.3,20.36,37.42,54.45,59.67,37.25,42.36,50.45,59.03,30.99,
         | 
| 13 | 
            +
            [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),ImageLLM,7B,1,52.9,80.16,42.53,47.42,46.89,45.1,51.08,60.01,64.07,37.28,88.46,81.42,91.58,62.63,37.01,51.62,54.13,27.75,41.32,59.78,54.98,29.64,41.39,50.79,63.67,34.75,44.44,49.11,56.6,32.29,
         | 
| 14 | 
            +
            [Reka-Core-20240501](https://docs.reka.ai/image-video-and-audio-chat),VideoLLM,-,-,64.31,93.22,46.92,53.23,63.58,63.42,61.08,65.14,74.05,58.53,96.15,90.71,97.31,91.41,40.9,52.83,58.1,36.47,49.21,54.0,63.92,47.42,57.28,57.33,81.0,61.25,60.42,64.96,71.18,58.07,
         | 
| 15 | 
            +
            [LLaVA-NeXT-Video-32B-Qwen](https://huggingface.co/lmms-lab/LLaVA-NeXT-Video-32B-Qwen),VideoLLM,32B,32,66.74,90.2,46.04,49.25,75.94,73.08,68.67,69.38,76.51,54.64,95.86,86.82,96.3,85.86,42.09,54.05,59.02,30.28,49.53,57.73,62.54,27.32,78.48,74.35,89.0,65.75,78.47,72.99,77.08,66.15,
         | 
| 16 | 
            +
            [LLaVA-NeXT-Video-7B-DPO](https://huggingface.co/lmms-lab/LLaVA-NeXT-Video-7B-DPO),VideoLLM,7B,32,53.75,81.21,43.72,45.66,47.4,48.51,50.57,61.19,63.01,40.22,87.57,83.78,92.59,63.38,35.82,51.42,54.74,32.8,41.32,56.42,55.33,27.06,39.74,54.97,56.0,39.5,45.83,53.12,56.94,38.8,
         | 
| 17 | 
            +
            [LongVA-7B](https://huggingface.co/lmms-lab/LongVA-7B),VideoLLM,7B,32,56.95,86.44,44.22,45.79,53.03,53.34,56.14,62.13,65.67,44.71,92.31,86.15,94.61,75.76,36.72,50.4,54.43,35.32,43.22,53.07,53.26,32.22,54.3,61.78,64.33,35.25,52.43,54.46,62.5,45.83,
         | 
| 18 | 
            +
            [Llama-3-VILA1.5-8B](https://huggingface.co/Efficient-Large-Model/Llama-3-VILA1.5-8B),VideoLLM,8B,8,58.79,85.89,45.29,47.68,55.64,58.03,56.39,63.64,68.93,47.16,92.9,84.8,94.95,74.75,33.73,52.23,58.72,36.24,44.16,54.38,60.48,31.7,50.0,61.26,66.0,46.75,60.07,61.38,65.28,47.14,
         | 
| 19 | 
            +
            [InternVL2-8B](https://huggingface.co/OpenGVLab/InternVL2-8B),VideoLLM,8B,8,66.03,88.79,48.68,52.64,69.94,70.17,65.57,68.24,77.11,55.39,93.79,84.8,96.63,84.6,43.88,53.24,59.94,38.76,51.1,61.27,67.01,31.19,67.22,70.68,84.0,60.75,71.88,69.2,79.51,63.02,
         | 
| 20 | 
            +
            [InternLM-XComposer-2.5](https://huggingface.co/internlm/internlm-xcomposer2d5-7b),VideoLLM,7B,32,62.1,89.96,46.04,49.45,60.55,63.42,61.33,64.49,72.39,52.05,94.97,84.29,97.98,88.13,42.69,52.02,57.19,33.49,41.64,56.24,60.48,38.14,59.93,64.14,75.33,46.5,66.67,62.28,72.22,55.73,
         | 
| 21 | 
            +
            [LLaVA-OneVision-Qwen-2-7B](https://huggingface.co/lmms-lab/llava-onevision-qwen2-7b-ov),VideoLLM,7B,32,64.22,88.35,45.79,47.62,70.23,69.39,64.81,69.67,73.79,49.9,96.45,85.98,95.96,79.29,40.6,55.26,56.88,30.73,45.43,57.36,61.86,25.26,69.54,76.18,81.33,56.75,72.22,73.21,74.31,59.11,
         | 
| 22 | 
            +
            [MiniCPM-V-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6),VideoLLM,8B,64,66.26,92.36,49.37,52.25,68.5,68.32,63.04,68.49,76.38,58.48,96.75,87.67,98.99,90.66,42.09,55.67,60.55,39.45,45.11,58.29,64.26,40.72,59.6,65.97,86.33,64.25,71.18,71.65,72.92,58.85,
         | 
    	
        src/compute.py
    CHANGED
    
    | @@ -38,6 +38,7 @@ def compute_scores(merge_file): | |
| 38 | 
             
                dataset_scores_dict = {}
         | 
| 39 | 
             
                total_correct, total_num = 0, 0
         | 
| 40 | 
             
                eval_dims = ['action', 'speed', 'direction', 'order', 'attribute_change', 'avg']
         | 
|  | |
| 41 | 
             
                for dataset_name, dataset_results in merge_data.items():
         | 
| 42 |  | 
| 43 | 
             
                    dataset_correct, dataset_num = {dim: 0 for dim in eval_dims}, {dim: 0 for dim in eval_dims}
         | 
| @@ -46,14 +47,17 @@ def compute_scores(merge_file): | |
| 46 | 
             
                            for result in dataset_results[id][dim]:
         | 
| 47 | 
             
                                dataset_correct['avg'] += result['rating']
         | 
| 48 | 
             
                                dataset_correct[dim] += result['rating']
         | 
|  | |
| 49 | 
             
                                dataset_num['avg'] += 1
         | 
| 50 | 
             
                                dataset_num[dim] += 1
         | 
|  | |
| 51 |  | 
| 52 | 
             
                    total_correct += dataset_correct['avg']
         | 
| 53 | 
             
                    total_num += dataset_num['avg']
         | 
| 54 | 
             
                    for dim in eval_dims:
         | 
| 55 | 
             
                        dataset_scores_dict[f"{dim}_{dataset_name}"] = round(dataset_correct[dim] / dataset_num[dim] * 100, 2)
         | 
| 56 | 
            -
             | 
|  | |
| 57 | 
             
                dataset_scores_dict["avg_all"] = round(total_correct / total_num * 100, 2)
         | 
| 58 |  | 
| 59 | 
             
                # print(dataset_score_dict)
         | 
| @@ -63,14 +67,15 @@ def compute_scores(merge_file): | |
| 63 | 
             
                # ========================
         | 
| 64 | 
             
                data = [
         | 
| 65 |  | 
| 66 | 
            -
                    ["Avg. All", "Avg. Multi-Choice", "Avg. Yes/No", "Avg. Caption Matching", "Avg. Caption Generation",
         | 
| 67 | 
             
                     "Action. Multi-Choice", "Action. Yes/No", "Action. Caption Matching", "Action. Caption Generation", 
         | 
| 68 | 
             
                            "Direction. Multi-Choice", "Direction. Yes/No", "Direction. Caption Matching", "Direction. Caption Generation",
         | 
| 69 | 
             
                            "Speed. Multi-Choice", "Speed. Yes/No", "Speed. Caption Matching", "Speed. Caption Generation",
         | 
| 70 | 
             
                            "Event Order. Multi-Choice", "Event Order. Yes/No", "Event Order. Caption Matching", "Event Order. Caption Generation",
         | 
| 71 | 
             
                            "Attribute Change. Multi-Choice", "Attribute Change. Yes/No", "Attribute Change. Caption Matching", "Attribute Change. Caption Generation"],
         | 
| 72 |  | 
| 73 | 
            -
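                    [dataset_scores_dict["avg_all"], dataset_scores_dict["avg_multi-choice"], dataset_scores_dict["avg_yes_no"], dataset_scores_dict["avg_caption_matching"], dataset_scores_dict["avg_captioning"],
         | 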
|  | |
| 74 | 
             
                     dataset_scores_dict['action_multi-choice'], dataset_scores_dict['action_yes_no'], dataset_scores_dict['action_caption_matching'], dataset_scores_dict['action_captioning'], 
         | 
| 75 | 
             
                     dataset_scores_dict['direction_multi-choice'], dataset_scores_dict['direction_yes_no'], dataset_scores_dict['direction_caption_matching'], dataset_scores_dict['direction_captioning'], 
         | 
| 76 | 
             
                     dataset_scores_dict['speed_multi-choice'], dataset_scores_dict['speed_yes_no'], dataset_scores_dict['speed_caption_matching'], dataset_scores_dict['speed_captioning'], 
         | 
|  | |
| 38 | 
             
                dataset_scores_dict = {}
         | 
| 39 | 
             
                total_correct, total_num = 0, 0
         | 
| 40 | 
             
                eval_dims = ['action', 'speed', 'direction', 'order', 'attribute_change', 'avg']
         | 
| 41 | 
            +
                dim_correct, dim_total = {dim: 0 for dim in eval_dims if dim!='avg'}, {dim: 0 for dim in eval_dims if dim!='avg'}
         | 
| 42 | 
             
                for dataset_name, dataset_results in merge_data.items():
         | 
| 43 |  | 
| 44 | 
             
                    dataset_correct, dataset_num = {dim: 0 for dim in eval_dims}, {dim: 0 for dim in eval_dims}
         | 
|  | |
| 47 | 
             
                            for result in dataset_results[id][dim]:
         | 
| 48 | 
             
                                dataset_correct['avg'] += result['rating']
         | 
| 49 | 
             
                                dataset_correct[dim] += result['rating']
         | 
| 50 | 
            +
                                dim_correct[dim] += result['rating']
         | 
| 51 | 
             
                                dataset_num['avg'] += 1
         | 
| 52 | 
             
                                dataset_num[dim] += 1
         | 
| 53 | 
            +
                                dim_total[dim] += 1
         | 
| 54 |  | 
| 55 | 
             
                    total_correct += dataset_correct['avg']
         | 
| 56 | 
             
                    total_num += dataset_num['avg']
         | 
| 57 | 
             
                    for dim in eval_dims:
         | 
| 58 | 
             
                        dataset_scores_dict[f"{dim}_{dataset_name}"] = round(dataset_correct[dim] / dataset_num[dim] * 100, 2)
         | 
| 59 | 
            +
                for dim in dim_correct:
         | 
| 60 | 
            +
                    dataset_scores_dict[f"avg_{dim}"] = round(dim_correct[dim] / dim_total[dim] * 100, 2)
         | 
| 61 | 
             
                dataset_scores_dict["avg_all"] = round(total_correct / total_num * 100, 2)
         | 
| 62 |  | 
| 63 | 
             
                # print(dataset_score_dict)
         | 
|  | |
| 67 | 
             
                # ========================
         | 
| 68 | 
             
                data = [
         | 
| 69 |  | 
| 70 | 
            +
                    ["Avg. All", "Avg. Action", "Avg. Direction", "Avg. Speed", "Avg. Event Order", "Avg. Attribute Change", "Avg. Multi-Choice", "Avg. Yes/No", "Avg. Caption Matching", "Avg. Caption Generation",
         | 
| 71 | 
             
                     "Action. Multi-Choice", "Action. Yes/No", "Action. Caption Matching", "Action. Caption Generation", 
         | 
| 72 | 
             
                            "Direction. Multi-Choice", "Direction. Yes/No", "Direction. Caption Matching", "Direction. Caption Generation",
         | 
| 73 | 
             
                            "Speed. Multi-Choice", "Speed. Yes/No", "Speed. Caption Matching", "Speed. Caption Generation",
         | 
| 74 | 
             
                            "Event Order. Multi-Choice", "Event Order. Yes/No", "Event Order. Caption Matching", "Event Order. Caption Generation",
         | 
| 75 | 
             
                            "Attribute Change. Multi-Choice", "Attribute Change. Yes/No", "Attribute Change. Caption Matching", "Attribute Change. Caption Generation"],
         | 
| 76 |  | 
| 77 | 
            +
                    [dataset_scores_dict["avg_all"], dataset_scores_dict["avg_action"], dataset_scores_dict["avg_direction"], dataset_scores_dict["avg_speed"], dataset_scores_dict["avg_order"], dataset_scores_dict["avg_attribute_change"], 
         | 
| 78 | 
            +
                     dataset_scores_dict["avg_multi-choice"], dataset_scores_dict["avg_yes_no"], dataset_scores_dict["avg_caption_matching"], dataset_scores_dict["avg_captioning"],
         | 
| 79 | 
             
                     dataset_scores_dict['action_multi-choice'], dataset_scores_dict['action_yes_no'], dataset_scores_dict['action_caption_matching'], dataset_scores_dict['action_captioning'], 
         | 
| 80 | 
             
                     dataset_scores_dict['direction_multi-choice'], dataset_scores_dict['direction_yes_no'], dataset_scores_dict['direction_caption_matching'], dataset_scores_dict['direction_captioning'], 
         | 
| 81 | 
             
                     dataset_scores_dict['speed_multi-choice'], dataset_scores_dict['speed_yes_no'], dataset_scores_dict['speed_caption_matching'], dataset_scores_dict['speed_captioning'], 
         |