import os
import shutil

import gradio as gr
import pandas as pd
import requests
TEMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
metric_scale = {
    'human_face_similarity': 1.5,
    'clip_score': 0.02,
    'env_clip': 1.5,
    'other_subject_clip': 1.5,
    'image_quality': 1.0,
    'dynamic_degree': lambda x: 0.1 if x < 0.885 else (0.95 if x < 0.95 else 1.0),
    'aesthetic_quality': 1.0,
    'motion_smoothness': 1.0,
}
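# A quick illustration of the piecewise dynamic_degree scale above
# (illustrative inputs, not real benchmark values):
#   metric_scale['dynamic_degree'](0.50)  -> 0.1   low-motion videos are heavily penalized
#   metric_scale['dynamic_degree'](0.90)  -> 0.95  borderline motion gets a mild penalty
#   metric_scale['dynamic_degree'](0.97)  -> 1.0   sufficiently dynamic videos keep full weight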
class ModelResult:
    def __init__(self, data):
        self.name = data['model_name']
        self.project_link = data.get('project_link', None)
        self.result = data['result']

    def to_dict(self):
        if self.project_link is not None:
            res = {
                'model_name': f'<a href="{self.project_link}" target="_blank">{self.name}</a>',
            }
        else:
            res = {
                'model_name': self.name,
            }
        total_score = []
        for metric in self.result.keys():
            res[metric] = round(float(self.result[metric]) - 1e-3, 4)
            if metric == 'dynamic_degree':
                score = metric_scale[metric](self.result[metric]) * self.result[metric]
            else:
                score = self.result[metric] * metric_scale[metric]
            total_score.append(score)
        total_score = sum(total_score) / len(total_score)
        res['comprehensive score'] = round(total_score, 4)
        return res
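# Usage sketch for ModelResult (hypothetical data, for illustration only):
#   mr = ModelResult({'model_name': 'demo',
#                     'result': {'clip_score': 30.0, 'dynamic_degree': 0.9}})
#   mr.to_dict()
#   -> {'model_name': 'demo', 'clip_score': 29.999, 'dynamic_degree': 0.899,
#       'comprehensive score': 0.7275}
#   (30.0 * 0.02 = 0.6 and 0.95 * 0.9 = 0.855 average to 0.7275; displayed
#   per-metric values are rounded after subtracting 1e-3.)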
def eval_request(model_name, org_link, huggingface_data_set_name):
    params = {
        "model_name": model_name,
        "org_link": org_link,
        "huggingface_data_set_name": huggingface_data_set_name
    }
    response = requests.post(
        "http://47.239.99.255/A2Bench_evaluation/eval",
        params=params,  # sent as URL query parameters, not as a JSON body
        headers={"Content-Type": "application/json"}
    )
    return response.json()
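# Note: if the evaluation backend expects a JSON body rather than query
# parameters (an assumption; the server contract is not documented here),
# requests can send one directly, which also sets the Content-Type header:
#   response = requests.post(url, json=params)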
def evaluation(model_name, org_link, huggingface_data_set_name):
    try:
        if org_link == "":
            org_link = None
        eval_request(model_name, org_link, huggingface_data_set_name)
        return "Evaluation completed successfully!"
    except Exception as e:
        raise gr.Error(f"Evaluation failed: {str(e)}")
def load_leaderboard():
    leaderboard_list = []
    file_list = requests.get("http://47.239.99.255/A2Bench_evaluation/load_leaderboard")
    for file in file_list.json():
        leaderboard_list.append(ModelResult(file))
    return leaderboard_list
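# The leaderboard endpoint is expected to return a JSON list shaped like the
# ModelResult constructor input (inferred from how the response is consumed):
#   [{"model_name": "...", "project_link": "...",
#     "result": {"clip_score": ..., "dynamic_degree": ..., ...}}, ...]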
HEADER = [
    'model_name', 'comprehensive score', 'clip_score', 'human_face_similarity',
    'env_clip', 'other_subject_clip', 'image_quality', 'dynamic_degree',
    'aesthetic_quality', 'motion_smoothness',
]
def display_table():
    leaderboard_list = load_leaderboard()
    data = {}
    for metric in HEADER:
        data[metric] = []
    for model_result in leaderboard_list:
        result_dict = model_result.to_dict()
        for metric in HEADER:
            data[metric].append(result_dict[metric])
    df = pd.DataFrame(data)
    df = df.sort_values(by='comprehensive score', ascending=False)
    return df
_HEADER_1 = '''
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
    <h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">A2-Bench Leaderboard</h1>
    <p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://arxiv.org/pdf/2504.02436' target='_blank'>SkyReels-A2</a> | Code: <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'>GitHub</a> | <a href='https://huggingface.co/Skywork/SkyReels-A2' target='_blank'>HuggingFace</a></p>
</div>

❗️❗️❗️**LEADERBOARD INTRODUCTION:** ❗️❗️❗️

This is the A2-Bench leaderboard, used to evaluate the performance of elements-to-video (E2V) generation models.
We provide an evaluation set containing 50 paired element groups (character, object, and background). You can check [evaluation set introduction]() for more details. Each evaluation case includes:
<ul style="font-size: 0.9rem; margin-top: -0.5rem;">
    <li>Human subject (characters): Both male and female subjects, covering celebrities and ordinary people; we additionally provide several human images generated by Flux.</li>
    <li>Non-human subject: Various objects, including different types of animals, guitars, racing cars, balls, etc.</li>
    <li>Background image: Diverse environmental settings, including ordinary indoor and outdoor scenes as well as famous backgrounds such as The Great Wall and Yellow Wind Ridge (from Black Myth: Wukong).</li>
    <li>Prompt: "A realistic scene where [human] interacts with [object] in [environment], following physical laws and spatial logic".</li>
</ul>
'''
img = '''
<div style="text-align: center; margin: 1rem 0;">
    <h3 style="font-size: 1.2rem; margin-bottom: 0.5rem;">Example Test Case</h3>
    <div style="display: flex; justify-content: center; gap: 1rem; margin: 1rem 0;">
        <img src="https://www.helloimg.com/i/2025/04/07/67f386a7f3717.png" alt="Human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
        <img src="https://www.helloimg.com/i/2025/04/07/67f38681d9c24.jpg" alt="Non-human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
        <img src="https://www.helloimg.com/i/2025/04/07/67f38684117d0.jpg" alt="Background Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
    </div>
    <p style="font-size: 0.9rem; margin-top: 0.5rem;">Prompt: A man feeding a bird in the park.</p>
</div>
'''
_HEADER_2 = '''
We provide a set of evaluation metrics for elements-to-video models, and a leaderboard showing the performance of different models.

Evaluation metrics include:
- Element Consistency: Measures character ID consistency with the ArcFace face-recognition model, and object and background consistency with the CLIP model.
- Video Quality: Measures image quality, dynamic degree, aesthetic quality, and motion smoothness.
- T2V Metrics: Measures text-video consistency using CLIP.

You can check the [Metric Introduction](https://skyworkai.github.io/skyreels-a2.github.io/static/images/bench.png) for more details.
The leaderboard ranks models by comprehensive score, a weighted average of all the metrics; T2V and object-consistency metrics are given higher weights.
You can click a model name to visit its project page. To submit, upload your model results as a Hugging Face dataset like [this one](https://huggingface.co/datasets/ColinYK/pika_dataset).
'''  # noqa E501
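# Programmatic submission sketch using the gradio_client package, assuming this
# Space is reachable at a placeholder URL; the "/evaluate" endpoint name matches
# the api_name registered on the Evaluation button below.
#   from gradio_client import Client
#   client = Client("<space-url>")  # placeholder; replace with the actual Space
#   client.predict("my-model", "https://example.com/project",
#                  "user/my_dataset", api_name="/evaluate")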
_CITE_ = r"""
If A2-Bench is helpful, please help to ⭐ the <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'>GitHub repo</a>. Thanks!
---
📧 **Contact**
If you have any questions or feedback, feel free to open a discussion or contact <b>yikun.dou@kunlun-inc.com</b>.
"""  # noqa E501
def upload_file(files):
    target_dir = os.path.join(TEMP_DIR, files.name)
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(files.name, target_dir)
    return target_dir
with gr.Blocks(css=".gr-dataframe a {text-decoration: none; color: inherit;}") as demo:
    gr.Markdown(_HEADER_1)
    gr.HTML(img)
    gr.Markdown(_HEADER_2)
    with gr.Group():
        table = gr.DataFrame(
            value=display_table(),
            datatype=['markdown', 'str'],
            interactive=False,  # read-only: the leaderboard table is not editable
            headers=HEADER,
        )
        refresh_btn = gr.Button("Refresh")
        refresh_btn.click(display_table, outputs=table)
    with gr.Group():
        with gr.Row():
            model_name_input = gr.Textbox(label="Model Name", placeholder="Required: Enter your model name")
            org_link_input = gr.Textbox(label="Project Page", placeholder="Optional: Enter your project page; it will be shown on the leaderboard", value=None)
            huggingface_data_set_name = gr.Textbox(label="Huggingface Dataset Name", placeholder="Required: Enter the Hugging Face dataset name to evaluate")
        evaluation_btn = gr.Button("Evaluation")
        output_message = gr.Textbox(label="Evaluation Status", interactive=False)
        evaluation_btn.click(
            evaluation,
            inputs=[model_name_input, org_link_input, huggingface_data_set_name],
            outputs=output_message,
            api_name="evaluate",
        )
    gr.Markdown(_CITE_)

if __name__ == "__main__":
    demo.launch()