Spaces:
Sleeping
Sleeping
| import ast | |
| import argparse | |
| import gc | |
| import os | |
| from contextlib import contextmanager | |
| from pathlib import Path | |
| import cv2 | |
| import numpy as np | |
| import pandas as pd | |
| from joblib import Parallel, delayed | |
| from natsort import natsorted | |
| from tqdm import tqdm | |
| from utils.logger import logger | |
| from utils.video_utils import get_video_path_list | |
@contextmanager
def VideoCapture(video_path):
    """Open ``video_path`` with OpenCV and guarantee the capture is released.

    Intended to be used as ``with VideoCapture(path) as cap: ...``.

    Args:
        video_path: Path of the video passed to ``cv2.VideoCapture``.

    Yields:
        The ``cv2.VideoCapture`` object. No open check is performed here, so
        callers should verify ``cap.isOpened()`` before relying on it.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        yield cap
    finally:
        cap.release()
        # Drop the local reference and force a collection right away
        # (presumably to keep memory bounded when many videos are processed
        # back to back).
        del cap
        gc.collect()
def compute_motion_score(video_path, sampling_fps=2):
    """Estimate the amount of motion in a video with dense optical flow.

    Frames are sampled at roughly ``sampling_fps`` frames per second and
    converted to grayscale. Farneback optical flow is computed between each
    pair of consecutive sampled frames, and the per-pair score is the mean
    flow magnitude. The video score is the mean of the per-pair scores.

    Args:
        video_path: Path to the video file.
        sampling_fps: Target number of sampled frames per second. Values below
            1 are raised to 1, and the value is capped at the video's own
            frame rate.

    Returns:
        A dict with ``"video_path"`` (file name only) and ``"motion_score"``
        (rounded to 5 decimals). ``None`` is returned, after logging a
        warning, when the video cannot be opened, reports an invalid frame
        rate, yields fewer than two sampled frames, or any other error occurs.
    """
    video_motion_scores = []

    try:
        with VideoCapture(video_path) as cap:
            if not cap.isOpened():
                raise ValueError("the video cannot be opened")

            fps = cap.get(cv2.CAP_PROP_FPS)
            # A zero/NaN frame rate would otherwise surface later as an
            # opaque ZeroDivisionError or int(nan) failure.
            if not np.isfinite(fps) or fps <= 0:
                raise ValueError(f"invalid frame rate {fps}")

            valid_fps = min(max(sampling_fps, 1), fps)
            # valid_fps <= fps, so the interval is always at least 1.
            frame_interval = int(fps / valid_fps)
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # If the second sampled frame cannot be reached, use the last
            # frame instead. The cap is skipped when the reported frame count
            # is <= 1, where it would produce a zero or negative interval.
            if total_frames > 1:
                frame_interval = min(frame_interval, total_frames - 1)

            prev_frame = None
            frame_count = -1
            while cap.isOpened():
                ret, frame = cap.read()
                frame_count += 1
                if not ret:
                    break

                # Skip the frames between two sampling points.
                if frame_count % frame_interval != 0:
                    continue

                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if prev_frame is None:
                    prev_frame = gray_frame
                    continue

                flow = cv2.calcOpticalFlowFarneback(
                    prev_frame,
                    gray_frame,
                    None,
                    pyr_scale=0.5,
                    levels=3,
                    winsize=15,
                    iterations=3,
                    poly_n=5,
                    poly_sigma=1.2,
                    flags=0,
                )
                mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
                video_motion_scores.append(np.mean(mag))
                prev_frame = gray_frame

        # Without at least one frame pair np.mean([]) would emit NaN; treat
        # the video as failed instead of recording a meaningless score.
        if not video_motion_scores:
            raise ValueError("fewer than two frames could be sampled")

        return {
            "video_path": Path(video_path).name,
            "motion_score": round(float(np.mean(video_motion_scores)), 5),
        }
    except Exception as e:
        # Best effort: a single broken video must not abort the whole batch.
        logger.warning(f"Compute motion score for video {video_path} with error: {e}.")
        return None
def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` holding the input location, the output
        location and save frequency, the level of parallelism, and the
        optional aesthetic/text score filters.
    """
    # (flag, keyword arguments) pairs, registered in this exact order.
    option_table = (
        ("--video_folder", dict(type=str, default="", help="The video folder.")),
        (
            "--video_metadata_path",
            dict(type=str, default=None, help="The path to the video dataset metadata (csv/jsonl)."),
        ),
        (
            "--video_path_column",
            dict(
                type=str,
                default="video_path",
                help="The column contains the video path (an absolute path or a relative path w.r.t the video_folder).",
            ),
        ),
        (
            "--saved_path",
            dict(type=str, required=True, help="The save path to the output results (csv/jsonl)."),
        ),
        ("--saved_freq", dict(type=int, default=100, help="The frequency to save the output results.")),
        ("--n_jobs", dict(type=int, default=1, help="The number of concurrent processes.")),
        (
            "--asethetic_score_metadata_path",
            dict(type=str, default=None, help="The path to the video quality metadata (csv/jsonl)."),
        ),
        (
            "--asethetic_score_threshold",
            dict(type=float, default=4.0, help="The asethetic score threshold."),
        ),
        (
            "--text_score_metadata_path",
            dict(type=str, default=None, help="The path to the video text score metadata (csv/jsonl)."),
        ),
        ("--text_score_threshold", dict(type=float, default=0.02, help="The text threshold.")),
    )

    cli = argparse.ArgumentParser(description="Compute the motion score of the videos.")
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def _read_metadata(metadata_path):
    """Load a csv/jsonl metadata file into a DataFrame; reject other extensions."""
    if metadata_path.endswith(".csv"):
        return pd.read_csv(metadata_path)
    if metadata_path.endswith(".jsonl"):
        return pd.read_json(metadata_path, lines=True)
    raise ValueError(f"The metadata path must end with .csv or .jsonl, but got {metadata_path}.")


def _exclude_videos(video_path_list, excluded_paths, video_folder):
    """Drop ``excluded_paths`` (resolved against ``video_folder``) and natural-sort the rest."""
    excluded = {os.path.join(video_folder, video_path) for video_path in excluded_paths}
    # Sorting to guarantee the same result for each process.
    return natsorted(set(video_path_list).difference(excluded))


def main():
    """Compute motion scores for a set of videos and append them to ``saved_path``.

    Steps:
        1. Collect the candidate videos from the folder / metadata file.
        2. Skip videos already present in an existing ``saved_path`` (resume).
        3. Optionally skip videos whose mean aesthetic score is below
           ``--asethetic_score_threshold`` or whose text score is above
           ``--text_score_threshold``.
        4. Score the remaining videos in batches of ``--saved_freq`` and
           append each batch to ``saved_path`` (csv or jsonl).

    Raises:
        ValueError: If ``saved_path`` or a metadata path does not end with
            ``.csv``/``.jsonl``, or if ``saved_freq`` is not positive.
    """
    args = parse_args()

    # Fail fast on bad arguments, before any video listing or processing.
    if not args.saved_path.endswith((".csv", ".jsonl")):
        raise ValueError("The saved_path must end with .csv or .jsonl.")
    if args.saved_freq < 1:
        raise ValueError("The saved_freq must be a positive integer.")

    video_path_list = get_video_path_list(
        video_folder=args.video_folder,
        video_metadata_path=args.video_metadata_path,
        video_path_column=args.video_path_column,
    )

    if os.path.exists(args.saved_path):
        saved_metadata_df = _read_metadata(args.saved_path)
        # Results are written with the fixed "video_path" key, so fall back to
        # it when a custom --video_path_column is absent from the saved file.
        saved_column = args.video_path_column
        if saved_column not in saved_metadata_df.columns:
            saved_column = "video_path"
        saved_video_path_list = saved_metadata_df[saved_column].tolist()
        video_path_list = _exclude_videos(video_path_list, saved_video_path_list, args.video_folder)
        logger.info(
            f"Resume from {args.saved_path}: {len(saved_video_path_list)} processed and {len(video_path_list)} to be processed."
        )

    if args.asethetic_score_metadata_path is not None:
        asethetic_score_df = _read_metadata(args.asethetic_score_metadata_path)
        # In pandas, csv will save lists as strings, whereas jsonl will not.
        asethetic_score_df["aesthetic_score"] = asethetic_score_df["aesthetic_score"].apply(
            lambda x: ast.literal_eval(x) if isinstance(x, str) else x
        )
        asethetic_score_df["aesthetic_score_mean"] = asethetic_score_df["aesthetic_score"].apply(
            lambda x: sum(x) / len(x)
        )
        filtered_asethetic_score_df = asethetic_score_df[
            asethetic_score_df["aesthetic_score_mean"] < args.asethetic_score_threshold
        ]
        filtered_video_path_list = filtered_asethetic_score_df[args.video_path_column].tolist()
        video_path_list = _exclude_videos(video_path_list, filtered_video_path_list, args.video_folder)
        logger.info(f"Load {args.asethetic_score_metadata_path} and filter {len(filtered_video_path_list)} videos.")

    if args.text_score_metadata_path is not None:
        text_score_df = _read_metadata(args.text_score_metadata_path)
        filtered_text_score_df = text_score_df[text_score_df["text_score"] > args.text_score_threshold]
        filtered_video_path_list = filtered_text_score_df[args.video_path_column].tolist()
        video_path_list = _exclude_videos(video_path_list, filtered_video_path_list, args.video_folder)
        logger.info(f"Load {args.text_score_metadata_path} and filter {len(filtered_video_path_list)} videos.")

    for i in tqdm(range(0, len(video_path_list), args.saved_freq)):
        batch = video_path_list[i: i + args.saved_freq]
        result_list = Parallel(n_jobs=args.n_jobs, backend="threading")(
            delayed(compute_motion_score)(video_path) for video_path in tqdm(batch)
        )
        # Failed videos return None and are simply left out of the output.
        result_list = [result for result in result_list if result is not None]
        if not result_list:
            continue

        result_df = pd.DataFrame(result_list)
        if args.saved_path.endswith(".csv"):
            # Only the very first write to a new file carries the header row.
            header = not os.path.exists(args.saved_path)
            result_df.to_csv(args.saved_path, header=header, index=False, mode="a")
        elif args.saved_path.endswith(".jsonl"):
            result_df.to_json(args.saved_path, orient="records", lines=True, mode="a")
        logger.info(f"Save result to {args.saved_path}.")
# Run the command-line entry point only when this file is executed directly.
if __name__ == "__main__":
    main()