Spaces:

weizmannscience
/

tokenflow

Running on Zero

App Files Files Community

tokenflow / utils.py

multimodalart HF Staff

revert to working state

88a2ed3 about 2 years ago

raw

history blame

3.97 kB

	from pathlib import Path
	from PIL import Image
	import torch
	import yaml
	import math

	import torchvision.transforms as T
	from torchvision.io import read_video,write_video
	import os
	import random
	import numpy as np
	from torchvision.io import write_video
	# from kornia.filters import joint_bilateral_blur
	from kornia.geometry.transform import remap
	from kornia.utils.grid import create_meshgrid
	import cv2

	def save_video_frames(video_path, img_size=(512,512)):
	video, _, _ = read_video(video_path, output_format="TCHW")
	# rotate video -90 degree if video is .mov format. this is a weird bug in torchvision
	if video_path.endswith('.mov'):
	video = T.functional.rotate(video, -90)
	video_name = Path(video_path).stem
	os.makedirs(f'data/{video_name}', exist_ok=True)
	for i in range(len(video)):
	ind = str(i).zfill(5)
	image = T.ToPILImage()(video[i])
	image_resized = image.resize((img_size), resample=Image.Resampling.LANCZOS)
	image_resized.save(f'data/{video_name}/{ind}.png')

	def video_to_frames(video_path, img_size=(512,512)):
	video, _, _ = read_video(video_path, output_format="TCHW")
	# rotate video -90 degree if video is .mov format. this is a weird bug in torchvision
	if video_path.endswith('.mov'):
	video = T.functional.rotate(video, -90)
	video_name = Path(video_path).stem
	# os.makedirs(f'data/{video_name}', exist_ok=True)
	frames = []
	for i in range(len(video)):
	ind = str(i).zfill(5)
	image = T.ToPILImage()(video[i])
	image_resized = image.resize((img_size), resample=Image.Resampling.LANCZOS)
	# image_resized.save(f'data/{video_name}/{ind}.png')
	frames.append(image_resized)
	return frames

	def add_dict_to_yaml_file(file_path, key, value):
	data = {}

	# If the file already exists, load its contents into the data dictionary
	if os.path.exists(file_path):
	with open(file_path, 'r') as file:
	data = yaml.safe_load(file)

	# Add or update the key-value pair
	data[key] = value

	# Save the data back to the YAML file
	with open(file_path, 'w') as file:
	yaml.dump(data, file)

	def isinstance_str(x: object, cls_name: str):
	"""
	Checks whether x has any class named cls_name in its ancestry.
	Doesn't require access to the class's implementation.

	Useful for patching!
	"""

	for _cls in x.__class__.__mro__:
	if _cls.__name__ == cls_name:
	return True

	return False


	def batch_cosine_sim(x, y):
	if type(x) is list:
	x = torch.cat(x, dim=0)
	if type(y) is list:
	y = torch.cat(y, dim=0)
	x = x / x.norm(dim=-1, keepdim=True)
	y = y / y.norm(dim=-1, keepdim=True)
	similarity = x @ y.T
	return similarity


	def load_imgs(data_path, n_frames, device='cuda', pil=False):
	imgs = []
	pils = []
	for i in range(n_frames):
	img_path = os.path.join(data_path, "%05d.jpg" % i)
	if not os.path.exists(img_path):
	img_path = os.path.join(data_path, "%05d.png" % i)
	img_pil = Image.open(img_path)
	pils.append(img_pil)
	img = T.ToTensor()(img_pil).unsqueeze(0)
	imgs.append(img)
	if pil:
	return torch.cat(imgs).to(device), pils
	return torch.cat(imgs).to(device)


	def save_video(raw_frames, save_path, fps=10):
	video_codec = "libx264"
	video_options = {
	"crf": "18", # Constant Rate Factor (lower value = higher quality, 18 is a good balance)
	"preset": "slow", # Encoding preset (e.g., ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow)
	}

	frames = (raw_frames * 255).to(torch.uint8).cpu().permute(0, 2, 3, 1)
	write_video(save_path, frames, fps=fps, video_codec=video_codec, options=video_options)


	def seed_everything(seed):
	torch.manual_seed(seed)
	torch.cuda.manual_seed(seed)
	random.seed(seed)
	np.random.seed(seed)