Spaces:
Runtime error
Runtime error
| from configs import CFG | |
| import os | |
| import requests | |
| import zipfile | |
| from pycocotools.coco import COCO | |
| import torch | |
| import cv2 | |
| import albumentations as A | |
| import soundfile as sf | |
| # Load Coco dataset | |
def _download_and_extract(url, zip_path, extract_dir, chunk_size=8192, timeout=60):
    """Stream ``url`` to ``zip_path``, unpack it into ``extract_dir``, delete the archive.

    Args:
        url: Address of the zip archive to fetch.
        zip_path: Where the archive is stored temporarily while downloading.
        extract_dir: Directory the archive contents are extracted into.
        chunk_size: Number of bytes written per streamed chunk (8192 = 8 KB).
        timeout: Seconds ``requests`` waits for the connection and for each
            read before giving up, so a stalled server cannot hang forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # The target directories do not exist on a fresh checkout; without this,
    # opening ``zip_path`` for writing fails with FileNotFoundError.
    os.makedirs(os.path.dirname(zip_path) or ".", exist_ok=True)
    os.makedirs(extract_dir, exist_ok=True)

    try:
        # Using the response as a context manager releases the connection
        # even when the download is interrupted.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Fail early on an error status instead of saving an HTML error
            # page and tripping over it later while unzipping.
            response.raise_for_status()
            with open(zip_path, "wb") as archive:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    archive.write(chunk)

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_dir)
    finally:
        # Remove the archive on success and on failure alike, so a partial
        # download is never left behind.
        if os.path.exists(zip_path):
            os.remove(zip_path)


def download_dataset(data_dir="../datasets"):
    """Download and unpack the COCO 2014 annotations, train and val archives.

    Each archive is streamed to ``<data_dir>/<subdir>/<name>.zip``, extracted
    into ``data_dir`` and then deleted. Missing directories are created.

    Args:
        data_dir: Root directory that receives the extracted archive contents.

    Raises:
        requests.HTTPError: If any download returns an error status.
        zipfile.BadZipFile: If a downloaded archive is corrupt.
    """
    # (sub-directory holding the temporary zip, zip file name, download URL)
    archives = (
        (
            "annotations",
            "annotations.zip",
            "http://images.cocodataset.org/annotations/annotations_trainval2014.zip",
        ),
        (
            "train2014",
            "train2014.zip",
            "http://images.cocodataset.org/zips/train2014.zip",
        ),
        (
            "val2014",
            "val2014.zip",
            "http://images.cocodataset.org/zips/val2014.zip",
        ),
    )

    for subdir, zip_name, url in archives:
        zip_path = os.path.join(data_dir, subdir, zip_name)
        _download_and_extract(url, zip_path, data_dir)
def make_pairs(annotation_json_files, image_dir, max_captions=3):
    """Pair every entry of a directory with the fixed caption ``"an image"``.

    NOTE(review): despite its name, ``annotation_json_files`` is handed to
    ``os.listdir`` and is therefore used as a directory path. ``image_dir``
    and ``max_captions`` are accepted but never read. This looks like a
    placeholder; confirm with the callers before relying on it.

    Args:
        annotation_json_files: Directory whose entries are listed.
        image_dir: Unused.
        max_captions: Unused.

    Returns:
        A list of ``(path, "an image")`` tuples, one per directory entry, in
        the order returned by ``os.listdir``.
    """
    pairs = []
    for entry in os.listdir(annotation_json_files):
        full_path = os.path.join(annotation_json_files, entry)
        pairs.append((full_path, "an image"))
    return pairs