Spaces:
Runtime error
Runtime error
# Script to bootstrap a dataset.
#
# Bootstrapping a dataset requires first running an SQL query in BigQuery and
# pasting its results into this script, which stores them as a tab-delimited
# file under `csv_files/`. A dataset is then created on the Hub from that
# initial data. This script goes step-by-step over the creation.

import os
import sys
from pathlib import Path

from datasets import Dataset
from huggingface_hub import dataset_info
from huggingface_hub.utils import RepositoryNotFoundError

# Define the library name you'd like to work on.
library_name = input("Library name: ").strip()
if not library_name:
    # An empty name would produce paths such as `csv_files/.csv` and a
    # malformed repository id, so stop before touching disk or the Hub.
    sys.exit("Library name must not be empty.")

current_dir = Path(__file__).parent
csv_path = current_dir / 'csv_files' / f'{library_name}.csv'
repo_id = f'open-source-metrics/{library_name}-pip-installs'

# Only ask for the BigQuery results when they have not been saved yet.
if not csv_path.exists():
    query_template = (current_dir / 'query.sql').read_text(encoding='utf-8')
    query = query_template.replace("<PROJECT_NAME>", library_name)

    print("Open the following link: https://console.cloud.google.com/bigquery?project=huggingface-ml\n")
    print(f"Run the following query:\n\n{query}\n\n")
    print("Press paste the results here (Ctrl+D once pasted): ")

    # Read everything up to EOF (Ctrl+D) so multi-line results are captured.
    csv_values = sys.stdin.read()

    # The `csv_files` directory may not exist on a fresh checkout.
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    csv_path.write_text(csv_values, encoding='utf-8')

# Check whether the target dataset repository is already on the Hub.
# NOTE(review): `dataset_exists` is not read anywhere in the visible part of
# this script; the push below happens in both cases. Confirm whether an
# existing dataset should be skipped or updated differently.
try:
    dataset_info(repo_id)
except RepositoryNotFoundError:
    dataset_exists = False
else:
    dataset_exists = True

# The saved results are tab-delimited despite the `.csv` extension.
dataset = Dataset.from_csv(str(csv_path), delimiter='\t')
dataset.push_to_hub(repo_id, private=True)
print("Dataset is pushed to Hub.")