Spaces:
Runtime error
Runtime error
| import datasets | |
| import pandas as pd | |
| from tqdm.notebook import tqdm | |
| import json | |
| import os | |
def load_dataset(dataset_name, **kwargs):
    """Load a supported dataset as a ``datasets.Dataset``.

    For ``"Stocknet"``, every file found at ``<root>/<stock>/<date>`` is read
    as JSON Lines (one JSON document per line) and all parsed records are
    combined into a single table, one row per line.

    Args:
        dataset_name: Name of the dataset to load. Only ``"Stocknet"`` is
            handled.
        **kwargs: Accepted for interface compatibility; currently unused.

    Returns:
        A ``datasets.Dataset`` created from a pandas DataFrame holding every
        parsed record, with a fresh 0..n-1 index.

    Raises:
        NotImplementedError: If ``dataset_name`` is not ``"Stocknet"``.
    """
    if dataset_name != "Stocknet":
        raise NotImplementedError("Only support Stocknet dataset for now")

    # NOTE: relative path, so it is resolved against the current working
    # directory of the process, not against this file's location.
    root_path = r"../../../stocknet-dataset/tweet/raw"

    # Gather every parsed record first and build the DataFrame once at the
    # end; concatenating inside the loop re-copies all previous rows each time.
    records = []
    # Sorted listings make the resulting row order reproducible, since
    # os.listdir() returns entries in arbitrary order.
    stock_names = sorted(os.listdir(root_path))
    for stock in tqdm(stock_names, desc="Loading Stocknet dataset..."):
        stock_path = os.path.join(root_path, stock)
        for date in sorted(os.listdir(stock_path)):
            # Open the file for *this* date. The previous code always opened
            # date_files[0], which duplicated the first file once per date
            # and silently dropped every other file.
            with open(os.path.join(stock_path, date), encoding="utf-8") as f:
                # Skip blank lines (e.g. a trailing newline at EOF) so that
                # json.loads is never handed an empty document.
                records.extend(json.loads(line) for line in f if line.strip())

    frame = pd.DataFrame(records)
    return datasets.Dataset.from_pandas(frame)