| """ | |
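| Builds the list of unique idioms from the training data and logs it to wandb as the "idioms" dataset artifact. | |
| Open question: is it absolutely necessary to keep track of idioms separately? Will revisit this when I need to. | |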
| """ | |
| import os | |
| import wandb | |
| from idiomify.fetchers import fetch_literal2idiomatic, fetch_config | |
| from idiomify.paths import ROOT_DIR | |
def main():
    """Log the unique training idioms to wandb as the ``idioms`` dataset artifact.

    Reads the ``'idioms'`` section of the project config, fetches the
    literal2idiomatic training frame for ``config['ver']``, de-duplicates its
    ``'Idiom'`` column, writes one idiom per line to ``ROOT_DIR / "all.txt"``
    and logs that file as a wandb artifact aliased ``"latest"`` and
    ``config['ver']``. The temporary text file is always removed afterwards.
    """
    config = fetch_config()['idioms']
    train_df, _ = fetch_literal2idiomatic(config['ver'])
    # Sort the unique idioms: plain set iteration order for strings changes
    # between interpreter runs (hash randomization), which would make the
    # written file differ from run to run for identical data.
    idioms = sorted(set(train_df['Idiom'].tolist()))
    # the path to write the dataset in
    txt_path = ROOT_DIR / "all.txt"
    try:
        with wandb.init(entity="eubinecto", project="idiomify") as run:
            # Explicit UTF-8 so the output does not depend on the platform's
            # default encoding (idioms may contain non-ASCII characters).
            with open(txt_path, 'w', encoding='utf-8') as fh:
                fh.writelines(idiom + "\n" for idiom in idioms)
            artifact = wandb.Artifact(name="idioms", type="dataset",
                                      description=config['description'],
                                      metadata=config)
            artifact.add_file(txt_path)
            # then, we just log it here.
            run.log_artifact(artifact, aliases=["latest", config['ver']])
    finally:
        # Clean up even if logging fails, and only after the run context has
        # exited. NOTE(review): presumably wandb may still be reading the file
        # until the run finishes - confirm against the wandb version in use.
        try:
            os.remove(txt_path)
        except FileNotFoundError:
            # Nothing was written (e.g. wandb.init failed) - nothing to remove.
            pass
# Script entry point: run the upload only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()