| """ | |
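| Build the idioms dataset (one row per idiom with its unique senses) and log it to wandb as an artifact. | |
| Note: I will do this when I need to. | |
| Open question: is it absolutely necessary to keep track of idioms separately? | |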
| """ | |
| import os | |
| import wandb | |
| from idiomify.fetchers import fetch_literal2idiomatic, fetch_config | |
| from idiomify.paths import ROOT_DIR | |
def main():
    """Build the idioms dataset and log it to wandb as the "idioms" artifact.

    Steps:
      1. Read the ``idioms`` section of the project config.
      2. Fetch the literal2idiomatic data for ``config['ver']`` and keep the
         first returned frame (the second one is ignored).
      3. Collapse it to one row per ``Idiom`` whose ``Sense`` cell is the list
         of distinct senses seen for that idiom.
      4. Write the result to ``ROOT_DIR / "all.tsv"`` (tab-separated), attach
         it to a wandb ``dataset`` artifact and log that artifact under the
         aliases ``latest`` and ``config['ver']``.
      5. Delete the intermediate TSV file, whether or not the upload succeeded.
    """
    config = fetch_config()['idioms']
    train_df, _ = fetch_literal2idiomatic(config['ver'])
    # dict.fromkeys de-duplicates while keeping first-seen order. A plain
    # set() has an arbitrary iteration order (for strings it can change from
    # one interpreter run to the next), which would make the uploaded file
    # differ between runs over identical data.
    idioms_df = (
        train_df[['Idiom', "Sense"]]
        .groupby('Idiom')
        .agg({'Sense': lambda senses: list(dict.fromkeys(senses))})
    )
    # the path to write the dataset in
    tsv_path = ROOT_DIR / "all.tsv"
    try:
        with wandb.init(entity="eubinecto", project="idiomify") as run:
            idioms_df.to_csv(tsv_path, sep="\t")
            artifact = wandb.Artifact(
                name="idioms",
                type="dataset",
                description=config['description'],
                metadata=config,
            )
            artifact.add_file(tsv_path)
            # then, we just log it here.
            run.log_artifact(artifact, aliases=["latest", config['ver']])
    finally:
        # Don't forget to remove the intermediate file, even when something
        # above raised. The existence check keeps a failure that happened
        # before the file was written from being masked by FileNotFoundError.
        if os.path.exists(tsv_path):
            os.remove(tsv_path)
# Run the upload only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()