Spaces:

eubinecto
/

idiomify

Runtime error

idiomify / main_upload_idioms.py

[#5] idioms:d-1-3 is done. Added multiple senses of the same idiom as a list

370afc1 over 3 years ago

1.06 kB

	"""
	Uploads the idioms and their senses to wandb as a dataset artifact.
	Open question: is it absolutely necessary to keep track of idioms separately?
	(Will revisit this when I need to.)
	"""
	import os
	import wandb
	from idiomify.fetchers import fetch_literal2idiomatic, fetch_config
	from idiomify.paths import ROOT_DIR


	def main():
		"""
		Build the idiom -> senses table and log it to wandb as the "idioms" dataset artifact.

		Reads the 'idioms' section of the project config, fetches the
		literal2idiomatic data for config['ver'], and groups the first returned
		frame by 'Idiom' so that each idiom maps to a de-duplicated list of its
		senses. The table is written to ROOT_DIR / "all.tsv" (tab-separated),
		attached to the artifact, logged under the aliases "latest" and
		config['ver'], and the local file is removed afterwards.
		"""
		config = fetch_config()['idioms']
		train_df, _ = fetch_literal2idiomatic(config['ver'])
		idioms_df = train_df[['Idiom', "Sense"]]
		# dict.fromkeys drops duplicates while keeping first-seen order, so the
		# written TSV is the same on every run; list(set(...)) ordered the senses
		# by string hash, which varies between interpreter runs.
		idioms_df = idioms_df.groupby('Idiom').agg({'Sense': lambda x: list(dict.fromkeys(x))})

		with wandb.init(entity="eubinecto", project="idiomify") as run:
			# the path to write the dataset in
			tsv_path = ROOT_DIR / "all.tsv"
			try:
				idioms_df.to_csv(tsv_path, sep="\t")
				artifact = wandb.Artifact(
					name="idioms",
					type="dataset",
					description=config['description'],
					metadata=config,
				)
				artifact.add_file(tsv_path)
				# then, we just log it here.
				run.log_artifact(artifact, aliases=["latest", config['ver']])
			finally:
				# don't forget to remove the file, even when writing or logging failed;
				# the existence check keeps a failed write from masking the real error.
				if os.path.exists(tsv_path):
					os.remove(tsv_path)


	if __name__ == "__main__":
		# Run the upload only when this file is executed as a script, not on import.
		main()