"""Cluster geyser observations into two groups and label them short/long.

Reads ``raw/geyser.csv``, names its two columns ``duration`` and
``waiting``, runs 2-means clustering on them and writes the table back out
as ``geyser.csv`` with an extra ``kind`` column ("short" or "long").
"""

import numpy as np
import pandas as pd
from scipy.cluster.vq import kmeans2

# Column names assigned to the raw table, in file order.
FEATURE_COLUMNS = ("duration", "waiting")


def classify_eruptions(df):
    """Return a "long"/"short" label for every row of *df*.

    Args:
        df: DataFrame with numeric ``duration`` and ``waiting`` columns.

    Returns:
        A NumPy array of strings, one per row of *df*. Rows assigned to the
        cluster whose centroid has the larger ``duration`` are "long"; all
        other rows are "short".
    """
    points = df[list(FEATURE_COLUMNS)].to_numpy(dtype=float)
    centroids, labels = kmeans2(points, 2)
    # Cluster indices are arbitrary (they depend on the random initial
    # centroids), so pick the "long" cluster from the centroids themselves
    # rather than assuming it is index 1.
    long_cluster = np.argmax(centroids[:, 0])
    return np.where(labels == long_cluster, "long", "short")


def main(src="raw/geyser.csv", dst="geyser.csv"):
    """Read *src*, add the ``kind`` column and write the result to *dst*.

    Args:
        src: Path of the two-column input CSV.
        dst: Path of the labelled output CSV (written without the index).
    """
    # Seed NumPy's global generator before clustering so repeated runs
    # produce the same output file (kmeans2 initialises randomly).
    np.random.seed(0)
    df = pd.read_csv(src)
    df.columns = list(FEATURE_COLUMNS)
    df["kind"] = classify_eruptions(df)
    df.to_csv(dst, index=False)


if __name__ == "__main__":
    main()