| import numpy as np | |
| import pandas as pd | |
def main():
    """Build the derived Titanic dataset and write it to ``titanic.csv``.

    Reads ``raw/titanic.csv``, adds the derived columns ``class``, ``who``,
    ``adult_male``, ``deck``, ``embark_town``, ``alive`` and ``alone``,
    drops the ``name``, ``ticket`` and ``cabin`` columns, and writes the
    result to ``titanic.csv`` without the index column.
    """
    class_names = {1: "First", 2: "Second", 3: "Third"}
    town_names = {"C": "Cherbourg", "Q": "Queenstown", "S": "Southampton"}
    alive_names = {0: "no", 1: "yes"}

    def _deck_letter(first_char):
        # Deck "T" is recorded as missing; every other value (including an
        # already-missing one) passes through unchanged.
        return np.nan if first_char == "T" else first_char

    df = pd.read_csv("raw/titanic.csv")

    df["class"] = df["pclass"].map(class_names)
    df["who"] = df[["age", "sex"]].apply(woman_child_or_man, axis=1)
    df["adult_male"] = df["who"].eq("man")
    # The deck is the first character of the cabin string.
    df["deck"] = df["cabin"].str[0].map(_deck_letter)
    df["embark_town"] = df["embarked"].map(town_names)
    df["alive"] = df["survived"].map(alive_names)

    # A passenger is alone when parch + sibsp is zero.
    relatives = df["parch"] + df["sibsp"]
    df["alone"] = ~relatives.astype(bool)

    df = df.drop(columns=["name", "ticket", "cabin"])
    df.to_csv("titanic.csv", index=False)
def woman_child_or_man(passenger):
    """Classify a passenger as ``"child"``, ``"woman"`` or ``"man"``.

    ``passenger`` is a two-item iterable ``(age, sex)``. Anyone with an age
    below 16 is a ``"child"``; otherwise ``"male"`` maps to ``"man"`` and
    ``"female"`` to ``"woman"``. A NaN age is not less than 16, so such a
    passenger is labelled by sex. Any other ``sex`` value raises
    ``KeyError``.
    """
    age, sex = passenger
    adult_label = {"male": "man", "female": "woman"}
    return "child" if age < 16 else adult_label[sex]
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()