Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| import tensorflow_decision_forests as tfdf | |
| import tensorflow as tf | |
| from tensorflow import keras | |
| from tensorflow.keras import layers | |
| import pandas as pd | |
| import gradio as gr | |
| import urllib | |
| input_path = "https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census-income" | |
| input_column_header = "income_level" | |
| #Load data | |
| BASE_PATH = input_path | |
| CSV_HEADER = [ l.decode("utf-8").split(":")[0].replace(" ", "_") | |
| for l in urllib.request.urlopen(f"{BASE_PATH}.names") | |
| if not l.startswith(b"|")][2:] | |
| CSV_HEADER.append(input_column_header) | |
| train_data = pd.read_csv(f"{BASE_PATH}.data.gz", header=None, names=CSV_HEADER) | |
| test_data = pd.read_csv(f"{BASE_PATH}.test.gz", header=None, names=CSV_HEADER) | |
| #subset data | |
| train_data = train_data.loc[:, ["education", "sex", "capital_gains", "capital_losses", "income_level"]] | |
| test_data = test_data.loc[:, ["education", "sex", "capital_gains", "capital_losses", "income_level"]] | |
| def encode_df(df): | |
| sex_mapping = {" Male": 0, " Female": 1} | |
| df = df.replace({"sex": sex_mapping}) | |
| education_mapping = {" High school graduate": 1, " Some college but no degree": 2, | |
| " 10th grade": 3, " Children": 4, " Bachelors degree(BA AB BS)": 5, | |
| " Masters degree(MA MS MEng MEd MSW MBA)": 6, " Less than 1st grade": 7, | |
| " Associates degree-academic program": 8, " 7th and 8th grade": 9, | |
| " 12th grade no diploma": 10, " Associates degree-occup /vocational": 11, | |
| " Prof school degree (MD DDS DVM LLB JD)": 12, " 5th or 6th grade": 13, | |
| " 11th grade": 14, " Doctorate degree(PhD EdD)": 15, " 9th grade": 16, | |
| " 1st 2nd 3rd or 4th grade": 17} | |
| df = df.replace({"education": education_mapping}) | |
| income_mapping = {' - 50000.': 0, ' 50000+.': 1} | |
| df = df.replace({"income_level": income_mapping}) | |
| return df | |
| train_data = encode_df(train_data) | |
| test_data = encode_df(test_data) | |
| feature_a = tfdf.keras.FeatureUsage(name="education", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL) | |
| feature_b = tfdf.keras.FeatureUsage(name="sex", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL) | |
| feature_c = tfdf.keras.FeatureUsage(name="capital_gains", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL) | |
| feature_d = tfdf.keras.FeatureUsage(name="capital_losses", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL) | |
| # Convert the dataset into a TensorFlow dataset. | |
| train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_data, label="income_level") | |
| test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(test_data, label="income_level") | |
| # Train a GB Trees model | |
| model = tfdf.keras.GradientBoostedTreesModel( | |
| features = [feature_a, feature_b, feature_c, feature_d], | |
| exclude_non_specified_features = True, | |
| growing_strategy = "BEST_FIRST_GLOBAL", | |
| num_trees = 350, | |
| max_depth = 7, | |
| min_examples = 6, | |
| subsample = 0.65, | |
| sampling_method = "GOSS", | |
| validation_ratio = 0.1, | |
| task = tfdf.keras.Task.CLASSIFICATION, | |
| loss = "DEFAULT", | |
| verbose=0) | |
| model.compile(metrics=[keras.metrics.BinaryAccuracy(name="accuracy")]) | |
| model.fit(train_ds) | |
| model.evaluate(test_ds, verbose=0) | |
| #prepare user input for the model | |
| def process_inputs(education, sex, capital_gains, capital_losses): | |
| df = pd.DataFrame.from_dict( | |
| { | |
| "education": [edu_in], | |
| "sex": [sex_in], | |
| "capital_gains": [cap_gains_in], | |
| "capital_losses": [cap_losses_in] | |
| } | |
| ) | |
| df = encode_df(df) | |
| feature_a = tfdf.keras.FeatureUsage(name="education", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL) | |
| feature_b = tfdf.keras.FeatureUsage(name="sex", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL) | |
| feature_c = tfdf.keras.FeatureUsage(name="capital_gains", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL) | |
| feature_d = tfdf.keras.FeatureUsage(name="capital_losses", semantic=tfdf.keras.FeatureSemantic.CATEGORICAL) | |
| df = tfdf.keras.pd_dataframe_to_tf_dataset(df) | |
| pred = model.predict(df) | |
| if pred > .5: | |
| pred_bi = 1 | |
| return {"> $50,000": pred_bi} | |
| elif pred <=.5: | |
| pred_bi = 0 | |
| return {"<= $50,000": pred_bi} | |
| iface = gr.Interface( | |
| process_inputs, | |
| [ | |
| gr.inputs.Dropdown([" 1st 2nd 3rd or 4th grade", " High school graduate", | |
| " Bachelors degree(BA AB BS)", " Masters degree(MA MS MEng MEd MSW MBA)", | |
| " Prof school degree (MD DDS DVM LLB JD)", | |
| " Doctorate degree(PhD EdD)"], type="index", label="education"), | |
| gr.inputs.Radio([" Male", " Female"], label="sex", type="index"), | |
| gr.inputs.Slider(minimum = 0, maximum = 99999, label="capital_gains"), | |
| gr.inputs.Slider(minimum = 0, maximum = 4608, label="capital_losses") | |
| ], | |
| gr.outputs.Label(num_top_classes=2), | |
| live=True, | |
| analytics_enabled=False | |
| ) | |