##relevant packages
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
# feature importance
import eli5
from eli5.sklearn import PermutationImportance
 

## load the training data from the CSV file into a DataFrame
data = pd.read_csv(filepath_or_buffer='train.csv')
## list the column labels of the loaded dataset
data.columns
## Index(['time_in_hospital', 'num_lab_procedures', 'num_procedures',
##        'num_medications', 'number_outpatient', 'number_emergency',
##        'number_inpatient', 'number_diagnoses', 'race_Caucasian',
##        'race_AfricanAmerican', 'gender_Female', 'age_[70-80)', 'age_[60-70)',
##        'age_[50-60)', 'age_[80-90)', 'age_[40-50)', 'payer_code_?',
##        'payer_code_MC', 'payer_code_HM', 'payer_code_SP', 'payer_code_BC',
##        'medical_specialty_?', 'medical_specialty_InternalMedicine',
##        'medical_specialty_Emergency/Trauma',
##        'medical_specialty_Family/GeneralPractice',
##        'medical_specialty_Cardiology', 'diag_1_428', 'diag_1_414',
##        'diag_1_786', 'diag_2_276', 'diag_2_428', 'diag_2_250', 'diag_2_427',
##        'diag_3_250', 'diag_3_401', 'diag_3_276', 'diag_3_428',
##        'max_glu_serum_None', 'A1Cresult_None', 'metformin_No',
##        'repaglinide_No', 'nateglinide_No', 'chlorpropamide_No',
##        'glimepiride_No', 'acetohexamide_No', 'glipizide_No', 'glyburide_No',
##        'tolbutamide_No', 'pioglitazone_No', 'rosiglitazone_No', 'acarbose_No',
##        'miglitol_No', 'troglitazone_No', 'tolazamide_No', 'examide_No',
##        'citoglipton_No', 'insulin_No', 'glyburide-metformin_No',
##        'glipizide-metformin_No', 'glimepiride-pioglitazone_No',
##        'metformin-rosiglitazone_No', 'metformin-pioglitazone_No', 'change_No',
##        'diabetesMed_Yes', 'readmitted'],
##       dtype='object')
## model building
## target: the `readmitted` column
y = data["readmitted"]

## predictors: every column except the target, in their original order
base_features = [name for name in data.columns if name != "readmitted"]
X = data[base_features]

## hold out a validation split (fixed seed so the split is reproducible)
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)

## random forest with 30 trees, fitted on the training split only
my_model = RandomForestClassifier(n_estimators=30, random_state=1)
my_model.fit(train_X, train_y)

## permutation importance
## computed on the validation split using the already-fitted model
perm1 = PermutationImportance(my_model, random_state=1)
perm1.fit(val_X, val_y)
eli5.show_weights(perm1, feature_names=list(val_X.columns))
## Weight Feature
## 0.0451 ± 0.0068 number_inpatient
## 0.0087 ± 0.0046 number_emergency
## 0.0062 ± 0.0053 number_outpatient
## 0.0033 ± 0.0016 payer_code_MC
## 0.0020 ± 0.0016 diag_3_401
## 0.0016 ± 0.0031 medical_specialty_Emergency/Trauma
## 0.0014 ± 0.0024 A1Cresult_None
## 0.0014 ± 0.0021 medical_specialty_Family/GeneralPractice
## 0.0013 ± 0.0010 diag_2_427
## 0.0013 ± 0.0011 diag_2_276
## 0.0011 ± 0.0022 age_[50-60)
## 0.0010 ± 0.0022 age_[80-90)
## 0.0007 ± 0.0006 repaglinide_No
## 0.0006 ± 0.0010 diag_1_428
## 0.0006 ± 0.0022 payer_code_SP
## 0.0005 ± 0.0030 insulin_No
## 0.0004 ± 0.0028 diabetesMed_Yes
## 0.0004 ± 0.0021 diag_3_250
## 0.0003 ± 0.0018 diag_2_250
## 0.0003 ± 0.0015 glipizide_No
## … 44 more …