From 4f209fc1896fd1675af1bc3d514eb43a64a0b00e Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 31 Aug 2018 12:35:38 -0400 Subject: [PATCH] simplifying column transformer example by removing default values / making use of sparse threshold --- examples/compose/plot_column_transformer_mixed_types.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 64f1a3c88d3d1..73ee27f83a907 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -61,13 +61,12 @@ categorical_features = ['embarked', 'sex', 'pclass'] categorical_transformer = Pipeline(steps=[ ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), - ('onehot', OneHotEncoder(sparse=False, handle_unknown='ignore'))]) + ('onehot', OneHotEncoder(handle_unknown='ignore'))]) preprocessor = ColumnTransformer( transformers=[ ('num', numeric_transformer, numeric_features), - ('cat', categorical_transformer, categorical_features)], - remainder='drop') + ('cat', categorical_transformer, categorical_features)]) # Append classifier to preprocessing pipeline. # Now we have a full prediction pipeline. @@ -77,8 +76,7 @@ X = data.drop('survived', axis=1) y = data['survived'] -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, - shuffle=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) clf.fit(X_train, y_train) print("model score: %.3f" % clf.score(X_test, y_test))