FIX remove y labels from X sets · neurodata/scikit-learn@39a6780 · GitHub
[go: up one dir, main page]

Skip to content

Commit 39a6780

Browse files
committed
FIX remove y labels from X sets
1 parent 8a0aaad commit 39a6780

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed

examples/linear_model/plot_poisson_regression_non_normal_loss.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,11 @@
145145
("preprocessor", linear_model_preprocessor),
146146
("regressor", DummyRegressor(strategy="mean")),
147147
]
148-
).fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"])
148+
).fit(
149+
df_train.drop(columns=["Frequency"]),
150+
df_train["Frequency"],
151+
regressor__sample_weight=df_train["Exposure"],
152+
)
149153

150154

151155
##############################################################################
@@ -159,7 +163,7 @@
159163

160164
def score_estimator(estimator, df_test):
161165
"""Score an estimator on the test set."""
162-
y_pred = estimator.predict(df_test)
166+
y_pred = estimator.predict(df_test.drop(columns=["Frequency"]))
163167

164168
print(
165169
"MSE: %.3f"
@@ -217,7 +221,11 @@ def score_estimator(estimator, df_test):
217221
("preprocessor", linear_model_preprocessor),
218222
("regressor", Ridge(alpha=1e-6)),
219223
]
220-
).fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"])
224+
).fit(
225+
df_train.drop(columns=["Frequency"]),
226+
df_train["Frequency"],
227+
regressor__sample_weight=df_train["Exposure"],
228+
)
221229

222230
# %%
223231
# The Poisson deviance cannot be computed on non-positive values predicted by
@@ -249,7 +257,11 @@ def score_estimator(estimator, df_test):
249257
("preprocessor", linear_model_preprocessor),
250258
("regressor", PoissonRegressor(alpha=1e-12, max_iter=300)),
251259
]
252-
).fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"])
260+
).fit(
261+
df_train.drop(columns=["Frequency"]),
262+
df_train["Frequency"],
263+
regressor__sample_weight=df_train["Exposure"],
264+
)
253265

254266
print("PoissonRegressor evaluation:")
255267
test_preds.append(score_estimator(poisson_glm, df_test))
@@ -298,7 +310,11 @@ def score_estimator(estimator, df_test):
298310
HistGradientBoostingRegressor(loss="poisson", max_leaf_nodes=128),
299311
),
300312
]
301-
).fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"])
313+
).fit(
314+
df_train.drop(columns=["Frequency"]),
315+
df_train["Frequency"],
316+
regressor__sample_weight=df_train["Exposure"],
317+
)
302318

303319
print("Poisson Gradient Boosted Trees evaluation:")
304320
test_preds.append(score_estimator(poisson_gbrt, df_test))
@@ -330,7 +346,7 @@ def score_estimator(estimator, df_test):
330346

331347
for idx, model in enumerate([ridge_glm, poisson_glm, poisson_gbrt]):
332348
if label == "train":
333-
y_pred = model.predict(df)
349+
y_pred = model.predict(df.drop(columns=["Frequency"]))
334350
else:
335351
y_pred = test_preds[idx + 1]
336352

0 commit comments

Comments
 (0)
0