|
145 | 145 | ("preprocessor", linear_model_preprocessor),
|
146 | 146 | ("regressor", DummyRegressor(strategy="mean")),
|
147 | 147 | ]
|
148 |
| -).fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"]) |
| 148 | +).fit( |
| 149 | + df_train.drop(columns=["Frequency"]), |
| 150 | + df_train["Frequency"], |
| 151 | + regressor__sample_weight=df_train["Exposure"], |
| 152 | +) |
149 | 153 |
|
150 | 154 |
|
151 | 155 | ##############################################################################
|
|
159 | 163 |
|
160 | 164 | def score_estimator(estimator, df_test):
|
161 | 165 | """Score an estimator on the test set."""
|
162 |
| - y_pred = estimator.predict(df_test) |
| 166 | + y_pred = estimator.predict(df_test.drop(columns=["Frequency"])) |
163 | 167 |
|
164 | 168 | print(
|
165 | 169 | "MSE: %.3f"
|
@@ -217,7 +221,11 @@ def score_estimator(estimator, df_test):
|
217 | 221 | ("preprocessor", linear_model_preprocessor),
|
218 | 222 | ("regressor", Ridge(alpha=1e-6)),
|
219 | 223 | ]
|
220 |
| -).fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"]) |
| 224 | +).fit( |
| 225 | + df_train.drop(columns=["Frequency"]), |
| 226 | + df_train["Frequency"], |
| 227 | + regressor__sample_weight=df_train["Exposure"], |
| 228 | +) |
221 | 229 |
|
222 | 230 | # %%
|
223 | 231 | # The Poisson deviance cannot be computed on non-positive values predicted by
|
@@ -249,7 +257,11 @@ def score_estimator(estimator, df_test):
|
249 | 257 | ("preprocessor", linear_model_preprocessor),
|
250 | 258 | ("regressor", PoissonRegressor(alpha=1e-12, max_iter=300)),
|
251 | 259 | ]
|
252 |
| -).fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"]) |
| 260 | +).fit( |
| 261 | + df_train.drop(columns=["Frequency"]), |
| 262 | + df_train["Frequency"], |
| 263 | + regressor__sample_weight=df_train["Exposure"], |
| 264 | +) |
253 | 265 |
|
254 | 266 | print("PoissonRegressor evaluation:")
|
255 | 267 | test_preds.append(score_estimator(poisson_glm, df_test))
|
@@ -298,7 +310,11 @@ def score_estimator(estimator, df_test):
|
298 | 310 | HistGradientBoostingRegressor(loss="poisson", max_leaf_nodes=128),
|
299 | 311 | ),
|
300 | 312 | ]
|
301 |
| -).fit(df_train, df_train["Frequency"], regressor__sample_weight=df_train["Exposure"]) |
| 313 | +).fit( |
| 314 | + df_train.drop(columns=["Frequency"]), |
| 315 | + df_train["Frequency"], |
| 316 | + regressor__sample_weight=df_train["Exposure"], |
| 317 | +) |
302 | 318 |
|
303 | 319 | print("Poisson Gradient Boosted Trees evaluation:")
|
304 | 320 | test_preds.append(score_estimator(poisson_gbrt, df_test))
|
@@ -330,7 +346,7 @@ def score_estimator(estimator, df_test):
|
330 | 346 |
|
331 | 347 | for idx, model in enumerate([ridge_glm, poisson_glm, poisson_gbrt]):
|
332 | 348 | if label == "train":
|
333 |
| - y_pred = model.predict(df) |
| 349 | + y_pred = model.predict(df.drop(columns=["Frequency"])) |
334 | 350 | else:
|
335 | 351 | y_pred = test_preds[idx + 1]
|
336 | 352 |
|
|
0 commit comments