Add metrics, update readme · frederikhoengaard/lazy-learn@5c1293f · GitHub
[go: up one dir, main page]

Skip to content

Commit 5c1293f

Browse files
Add metrics, update readme
1 parent b319646 commit 5c1293f

File tree

6 files changed

+58
-7
lines changed

6 files changed

+58
-7
lines changed

README.md

+21-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,27 @@ Current stable version is 0.0.3. The upcoming updates will support:
1919

2020
## Usage
2121

22-
Using lazy-learn revolves around the `LazyLearner` class. You can think of it as a kind of project, and it is the wrapper for any experiment within lazy-learn.
22+
Using lazy-learn revolves around the `LazyLearner` class. You can think of it as a kind of project: it is the wrapper for any experiment within lazy-learn. Here is a simple example using the California Housing dataset:
23+
24+
```python
25+
from lazylearn import LazyLearner
26+
from sklearn.datasets import fetch_california_housing
27+
28+
29+
# get some data
30+
data = fetch_california_housing(as_frame=True)
31+
df = data["data"]
32+
df["MedHouseVal"] = data["target"]
33+
34+
# instantiate and run the LazyLearner
35+
learner = LazyLearner()
36+
learner.create_project(data=df, target="MedHouseVal")
37+
learner.run_autopilot()
38+
39+
# evaluate results
40+
print(learner.leaderboard())
41+
42+
```
2343

2444
## Installation
2545

python/src/lazylearn/lazylearn.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ def __init__(self, random_state=None):
1616
self._leaderboard = None
1717
self.random_state = random_state
1818
self.target = None
19+
self.metric = None
1920

20-
def create_project(self, data, target, task="infer"):
21+
def create_project(self, data, target, task="infer", metric="default"):
2122
# ingest data
2223
self.target = target
2324
self.dataset = Ingestion().run(data)
@@ -26,6 +27,8 @@ def create_project(self, data, target, task="infer"):
2627
# if target is numeric then regression, else classification
2728
if self.dataset.column_type_map[target] == "numeric":
2829
self.task = "regression"
30+
if metric == "default":
31+
self.metric = "mse"
2932
else:
3033
self.task = "classification"
3134

@@ -57,7 +60,7 @@ def run_autopilot(self):
5760
random_state=self.random_state,
5861
)
5962
self._leaderboard = sorted(
60-
[model for model in sb.models], key=lambda x: x.score
63+
[model for model in sb.models], key=lambda x: x.score[self.metric]
6164
)
6265

6366
def leaderboard(self):

python/src/lazylearn/regression/models/xgboost/xgb_regressor_steps/hpo_step.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ def __init__(self, n_splits=5, random_state=None):
99
self.random_state = random_state
1010
self.param_grid = {
1111
"max_depth": [3, 4, 5, 6, 7, 8, 9, 10],
12-
"learning_rate": [0.001, 0.01, 0.1, 0.2, 0.3],
12+
"learning_rate": [0.001, 0.01, 0.1, 0.2, 0.3, 0.5],
1313
"subsample": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
1414
"colsample_bytree": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
1515
"colsample_bylevel": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
1616
"min_child_weight": [0.5, 1.0, 3.0, 5.0, 7.0, 10.0],
1717
"gamma": [0, 0.25, 0.5, 1.0],
18-
"n_estimators": [100, 200, 300, 500, 1000],
18+
"n_estimators": [100, 200, 300, 500, 1000, 2500],
1919
}
2020

2121
def fit(self, pipeline: RegressionPipeline):

python/src/lazylearn/strategies/strategy_builder.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
RandomForestRegressionRunner,
44
)
55
from regression.models.xgboost.xgb import XGBRegressionRunner
6-
from sklearn.metrics import mean_absolute_error
6+
from strategies.strategy_steps.evaluation import Evaluator
77

88

99
class StrategyBuilder:
@@ -44,7 +44,8 @@ def start(self):
4444

4545
# get holdout scores
4646
strategy.predict(self.dataset.partitions["test"].copy())
47-
strategy.pipeline.holdout_score = mean_absolute_error(
47+
strategy.pipeline.holdout_score = Evaluator().evaluate(
48+
self.task,
4849
self.dataset.partitions["test"][self.target],
4950
strategy.pipeline.tmp_pred,
5051
)

python/src/lazylearn/strategies/strategy_steps/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from sklearn.metrics import (
2+
accuracy_score,
3+
f1_score,
4+
log_loss,
5+
mean_absolute_error,
6+
mean_absolute_percentage_error,
7+
mean_squared_error,
8+
)
9+
10+
11+
class Evaluator:
    """Compute a fixed set of holdout metrics for a given task type.

    ``self.metrics`` maps a task name (``"regression"`` or
    ``"classification"``) to a list of ``(metric_name, metric_function)``
    pairs. Every metric function is called as ``func(y_true, y_pred)``.
    """

    def __init__(self):
        self.metrics = {
            "regression": [
                ("mae", mean_absolute_error),
                ("mse", mean_squared_error),
                ("mape", mean_absolute_percentage_error),
            ],
            "classification": [
                ("accuracy", accuracy_score),
                # NOTE(review): f1_score defaults to average="binary";
                # confirm this is intended for multiclass targets.
                ("f1", f1_score),
                # NOTE(review): log_loss is normally fed predicted
                # probabilities rather than hard labels - verify what the
                # caller passes as y_pred.
                ("logloss", log_loss),
            ],
        }

    def evaluate(self, task, y_true, y_pred):
        """Score predictions against ground truth with every metric for *task*.

        The parameter order is ground truth first, predictions second. This
        matches the scikit-learn ``metric(y_true, y_pred)`` convention and
        callers that pass ``(task, targets, predictions)`` positionally.
        Asymmetric metrics (MAPE, F1, log-loss) give wrong results if the
        two are swapped.

        :param task: key into ``self.metrics`` ("regression" or
            "classification")
        :param y_true: ground-truth target values
        :param y_pred: model predictions, aligned with ``y_true``
        :return: dict mapping each metric name to its computed score
        :raises KeyError: if ``task`` is not a key of ``self.metrics``
        """
        return {name: func(y_true, y_pred) for name, func in self.metrics[task]}

0 commit comments

Comments
 (0)
0