8000 leaderboard implemented · frederikhoengaard/lazy-learn@3a414a4 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3a414a4

Browse files
leaderboard implemented
1 parent d06f261 commit 3a414a4

File tree

5 files changed

+56
-20
lines changed

5 files changed

+56
-20
lines changed

Pipfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ verify_ssl = true
66
[packages]
77
loguru = "==0.6.*"
88
pandas = "==1.5.*"
9-
scikit-learn = "*"
9+
scikit-learn = "1.2.2"
1010
tqdm = "*"
1111
jupyter = "*"
12-
xgboost = "*"
12+
xgboost = "1.7.*"
1313

1414
[dev-packages]
1515
black = "==23.*"

python/src/lazylearn/lazylearn.py

+5-14
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from regression.models.randomforest.randomforest import ( # noqa
66
RandomForestRegressionRunner,
77
)
8-
from sklearn.metrics import mean_absolute_error
8+
from strategies.strategy_builder import StrategyBuilder
99

1010

1111
class LazyLearner:
@@ -50,18 +50,9 @@ def run_autopilot(self):
5050
5151
:return:
5252
"""
53+
sb = StrategyBuilder(task=self.task, dataset=self.dataset, target=self.target, random_state=self.random_state)
54+
self.leaderboard = sorted([model for model in sb.models], key=lambda x: x.score)
5355

54-
simple_random_forest = RandomForestRegressionRunner(
55-
target=self.target,
56-
dataset=self.dataset,
57-
random_state=self.random_state, # noqa
58-
)
59-
simple_random_forest.fit()
56+
def get_leaderboard(self):
    """Return the leaderboard as a list of ``(name, score)`` tuples.

    The tuples appear in the same order as the entries of
    ``self.leaderboard``; each entry is read through its ``name`` and
    ``score`` attributes.

    :return: list of ``(name, score)`` tuples, empty when the
        leaderboard holds no entries
    """
    return [(entry.name, entry.score) for entry in self.leaderboard]
6058

61-
# get holdout scores
62-
simple_random_forest.predict(self.dataset.partitions["test"])
63-
simple_random_forest.pipeline.holdout_score = mean_absolute_error(
64-
self.dataset.partitions["test"][self.target],
65-
simple_random_forest.pipeline.tmp_pred,
66-
)
67-
return simple_random_forest

python/src/lazylearn/models/models.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@ def save(self):
2222

2323

2424
class Model:
25-
def __init__(self):
26-
self.name = None
25+
def __init__(self, name: str, score: float, pipeline):
26+
self.name = name
27+
self.score = score
28+
self.pipeline = pipeline
2729

2830
def save(self, path: str):
2931
raise NotImplementedError

python/src/lazylearn/regression/models/randomforest/randomforest.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88

99
class RandomForestRegressionRunner:
10-
def __init__(self, target, dataset, random_state=42):
10+
def __init__(self, target, dataset, random_state=None):
11+
self.name = "RandomForestRegressor"
1112
self.target = target
1213
self.dataset: Dataset = dataset
1314
self.random_state = random_state
@@ -31,7 +32,7 @@ def fit(self):
3132

3233
self.pipeline.add(OrdinalConverter(cat_vars=cat_vars))
3334

34-
self.pipeline.add(RandomForestRegressorStep())
35+
self.pipeline.add(RandomForestRegressorStep(random_state=self.random_state))
3536

3637
self.pipeline.fit()
3738

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from sklearn.metrics import mean_absolute_error
2+
3+
from models.models import Model
4+
from regression.models.randomforest.randomforest import RandomForestRegressionRunner
5+
6+
class StrategyBuilder:
    """Assemble, fit and score the modelling strategies for a task.

    Constructing an instance immediately calls :meth:`build` and then
    :meth:`start`, so ``models`` is already populated when the
    constructor returns.
    """

    def __init__(self, task, dataset, target, random_state=None):
        """Store the run configuration, then build and run the strategies.

        :param task: task identifier; only ``"regression"`` is accepted
        :param dataset: object whose ``partitions["test"]`` entry holds the
            holdout data used for scoring
        :param target: key used to select the target values from the
            holdout partition
        :param random_state: value passed to each strategy runner as its
            ``random_state``
        :raises ValueError: if ``task`` is not supported
        """
        self.task = task
        self.dataset = dataset
        self.target = target
        self.random_state = random_state
        self.strategies = []
        self.models = []

        self.build()
        self.start()

    def build(self):
        """Append the strategy runners that apply to ``self.task``.

        :raises ValueError: if ``self.task`` is not ``"regression"``
        """
        if self.task != "regression":
            raise ValueError("Unsupported task!")

        self.strategies.append(
            RandomForestRegressionRunner(
                target=self.target,
                dataset=self.dataset,
                random_state=self.random_state,
            )
        )

    def start(self):
        """Fit each strategy, score it on the holdout data and record it.

        For every strategy the mean absolute error on the ``"test"``
        partition is stored on ``strategy.pipeline.holdout_score`` and a
        ``Model`` carrying the strategy name and that score is appended to
        ``self.models``.
        """
        for strategy in self.strategies:
            strategy.fit()

            # Read the holdout partition once per strategy, after fitting,
            # and reuse it for both prediction and scoring.
            holdout = self.dataset.partitions["test"]
            strategy.predict(holdout)
            # presumably predict() leaves its output on pipeline.tmp_pred;
            # verify against the runner implementation.
            strategy.pipeline.holdout_score = mean_absolute_error(
                holdout[self.target],
                strategy.pipeline.tmp_pred,
            )

            # NOTE(review): the runner itself, not strategy.pipeline, is
            # stored under ``pipeline`` - confirm this is intended.
            self.models.append(
                Model(
                    name=strategy.name,
                    score=strategy.pipeline.holdout_score,
                    pipeline=strategy,
                )
            )
42+

0 commit comments

Comments
 (0)
0