8000 Update lazylearn.py · frederikhoengaard/lazy-learn@3f83e94 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3f83e94

Browse files
Update lazylearn.py
1 parent 7969384 commit 3f83e94

File tree

1 file changed

+17
-4
lines changed

1 file changed

+17
-4
lines changed

python/src/lazylearn/lazylearn.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import List, Tuple
22

3+
import pandas as pd
34
from ingestion.ingestion_pipeline import Ingestion
45
from model_selection.splitters import ( # noqa
56
test_train_splitter,
@@ -17,7 +18,14 @@
1718

1819
class LazyLearner:
1920
"""
20-
TODO: Introduce LazyLearner
21+
This class implements the LazyLearner, an object that
22+
runs AutoML for regression and classification given a
23+
dataset.
24+
25+
To run an experiment with LazyLearn, instantiate the class,
26+
then add a dataset and configure the project through the
27+
create_project() method. To trigger the modelling process,
28+
call the run_autopilot() method with the desired mode.
2129
"""
2230

2331
def __init__(self, random_state=None):
@@ -47,7 +55,11 @@ def create_project(
4755
:param task: "regression", "classification" or "infer"
4856
:param metric: metric by which to rank models
4957
:param test_size: share of dataset to use for holdout
50-
:param otv_config: out-of-time validation configuration
58+
:param otv_config: out-of-time validation configuration of the form
59+
otv_config = {
60+
"column": "column_to_time_partition_on",
61+
"holdout_start_date": "YYYYMMDD"
62+
}
5163
:return:
5264
"""
5365
# ingest data
@@ -72,7 +84,8 @@ def create_project(
7284

7385
if otv_config is not None:
7486
assert (
75-
otv_config["column"] in self.dataset.column_type_map["datetime"] # noqa
87+
otv_config["column"]
88+
in self.dataset.type_collections["datetime"] # noqa
7689
)
7790
self.otv_config = otv_config
7891
self.dataset.df = self.dataset.df.sort_values(
@@ -82,7 +95,7 @@ def create_project(
8295
self.dataset = time_test_train_splitter(
8396
self.dataset,
8497
test_size=test_size,
85-
split_date=otv_config["column"],
98+
split_date=pd.to_datetime(otv_config["holdout_start_date"]),
8699
split_column=otv_config["column"],
87100
) # noqa
88101
else:

0 commit comments

Comments
 (0)
0