1
1
from typing import List , Tuple
2
2
3
+ import pandas as pd
3
4
from ingestion .ingestion_pipeline import Ingestion
4
5
from model_selection .splitters import ( # noqa
5
6
test_train_splitter ,
17
18
18
19
class LazyLearner :
19
20
"""
20
- TODO: Introduce LazyLearner
21
+ This class implements the LazyLearner, an object that
22
+ runs AutoML for regression and classification given a
23
+ dataset.
24
+
25
+ To run an experiment with LazyLearn, instantiate the class,
26
+ then add a dataset and configure the project through the
27
+ create_project() method. To trigger the modelling process,
28
+ call the run_autopilot() method with the desired mode.
21
29
"""
22
30
23
31
def __init__ (self , random_state = None ):
@@ -47,7 +55,11 @@ def create_project(
47
55
:param task: "regression", "classification" or "infer"
48
56
:param metric: metric by which to rank models
49
57
:param test_size: share of dataset to use for holdout
50
- :param otv_config: out-of-time validation configuration
58
+ :param otv_config: out-of-time validation configuration of the form
59
+ otv_config = {
60
+ "column": "column_to_time_partition_on",
61
+ "holdout_start_date": "YYYYMMDD"
62
+ }
51
63
:return:
52
64
"""
53
65
# ingest data
@@ -72,7 +84,8 @@ def create_project(
72
84
73
85
if otv_config is not None :
74
86
assert (
75
- otv_config ["column" ] in self .dataset .column_type_map ["datetime" ] # noqa
87
+ otv_config ["column" ]
88
+ in self .dataset .type_collections ["datetime" ] # noqa
76
89
)
77
90
self .otv_config = otv_config
78
91
self .dataset .df = self .dataset .df .sort_values (
@@ -82,7 +95,7 @@ def create_project(
82
95
self .dataset = time_test_train_splitter (
83
96
self .dataset ,
84
97
test_size = test_size ,
85
- split_date = otv_config ["column" ] ,
98
+ split_date = pd . to_datetime ( otv_config ["holdout_start_date" ]) ,
86
99
split_column = otv_config ["column" ],
87
100
) # noqa
88
101
else :
0 commit comments