From a50b0c693b8a1f9abe6dd00b215a5b6f905063fb Mon Sep 17 00:00:00 2001 From: Daniela Date: Tue, 17 Dec 2024 19:37:01 +0000 Subject: [PATCH 1/4] merge main --- ..._single_timeseries_forecasting_model_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py index 0c694de2dc..6b0457381b 100644 --- a/samples/snippets/create_single_timeseries_forecasting_model_test.py +++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py @@ -37,3 +37,20 @@ def test_create_single_timeseries() -> None: total_visits.plot.line() # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial] + + # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create] + from bigframes.ml import forecasting + import bigframes.pandas as bpd + + model = forecasting.ARIMAPlus() + model.auto_arima = True + model.data_frequency = "auto_frequency" + model.decompose_time_series = True + + X = total_visits + y = parsed_date + + model.fit(X, y) + # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create] + assert total_visits is not None + assert model is not None From 33d094044de260f334325d61b8867bb40032be2d Mon Sep 17 00:00:00 2001 From: Daniela Date: Tue, 17 Dec 2024 19:37:01 +0000 Subject: [PATCH 2/4] merge main --- ..._single_timeseries_forecasting_model_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py index 0c694de2dc..6b0457381b 100644 --- a/samples/snippets/create_single_timeseries_forecasting_model_test.py +++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py @@ -37,3 +37,20 @@ def test_create_single_timeseries() -> None: total_visits.plot.line() # [END 
bigquery_dataframes_single_timeseries_forecasting_model_tutorial] + + # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create] + from bigframes.ml import forecasting + import bigframes.pandas as bpd + + model = forecasting.ARIMAPlus() + model.auto_arima = True + model.data_frequency = "auto_frequency" + model.decompose_time_series = True + + X = total_visits + y = parsed_date + + model.fit(X, y) + # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create] + assert total_visits is not None + assert model is not None From 389944faa3c8f5c188935666c71e5e2306a5edf0 Mon Sep 17 00:00:00 2001 From: Daniela Date: Wed, 18 Dec 2024 00:22:32 +0000 Subject: [PATCH 3/4] comments added - draft --- ...create_single_timeseries_forecasting_model_test.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py index 6b0457381b..4317210ec2 100644 --- a/samples/snippets/create_single_timeseries_forecasting_model_test.py +++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py @@ -42,15 +42,24 @@ def test_create_single_timeseries() -> None: from bigframes.ml import forecasting import bigframes.pandas as bpd + # Create a time series model to forecast total site visits: + # The auto_arima option defaults to True, so the auto.ARIMA algorithm automatically + # tunes the hyperparameters in the model. + # The data_frequency option defaults to 'auto_frequency', so the training + # process automatically infers the data frequency of the input time series. + # The decompose_time_series option defaults to True, so that information about + # the time series data is returned when you evaluate the model in the next step. 
model = forecasting.ARIMAPlus() model.auto_arima = True model.data_frequency = "auto_frequency" model.decompose_time_series = True + # Use the data loaded in the previous step to fit the model X = total_visits y = parsed_date model.fit(X, y) # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create] - assert total_visits is not None assert model is not None + assert parsed_date is not None + assert total_visits is not None From 9fe68aebaccae705f44996fc406c75b972240b51 Mon Sep 17 00:00:00 2001 From: Daniela Date: Thu, 19 Dec 2024 20:19:49 +0000 Subject: [PATCH 4/4] fix test --- ...reate_single_timeseries_forecasting_model_test.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py index 4317210ec2..0e69eba3dd 100644 --- a/samples/snippets/create_single_timeseries_forecasting_model_test.py +++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py @@ -22,17 +22,19 @@ def test_create_single_timeseries() -> None: # Read and visualize the time series you want to forecast. 
df = bpd.read_gbq("bigquery-public-data.google_analytics_sample.ga_sessions_*") parsed_date = bpd.to_datetime(df.date, format="%Y%m%d", utc=True) + parsed_date.name = "parsed_date" visits = df["totals"].struct.field("visits") + visits.name = "total_visits" total_visits = visits.groupby(parsed_date).sum() # Expected output: total_visits.head() - # date + # parsed_date # 2016-08-01 00:00:00+00:00 1711 # 2016-08-02 00:00:00+00:00 2140 # 2016-08-03 00:00:00+00:00 2890 # 2016-08-04 00:00:00+00:00 3161 # 2016-08-05 00:00:00+00:00 2702 - # Name: visits, dtype: Int64 + # Name: total_visits, dtype: Int64 total_visits.plot.line() @@ -55,8 +57,10 @@ def test_create_single_timeseries() -> None: model.decompose_time_series = True # Use the data loaded in the previous step to fit the model - X = total_visits - y = parsed_date + training_data = total_visits.to_frame().reset_index(drop=False) + + X = training_data[["parsed_date"]] + y = training_data[["total_visits"]] model.fit(X, y) # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create]