From 126f1a77e164e2afbc398c39ec729ea16d0cf675 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Tue, 15 Oct 2024 00:46:08 +0000
Subject: [PATCH 1/7] fix: model.fit metric not collected issue.

---
 bigframes/ml/core.py                       | 1 +
 tests/system/large/ml/test_linear_model.py | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index 4bc61c5015..8e789386f1 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -273,6 +273,7 @@ def _create_model_ref(
     def _create_model_with_sql(self, session: bigframes.Session, sql: str) -> BqmlModel:
         # fit the model, synchronously
         _, job = session._start_query_ml_ddl(sql)
+        session._metrics.count_job_stats(job)
 
         # real model path in the session specific hidden dataset and table prefix
         model_name_full = f"{job.destination.project}.{job.destination.dataset_id}.{job.destination.table_id}"
diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py
index f593ac2983..145dbb462c 100644
--- a/tests/system/large/ml/test_linear_model.py
+++ b/tests/system/large/ml/test_linear_model.py
@@ -31,8 +31,14 @@ def test_linear_regression_configure_fit_score(penguins_df_default_index, datase
         ]
     ]
     y_train = df[["body_mass_g"]]
+
+    start_execution_count = df._block._expr.session._metrics.execution_count
+
     model.fit(X_train, y_train)
 
+    end_execution_count = df._block._expr.session._metrics.execution_count
+    assert end_execution_count - start_execution_count == 2
+
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
     utils.check_pandas_df_schema_and_index(

From f343e9e4c5d12a495233a05241a7f10b9c923073 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Tue, 15 Oct 2024 01:24:43 +0000
Subject: [PATCH 2/7] update

---
 bigframes/ml/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index 8e789386f1..c93bde83d1 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -273,7 +273,8 @@ def _create_model_ref(
     def _create_model_with_sql(self, session: bigframes.Session, sql: str) -> BqmlModel:
         # fit the model, synchronously
         _, job = session._start_query_ml_ddl(sql)
-        session._metrics.count_job_stats(job)
+        if session._metrics is not None:
+            session._metrics.count_job_stats(job)
 
         # real model path in the session specific hidden dataset and table prefix
         model_name_full = f"{job.destination.project}.{job.destination.dataset_id}.{job.destination.table_id}"

From f64eba2a8b283286589e54d733ef95e34271a9bc Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Tue, 15 Oct 2024 17:11:50 +0000
Subject: [PATCH 3/7] fix unit test

---
 tests/unit/ml/test_golden_sql.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/ml/test_golden_sql.py b/tests/unit/ml/test_golden_sql.py
index 65f079852e..ce05011546 100644
--- a/tests/unit/ml/test_golden_sql.py
+++ b/tests/unit/ml/test_golden_sql.py
@@ -36,6 +36,7 @@ def mock_session():
         TEMP_MODEL_ID.project, TEMP_MODEL_ID.dataset_id
     )
     mock_session._bq_kms_key_name = None
+    mock_session._metrics = None
 
     query_job = mock.create_autospec(bigquery.QueryJob)
     type(query_job).destination = mock.PropertyMock(

From a10a5475b596a3ddacfdeea5e4a9ed9ab4314f3f Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Tue, 15 Oct 2024 20:21:45 +0000
Subject: [PATCH 4/7] update code and test

---
 bigframes/ml/core.py               | 2 --
 bigframes/session/__init__.py      | 9 ++++++++-
 tests/system/small/ml/test_core.py | 6 ++++++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index c93bde83d1..4bc61c5015 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -273,8 +273,6 @@ def _create_model_ref(
     def _create_model_with_sql(self, session: bigframes.Session, sql: str) -> BqmlModel:
         # fit the model, synchronously
         _, job = session._start_query_ml_ddl(sql)
-        if session._metrics is not None:
-            session._metrics.count_job_stats(job)
 
         # real model path in the session specific hidden dataset and table prefix
         model_name_full = f"{job.destination.project}.{job.destination.dataset_id}.{job.destination.table_id}"
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 072bcc5781..9611d938b1 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1373,7 +1373,14 @@ def _start_query_ml_ddl(
         # https://cloud.google.com/bigquery/docs/customer-managed-encryption#encrypt-model
         job_config.destination_encryption_configuration = None
 
-        return bf_io_bigquery.start_query_with_client(self.bqclient, sql, job_config)
+        results_iterator, query_job = bf_io_bigquery.start_query_with_client(
+            self.bqclient, sql, job_config
+        )
+
+        if self._metrics is not None:
+            self._metrics.count_job_stats(query_job)
+
+        return results_iterator, query_job
 
     def _export(
         self,
diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py
index 6b852e87af..65540e7e81 100644
--- a/tests/system/small/ml/test_core.py
+++ b/tests/system/small/ml/test_core.py
@@ -383,8 +383,14 @@ def test_model_forecast(time_series_bqml_arima_plus_model: core.BqmlModel):
 
 def test_model_register(ephemera_penguins_bqml_linear_model: core.BqmlModel):
     model = ephemera_penguins_bqml_linear_model
+
+    start_execution_count = model.session._metrics.execution_count
+
     model.register()
 
+    end_execution_count = model.session._metrics.execution_count
+    assert end_execution_count - start_execution_count == 1
+
     assert model.model.model_id is not None
     model_name = "bigframes_" + model.model.model_id
     # Only registered model contains the field, and the field includes project/dataset. Here only check model_id.

From 8e7937b05c0c55381cded85d4e8b1f75ce55f243 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Tue, 15 Oct 2024 21:50:29 +0000
Subject: [PATCH 5/7] update code

---
 bigframes/session/__init__.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 9611d938b1..6cfaf48493 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1373,15 +1373,10 @@ def _start_query_ml_ddl(
         # https://cloud.google.com/bigquery/docs/customer-managed-encryption#encrypt-model
         job_config.destination_encryption_configuration = None
 
-        results_iterator, query_job = bf_io_bigquery.start_query_with_client(
-            self.bqclient, sql, job_config
+        return bf_io_bigquery.start_query_with_client(
+            self.bqclient, sql, job_config, metrics=self._metrics
         )
 
-        if self._metrics is not None:
-            self._metrics.count_job_stats(query_job)
-
-        return results_iterator, query_job
-
     def _export(
         self,
         array_value: core.ArrayValue,

From 70f96b0e11a440e8116dc86d17baf4f58a1567d0 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Tue, 15 Oct 2024 22:00:14 +0000
Subject: [PATCH 6/7] update comment

---
 tests/system/large/ml/test_linear_model.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py
index 145dbb462c..6189a69857 100644
--- a/tests/system/large/ml/test_linear_model.py
+++ b/tests/system/large/ml/test_linear_model.py
@@ -37,6 +37,8 @@ def test_linear_regression_configure_fit_score(penguins_df_default_index, datase
     model.fit(X_train, y_train)
 
     end_execution_count = df._block._expr.session._metrics.execution_count
+    # The fit function initiates two queries: the first generates and caches
+    # the training data, while the second creates and fits the model.
     assert end_execution_count - start_execution_count == 2
 
     # Check score to ensure the model was fitted

From 0a40d195d2b20973ae022bf3eb9ad9f7a6629a3f Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Tue, 15 Oct 2024 22:11:26 +0000
Subject: [PATCH 7/7] update test

---
 tests/system/large/ml/test_linear_model.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py
index 6189a69857..5c3f7631d9 100644
--- a/tests/system/large/ml/test_linear_model.py
+++ b/tests/system/large/ml/test_linear_model.py
@@ -31,16 +31,8 @@ def test_linear_regression_configure_fit_score(penguins_df_default_index, datase
         ]
     ]
     y_train = df[["body_mass_g"]]
-
-    start_execution_count = df._block._expr.session._metrics.execution_count
-
     model.fit(X_train, y_train)
 
-    end_execution_count = df._block._expr.session._metrics.execution_count
-    # The fit function initiates two queries: the first generates and caches
-    # the training data, while the second creates and fits the model.
-    assert end_execution_count - start_execution_count == 2
-
     # Check score to ensure the model was fitted
     result = model.score(X_train, y_train).to_pandas()
     utils.check_pandas_df_schema_and_index(
@@ -136,10 +128,20 @@ def test_unordered_mode_linear_regression_configure_fit_score_predict(
         ]
     ]
     y_train = df[["body_mass_g"]]
+
+    start_execution_count = df._block._expr.session._metrics.execution_count
     model.fit(X_train, y_train)
+    end_execution_count = df._block._expr.session._metrics.execution_count
+    # The fit function initiates two queries: the first generates and caches
+    # the training data, while the second creates and fits the model.
+    assert end_execution_count - start_execution_count == 2
 
     # Check score to ensure the model was fitted
+    start_execution_count = end_execution_count
     result = model.score(X_train, y_train).to_pandas()
+    end_execution_count = df._block._expr.session._metrics.execution_count
+    assert end_execution_count - start_execution_count == 1
+
     utils.check_pandas_df_schema_and_index(
         result, columns=utils.ML_REGRESSION_METRICS, index=1
     )
@@ -162,7 +164,10 @@ def test_unordered_mode_linear_regression_configure_fit_score_predict(
     assert reloaded_model.max_iterations == 20
     assert reloaded_model.tol == 0.01
 
+    start_execution_count = df._block._expr.session._metrics.execution_count
     pred = reloaded_model.predict(df)
+    end_execution_count = df._block._expr.session._metrics.execution_count
+    assert end_execution_count - start_execution_count == 1
     utils.check_pandas_df_schema_and_index(
         pred,
         columns=("predicted_body_mass_g",),