diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py index 90df6f9539..658818b261 100644 --- a/bigframes/ml/metrics/_metrics.py +++ b/bigframes/ml/metrics/_metrics.py @@ -25,7 +25,6 @@ import bigframes_vendored.sklearn.metrics._regression as vendored_metrics_regression import numpy as np import pandas as pd -import sklearn.metrics as sklearn_metrics # type: ignore from bigframes.ml import utils import bigframes.pandas as bpd @@ -176,9 +175,9 @@ def auc( ) -> float: x_series, y_series = utils.batch_convert_to_series(x, y) - # TODO(b/286410053) Support ML exceptions and error handling. - auc = sklearn_metrics.auc(x_series.to_pandas(), y_series.to_pandas()) - return auc + x_pandas = x_series.to_pandas() + y_pandas = y_series.to_pandas() + return vendored_metrics_ranking.auc(x_pandas, y_pandas) auc.__doc__ = inspect.getdoc(vendored_metrics_ranking.auc) diff --git a/noxfile.py b/noxfile.py index b851bf160d..bffb6ebaa0 100644 --- a/noxfile.py +++ b/noxfile.py @@ -72,7 +72,9 @@ UNIT_TEST_LOCAL_DEPENDENCIES: List[str] = [] UNIT_TEST_DEPENDENCIES: List[str] = [] UNIT_TEST_EXTRAS: List[str] = [] -UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {"3.12": ["polars"]} +UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = { + "3.12": ["polars", "scikit-learn"], +} # 3.10 is needed for Windows tests as it is the only version installed in the # bigframes-windows container image. 
For more information, search @@ -96,8 +98,13 @@ ] SYSTEM_TEST_LOCAL_DEPENDENCIES: List[str] = [] SYSTEM_TEST_DEPENDENCIES: List[str] = [] -SYSTEM_TEST_EXTRAS: List[str] = ["tests"] -SYSTEM_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {} +SYSTEM_TEST_EXTRAS: List[str] = [] +SYSTEM_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = { + "3.9": ["tests"], + "3.10": ["tests"], + "3.12": ["tests", "scikit-learn"], + "3.13": ["tests"], +} LOGGING_NAME_ENV_VAR = "BIGFRAMES_PERFORMANCE_LOG_NAME" @@ -468,8 +475,7 @@ def cover(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def docs(session): """Build the docs for this library.""" - - session.install("-e", ".") + session.install("-e", ".[scikit-learn]") session.install( # We need to pin to specific versions of the `sphinxcontrib-*` packages # which still support sphinx 4.x. @@ -510,7 +516,7 @@ def docs(session): def docfx(session): """Build the docfx yaml files for this library.""" - session.install("-e", ".") + session.install("-e", ".[scikit-learn]") session.install( # We need to pin to specific versions of the `sphinxcontrib-*` packages # which still support sphinx 4.x. diff --git a/scripts/test_publish_api_coverage.py b/scripts/test_publish_api_coverage.py index 034a266177..6dea10b608 100644 --- a/scripts/test_publish_api_coverage.py +++ b/scripts/test_publish_api_coverage.py @@ -19,6 +19,8 @@ from .
import publish_api_coverage +pytest.importorskip("sklearn") + @pytest.fixture def api_coverage_df(): diff --git a/setup.py b/setup.py index 4386177a5e..1f6114b634 100644 --- a/setup.py +++ b/setup.py @@ -55,8 +55,6 @@ "pyarrow >=10.0.1", "pydata-google-auth >=1.8.2", "requests >=2.27.1", - "scikit-learn >=1.2.2", - "sqlalchemy >=1.4,<3.0dev", "sqlglot >=23.6.3", "tabulate >=0.9", "ipywidgets >=7.7.1", @@ -77,8 +75,15 @@ "tests": [], # used for local engine, which is only needed for unit tests at present. "polars": ["polars >= 1.7.0"], + "scikit-learn": ["scikit-learn>=1.2.2"], # Packages required for basic development flow. - "dev": ["pytest", "pytest-mock", "pre-commit", "nox", "google-cloud-testutils"], + "dev": [ + "pytest", + "pytest-mock", + "pre-commit", + "nox", + "google-cloud-testutils", + ], } extras["all"] = list(sorted(frozenset(itertools.chain.from_iterable(extras.values())))) diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 8b7ad892c0..30d5c1c3a7 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -20,7 +20,6 @@ pyarrow==10.0.1 pydata-google-auth==1.8.2 requests==2.27.1 scikit-learn==1.2.2 -sqlalchemy==1.4 sqlglot==23.6.3 tabulate==0.9 ipywidgets==7.7.1 diff --git a/tests/system/small/ml/test_metrics.py b/tests/system/small/ml/test_metrics.py index 81e1b2f77f..b80202bdbe 100644 --- a/tests/system/small/ml/test_metrics.py +++ b/tests/system/small/ml/test_metrics.py @@ -17,7 +17,6 @@ import numpy as np import pandas as pd import pytest -import sklearn.metrics as sklearn_metrics # type: ignore import bigframes from bigframes.ml import metrics @@ -66,6 +65,7 @@ def test_r2_score_force_finite(session): def test_r2_score_ok_fit_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame({"y_true": [1, 2, 3, 4, 5], "y_pred": [2, 3, 4, 3, 6]}) df = session.read_pandas(pd_df) @@ -113,6 +113,7 @@ def test_accuracy_score_not_normailze(session): def 
test_accuracy_score_fit_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame({"y_true": [1, 2, 3, 4, 5], "y_pred": [2, 3, 4, 3, 6]}) df = session.read_pandas(pd_df) @@ -203,6 +204,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session): def test_roc_curve_binary_classification_prediction_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": [0, 0, 1, 1, 0, 1, 0, 1, 1, 1], @@ -294,6 +296,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session): def test_roc_curve_binary_classification_decision_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") # Instead of operating on probabilities, assume a 70% decision threshold # has been applied, and operate on the final output y_score = [0.1, 0.4, 0.35, 0.8, 0.65, 0.9, 0.5, 0.3, 0.6, 0.45] @@ -420,6 +423,7 @@ def test_roc_auc_score_returns_expected(session): def test_roc_auc_score_returns_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": [0, 0, 1, 1, 0, 1, 0, 1, 1, 1], @@ -525,6 +529,7 @@ def test_confusion_matrix_column_index(session): def test_confusion_matrix_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": [2, 3, 3, 3, 4, 1], @@ -543,6 +548,7 @@ def test_confusion_matrix_matches_sklearn(session): def test_confusion_matrix_str_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": ["cat", "ant", "cat", "cat", "ant", "bird"], @@ -603,6 +609,7 @@ def test_recall_score(session): def test_recall_score_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": [2, 0, 2, 2, 0, 1], @@ -620,6 +627,7 @@ def test_recall_score_matches_sklearn(session): def 
test_recall_score_str_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": ["cat", "ant", "cat", "cat", "ant", "bird"], @@ -673,6 +681,7 @@ def test_precision_score(session): def test_precision_score_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": [2, 0, 2, 2, 0, 1], @@ -695,6 +704,7 @@ def test_precision_score_matches_sklearn(session): def test_precision_score_str_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": ["cat", "ant", "cat", "cat", "ant", "bird"], @@ -752,6 +762,7 @@ def test_f1_score(session): def test_f1_score_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": [2, 0, 2, 2, 0, 1], @@ -769,6 +780,7 @@ def test_f1_score_matches_sklearn(session): def test_f1_score_str_matches_sklearn(session): + sklearn_metrics = pytest.importorskip("sklearn.metrics") pd_df = pd.DataFrame( { "y_true": ["cat", "ant", "cat", "cat", "ant", "bird"], diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 00f47c754e..2daa7dd825 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -641,6 +641,8 @@ def test_series_replace_dict(scalars_dfs, replacement_dict): ), ) def test_series_interpolate(method): + pytest.importorskip("scipy") + values = [None, 1, 2, None, None, 16, None] index = [-3.2, 11.4, 3.56, 4, 4.32, 5.55, 76.8] pd_series = pd.Series(values, index) diff --git a/tests/unit/ml/test_api_primitives.py b/tests/unit/ml/test_api_primitives.py index 00a51ccfe9..dd2ceff143 100644 --- a/tests/unit/ml/test_api_primitives.py +++ b/tests/unit/ml/test_api_primitives.py @@ -13,8 +13,6 @@ # limitations under the License. 
import pytest -import sklearn.decomposition as sklearn_decomposition # type: ignore -import sklearn.linear_model as sklearn_linear_model # type: ignore import bigframes.ml.decomposition import bigframes.ml.linear_model @@ -35,8 +33,9 @@ def test_base_estimator_repr(): assert pca_estimator.__repr__() == "PCA(n_components=7)" -@pytest.mark.skipif(sklearn_linear_model is None, reason="requires sklearn") def test_base_estimator_repr_matches_sklearn(): + sklearn_decomposition = pytest.importorskip("sklearn.decomposition") + sklearn_linear_model = pytest.importorskip("sklearn.linear_model") estimator = bigframes.ml.linear_model.LinearRegression() sklearn_estimator = sklearn_linear_model.LinearRegression() assert estimator.__repr__() == sklearn_estimator.__repr__() diff --git a/tests/unit/ml/test_compose.py b/tests/unit/ml/test_compose.py index 395296f3e4..450ce8d6ee 100644 --- a/tests/unit/ml/test_compose.py +++ b/tests/unit/ml/test_compose.py @@ -15,8 +15,6 @@ from google.cloud import bigquery import pytest -import sklearn.compose as sklearn_compose # type: ignore -import sklearn.preprocessing as sklearn_preprocessing # type: ignore from bigframes.ml import compose, preprocessing from bigframes.ml.compose import ColumnTransformer, SQLScalarColumnTransformer @@ -119,6 +117,8 @@ def test_columntransformer_repr(): def test_columntransformer_repr_matches_sklearn(): + sklearn_compose = pytest.importorskip("sklearn.compose") + sklearn_preprocessing = pytest.importorskip("sklearn.preprocessing") bf_column_transformer = compose.ColumnTransformer( [ ( diff --git a/tests/unit/ml/test_pipeline.py b/tests/unit/ml/test_pipeline.py index ed5c621b1d..beebb9f282 100644 --- a/tests/unit/ml/test_pipeline.py +++ b/tests/unit/ml/test_pipeline.py @@ -13,10 +13,6 @@ # limitations under the License. 
import pytest -import sklearn.compose as sklearn_compose # type: ignore -import sklearn.linear_model as sklearn_linear_model # type: ignore -import sklearn.pipeline as sklearn_pipeline # type: ignore -import sklearn.preprocessing as sklearn_preprocessing # type: ignore from bigframes.ml import compose, forecasting, linear_model, pipeline, preprocessing @@ -57,8 +53,11 @@ def test_pipeline_repr(): ) -@pytest.mark.skipif(sklearn_pipeline is None, reason="requires sklearn") def test_pipeline_repr_matches_sklearn(): + sklearn_compose = pytest.importorskip("sklearn.compose") + sklearn_linear_model = pytest.importorskip("sklearn.linear_model") + sklearn_pipeline = pytest.importorskip("sklearn.pipeline") + sklearn_preprocessing = pytest.importorskip("sklearn.preprocessing") bf_pl = pipeline.Pipeline( [ ( diff --git a/third_party/bigframes_vendored/sklearn/metrics/_ranking.py b/third_party/bigframes_vendored/sklearn/metrics/_ranking.py index 7b97526de2..9262ffbd3d 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/_ranking.py +++ b/third_party/bigframes_vendored/sklearn/metrics/_ranking.py @@ -16,6 +16,8 @@ # Michal Karbownik # License: BSD 3 clause +import numpy as np + from bigframes import constants @@ -60,7 +62,23 @@ def auc(x, y) -> float: Returns: float: Area Under the Curve. """ - raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + if len(x) < 2: + raise ValueError( + f"At least 2 points are needed to compute area under curve, but x.shape = {len(x)}" + ) + + if x.is_monotonic_decreasing: + d = -1 + elif x.is_monotonic_increasing: + d = 1 + else: + raise ValueError(f"x is neither increasing nor decreasing : {x}.") + + if hasattr(np, "trapezoid"): + # new in numpy 2.0 + return d * np.trapezoid(y, x) + # np.trapz has been deprecated in 2.0 + return d * np.trapz(y, x) # type: ignore def roc_auc_score(y_true, y_score) -> float: