From ea05a6132fd607a6f4b80d3782909d144d8d8c00 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Thu, 2 Aug 2018 16:45:47 -0400
Subject: [PATCH 1/3] Used latex in ElasticNet doc

---
 sklearn/linear_model/coordinate_descent.py | 47 +++++++++++++---------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
index 6c0a74ae4a719..f6afae5ebf3a0 100644
--- a/sklearn/linear_model/coordinate_descent.py
+++ b/sklearn/linear_model/coordinate_descent.py
@@ -273,21 +273,25 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
 
     The elastic net optimization function varies for mono and multi-outputs.
 
-    For mono-output tasks it is::
+    For mono-output tasks it is:
 
-        1 / (2 * n_samples) * ||y - Xw||^2_2
-        + alpha * l1_ratio * ||w||_1
-        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2
+    .. math::
 
-    For multi-output tasks it is::
+        \\frac{1}{2n_{samples}} ||X w - y||_2 ^ 2 + \\alpha \\rho
+        ||w||_1 + \\frac{\\alpha(1-\\rho)}{2} ||w||_2 ^ 2
 
-        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2
-        + alpha * l1_ratio * ||W||_21
-        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
+    For multi-output tasks it is:
 
-    Where::
+    .. math::
 
-        ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}
+        \\frac{1}{2n_{samples}} ||X W - y||_{Fro}^2 + \\alpha \\rho
+        ||W||_{21} + \\frac{\\alpha(1-\\rho)}{2} ||W||_{Fro} ^ 2
+
+    Where:
+
+    .. math::
+
+        ||W||_{21} = \\sum_i \\sqrt{\\sum_j w_{ij}^2}
 
     i.e. the sum of norm of each row.
 
@@ -510,20 +514,27 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
 class ElasticNet(LinearModel, RegressorMixin):
     """Linear regression with combined L1 and L2 priors as regularizer.
 
-    Minimizes the objective function::
+    Minimizes the objective function:
+
+    .. math::
 
-        1 / (2 * n_samples) * ||y - Xw||^2_2
-        + alpha * l1_ratio * ||w||_1
-        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2
+        \\min_{w} { \\frac{1}{2n_{samples}} ||X w - y||_2 ^ 2 + \\alpha \\rho
+        ||w||_1 + \\frac{\\alpha(1-\\rho)}{2} ||w||_2 ^ 2}
+
+    where :math:`\\rho` corresponds to the ``l1_ratio`` parameter.
 
     If you are interested in controlling the L1 and L2 penalty
-    separately, keep in mind that this is equivalent to::
+    separately, keep in mind that this is equivalent to:
+
+    .. math::
+
+        a \\times L1 + b \\times L2
 
-        a * L1 + b * L2
+    where:
 
-    where::
+    .. math::
 
-        alpha = a + b and l1_ratio = a / (a + b)
+        \\alpha = a + b ~\\text{ and l1_ratio }~ = \\frac{a}{a + b}
 
     The parameter l1_ratio corresponds to alpha in the glmnet R package while
     alpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio

From 0f5438dff2530f5e35327fbfda36bb2b4999b3c7 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Fri, 3 Aug 2018 11:25:53 -0400
Subject: [PATCH 2/3] Revert "Used latex in ElasticNet doc"

This reverts commit ea05a6132fd607a6f4b80d3782909d144d8d8c00.
---
 sklearn/linear_model/coordinate_descent.py | 47 +++++++++-------------
 1 file changed, 18 insertions(+), 29 deletions(-)

diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
index f6afae5ebf3a0..6c0a74ae4a719 100644
--- a/sklearn/linear_model/coordinate_descent.py
+++ b/sklearn/linear_model/coordinate_descent.py
@@ -273,25 +273,21 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
 
     The elastic net optimization function varies for mono and multi-outputs.
 
-    For mono-output tasks it is:
-
-    .. math::
-
-        \\frac{1}{2n_{samples}} ||X w - y||_2 ^ 2 + \\alpha \\rho
-        ||w||_1 + \\frac{\\alpha(1-\\rho)}{2} ||w||_2 ^ 2
-
-    For multi-output tasks it is:
+    For mono-output tasks it is::
 
-    .. math::
+        1 / (2 * n_samples) * ||y - Xw||^2_2
+        + alpha * l1_ratio * ||w||_1
+        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2
 
-        \\frac{1}{2n_{samples}} ||X W - y||_{Fro}^2 + \\alpha \\rho
-        ||W||_{21} + \\frac{\\alpha(1-\\rho)}{2} ||W||_{Fro} ^ 2
+    For multi-output tasks it is::
 
-    Where:
+        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2
+        + alpha * l1_ratio * ||W||_21
+        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
 
-    .. math::
+    Where::
 
-        ||W||_{21} = \\sum_i \\sqrt{\\sum_j w_{ij}^2}
+        ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}
 
     i.e. the sum of norm of each row.
 
@@ -514,27 +510,20 @@ def enet_path(X, y, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,
 class ElasticNet(LinearModel, RegressorMixin):
     """Linear regression with combined L1 and L2 priors as regularizer.
 
-    Minimizes the objective function:
-
-    .. math::
+    Minimizes the objective function::
 
-        \\min_{w} { \\frac{1}{2n_{samples}} ||X w - y||_2 ^ 2 + \\alpha \\rho
-        ||w||_1 + \\frac{\\alpha(1-\\rho)}{2} ||w||_2 ^ 2}
-
-    where :math:`\\rho` corresponds to the ``l1_ratio`` parameter.
+        1 / (2 * n_samples) * ||y - Xw||^2_2
+        + alpha * l1_ratio * ||w||_1
+        + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2
 
     If you are interested in controlling the L1 and L2 penalty
-    separately, keep in mind that this is equivalent to:
-
-    .. math::
-
-        a \\times L1 + b \\times L2
+    separately, keep in mind that this is equivalent to::
 
-    where:
+        a * L1 + b * L2
 
-    .. math::
+    where::
 
-        \\alpha = a + b ~\\text{ and l1_ratio }~ = \\frac{a}{a + b}
+        alpha = a + b and l1_ratio = a / (a + b)
 
     The parameter l1_ratio corresponds to alpha in the glmnet R package while
     alpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio

From 6bc33ea7ede7e7a94e73ce4dd4c4142e8810146a Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Fri, 3 Aug 2018 11:28:06 -0400
Subject: [PATCH 3/3] Minor doc changes to coordinate_descent.py

Removed pseudo latex notation and fixed Fro norm indication
---
 sklearn/linear_model/coordinate_descent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
index 6c0a74ae4a719..5048402935a5b 100644
--- a/sklearn/linear_model/coordinate_descent.py
+++ b/sklearn/linear_model/coordinate_descent.py
@@ -1612,13 +1612,13 @@ class MultiTaskElasticNet(Lasso):
 
     The optimization objective for MultiTaskElasticNet is::
 
-        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2
+        (1 / (2 * n_samples)) * ||Y - XW||_Fro^2
         + alpha * l1_ratio * ||W||_21
         + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
 
     Where::
 
-        ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}
+        ||W||_21 = sum_i sqrt(sum_j w_ij ^ 2)
 
     i.e. the sum of norm of each row.
 
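
The two relationships documented in these patches can be sanity-checked numerically: the ``a``/``b`` penalty split for ``ElasticNet`` (``alpha = a + b``, ``l1_ratio = a / (a + b)``) and the ``||W||_21`` row norm used by ``MultiTaskElasticNet``. A minimal illustrative Python sketch follows; the values of ``a``, ``b`` and ``W`` are made up for the example and are not part of the patch::

    import numpy as np

    # ElasticNet docstring: a * L1 + b * L2 is equivalent to using
    # alpha = a + b and l1_ratio = a / (a + b).
    a, b = 0.7, 0.3
    alpha = a + b            # 1.0
    l1_ratio = a / (a + b)   # 0.7

    # MultiTaskElasticNet docstring:
    # ||W||_21 = sum_i sqrt(sum_j w_ij ^ 2), the sum of the norms of each row.
    W = np.array([[3.0, 4.0],
                  [0.0, 5.0]])
    l21_norm = np.sum(np.sqrt(np.sum(W ** 2, axis=1)))

    print(alpha, l1_ratio, l21_norm)  # 1.0 0.7 10.0  (rows have norms 5 and 5)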