From 7c54edc6310b18c55bf4f61b7ab61aab49bfd392 Mon Sep 17 00:00:00 2001 From: Jigna Panchal Date: Sat, 2 Nov 2019 14:19:42 -0700 Subject: [PATCH 1/6] alphabetized and added attributes to naive_bayes.py --- sklearn/naive_bayes.py | 121 +++++++++++++++++++---------------------- 1 file changed, 57 insertions(+), 64 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index be9450d7334f0..03f669b9ee7e8 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -139,26 +139,23 @@ class GaussianNB(BaseNB): Attributes ---------- - class_prior_ : array, shape (n_classes,) - probability of each class. - class_count_ : array, shape (n_classes,) number of training samples observed in each class. + + class_prior_ : array, shape (n_classes,) + probability of each class. classes_ : array, shape (n_classes,) class labels known to the classifier - theta_ : array, shape (n_classes, n_features) - mean of each feature per class - - sigma_ : array, shape (n_classes, n_features) - variance of each feature per class - epsilon_ : float absolute additive value to variances - - classes_ : array-like, shape (n_classes,) - Unique class labels. + + sigma_ : array, shape (n_classes, n_features) + variance of each feature per class + + theta_ : array, shape (n_classes, n_features) + mean of each feature per class Examples -------- @@ -689,38 +686,36 @@ class MultinomialNB(BaseDiscreteNB): Attributes ---------- - class_log_prior_ : array, shape (n_classes, ) - Smoothed empirical log probability for each class. - - intercept_ : array, shape (n_classes, ) - Mirrors ``class_log_prior_`` for interpreting MultinomialNB - as a linear model. - - feature_log_prob_ : array, shape (n_classes, n_features) - Empirical log probability of features - given a class, ``P(x_i|y)``. - - coef_ : array, shape (n_classes, n_features) - Mirrors ``feature_log_prob_`` for interpreting MultinomialNB - as a linear model. - class_count_ : array, shape (n_classes,) Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. + class_log_prior_ : array, shape (n_classes, ) + Smoothed empirical log probability for each class. + classes_ : array, shape (n_classes,) Class labels known to the classifier + coef_ : array, shape (n_classes, n_features) + Mirrors ``feature_log_prob_`` for interpreting MultinomialNB + as a linear model. + feature_count_ : array, shape (n_classes, n_features) Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided. + feature_log_prob_ : array, shape (n_classes, n_features) + Empirical log probability of features + given a class, ``P(x_i|y)``. + + intercept_ : array, shape (n_classes, ) + Mirrors ``class_log_prior_`` for interpreting MultinomialNB + as a linear model. + n_features_ : int Number of features of each sample. - classes_ : array-like, shape (n_classes,) - Unique class labels. Examples -------- @@ -804,33 +799,30 @@ class ComplementNB(BaseDiscreteNB): Attributes ---------- - class_log_prior_ : array, shape (n_classes, ) - Smoothed empirical log probability for each class. Only used in edge - case with a single class in the training set. - - feature_log_prob_ : array, shape (n_classes, n_features) - Empirical weights for class complements. - class_count_ : array, shape (n_classes,) Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. + class_log_prior_ : array, shape (n_classes, ) + Smoothed empirical log probability for each class. Only used in edge + case with a single class in the training set. + classes_ : array, shape (n_classes,) Class labels known to the classifier - + + feature_all_ : array, shape (n_features,) + Number of samples encountered for each feature during fitting. This + value is weighted by the sample weight when provided. + feature_count_ : array, shape (n_classes, n_features) Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided. - n_features_ : int - Number of features of each sample. - - feature_all_ : array, shape (n_features,) - Number of samples encountered for each feature during fitting. This - value is weighted by the sample weight when provided. + feature_log_prob_ : array, shape (n_classes, n_features) + Empirical weights for class complements. - classes_ : array of shape (n_classes,) - The classes labels. + n_features_ : int + Number of features of each sample. Examples -------- @@ -919,29 +911,27 @@ class BernoulliNB(BaseDiscreteNB): Attributes ---------- - class_log_prior_ : array, shape = [n_classes] - Log probability of each class (smoothed). - - feature_log_prob_ : array, shape = [n_classes, n_features] - Empirical log probability of features given a class, P(x_i|y). - class_count_ : array, shape = [n_classes] Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. + + class_log_prior_ : array, shape = [n_classes] + Log probability of each class (smoothed). classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_count_ : array, shape = [n_classes, n_features] Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided. + + feature_log_prob_ : array, shape = [n_classes, n_features] + Empirical log probability of features given a class, P(x_i|y). n_features_ : int Number of features of each sample. - classes_ : array of shape (n_classes,) - The classes labels. Examples -------- @@ -1045,23 +1035,26 @@ class CategoricalNB(BaseDiscreteNB): Attributes ---------- - class_log_prior_ : array, shape (n_classes, ) - Smoothed empirical log probability for each class. - - feature_log_prob_ : list of arrays, len n_features + category_count_ : list of arrays, len n_features Holds arrays of shape (n_classes, n_categories of respective feature) - for each feature. Each array provides the empirical log probability - of categories given the respective feature and class, ``P(x_i|y)``. + for each feature. Each array provides the number of samples + encountered for each class and category of the specific feature. class_count_ : array, shape (n_classes,) Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. + + class_log_prior_ : array, shape (n_classes, ) + Smoothed empirical log probability for each class. - category_count_ : list of arrays, len n_features + classes_ : array, shape (n_classes,) + Class labels known to the classifier + + feature_log_prob_ : list of arrays, len n_features Holds arrays of shape (n_classes, n_categories of respective feature) - for each feature. Each array provides the number of samples - encountered for each class and category of the specific feature. - + for each feature. Each array provides the empirical log probability + of categories given the respective feature and class, ``P(x_i|y)``. + n_features_ : int Number of features of each sample. From 0ed036b1eddf140dd03495ea4e3087b854566c12 Mon Sep 17 00:00:00 2001 From: Jigna Panchal Date: Sat, 2 Nov 2019 14:55:46 -0700 Subject: [PATCH 2/6] removed extra whitespace and scipy.sparse.issparse (never used) from naive_bayes.py --- sklearn/naive_bayes.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 03f669b9ee7e8..8c2c09c38fbba 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -21,7 +21,6 @@ import numpy as np -from scipy.sparse import issparse from .base import BaseEstimator, ClassifierMixin from .preprocessing import binarize @@ -141,7 +140,7 @@ class GaussianNB(BaseNB): ---------- class_count_ : array, shape (n_classes,) number of training samples observed in each class. - + class_prior_ : array, shape (n_classes,) probability of each class. @@ -150,10 +149,10 @@ class labels known to the classifier epsilon_ : float absolute additive value to variances - + sigma_ : array, shape (n_classes, n_features) variance of each feature per class - + theta_ : array, shape (n_classes, n_features) mean of each feature per class @@ -699,7 +698,7 @@ class MultinomialNB(BaseDiscreteNB): coef_ : array, shape (n_classes, n_features) Mirrors ``feature_log_prob_`` for interpreting MultinomialNB as a linear model. - + feature_count_ : array, shape (n_classes, n_features) Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when @@ -712,7 +711,7 @@ class MultinomialNB(BaseDiscreteNB): intercept_ : array, shape (n_classes, ) Mirrors ``class_log_prior_`` for interpreting MultinomialNB as a linear model. - + n_features_ : int Number of features of each sample. @@ -806,14 +805,14 @@ class ComplementNB(BaseDiscreteNB): class_log_prior_ : array, shape (n_classes, ) Smoothed empirical log probability for each class. Only used in edge case with a single class in the training set. - + classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_all_ : array, shape (n_features,) Number of samples encountered for each feature during fitting. This value is weighted by the sample weight when provided. - + feature_count_ : array, shape (n_classes, n_features) Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided. @@ -822,7 +821,7 @@ class ComplementNB(BaseDiscreteNB): Empirical weights for class complements. n_features_ : int - Number of features of each sample. + Number of features of each sample. Examples -------- @@ -914,18 +913,18 @@ class BernoulliNB(BaseDiscreteNB): class_count_ : array, shape = [n_classes] Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. - + class_log_prior_ : array, shape = [n_classes] Log probability of each class (smoothed). classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_count_ : array, shape = [n_classes, n_features] Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided. - + feature_log_prob_ : array, shape = [n_classes, n_features] Empirical log probability of features given a class, P(x_i|y). @@ -1043,18 +1042,18 @@ class CategoricalNB(BaseDiscreteNB): class_count_ : array, shape (n_classes,) Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. - + class_log_prior_ : array, shape (n_classes, ) Smoothed empirical log probability for each class. classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_log_prob_ : list of arrays, len n_features Holds arrays of shape (n_classes, n_categories of respective feature) for each feature. Each array provides the empirical log probability of categories given the respective feature and class, ``P(x_i|y)``. - + n_features_ : int Number of features of each sample. From 7b87aefbd104abf5430f684f4a571ae6cd9f166e Mon Sep 17 00:00:00 2001 From: Jigna Panchal Date: Sat, 2 Nov 2019 15:54:15 -0700 Subject: [PATCH 3/6] Updated documentation for RandomTreesEmbedding attributes --- sklearn/ensemble/_forest.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index 39af503c43279..ef1947943066a 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -2094,9 +2094,25 @@ class RandomTreesEmbedding(BaseForest): Attributes ---------- + base_estimator_ : DecisionTreeClassifier + The child estimator template used to create the collection of fitted + sub-estimators. + estimators_ : list of DecisionTreeClassifier The collection of fitted sub-estimators. + feature_importances_ : ndarray of shape (n_features,) + The feature importances (the higher, the more important the feature). + + n_features_ : int + The number of features when ``fit`` is performed. + + n_outputs_ : int + The number of outputs when ``fit`` is performed. + + one_hot_encoder_ : OneHotEncoder + Returns the one-hot encoder used to create the sparse matrix. + References ---------- .. [1] P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized trees", From d590ff73b52e61b620825f0702b3ecc790470fab Mon Sep 17 00:00:00 2001 From: Jigna Panchal Date: Sat, 2 Nov 2019 16:00:39 -0700 Subject: [PATCH 4/6] Revert "removed extra whitespace and scipy.sparse.issparse (never used) from naive_bayes.py" This reverts commit 0ed036b1eddf140dd03495ea4e3087b854566c12. --- sklearn/naive_bayes.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 8c2c09c38fbba..03f669b9ee7e8 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -21,6 +21,7 @@ import numpy as np +from scipy.sparse import issparse from .base import BaseEstimator, ClassifierMixin from .preprocessing import binarize @@ -140,7 +141,7 @@ class GaussianNB(BaseNB): ---------- class_count_ : array, shape (n_classes,) number of training samples observed in each class. - + class_prior_ : array, shape (n_classes,) probability of each class. @@ -149,10 +150,10 @@ class labels known to the classifier epsilon_ : float absolute additive value to variances - + sigma_ : array, shape (n_classes, n_features) variance of each feature per class - + theta_ : array, shape (n_classes, n_features) mean of each feature per class @@ -698,7 +699,7 @@ class MultinomialNB(BaseDiscreteNB): coef_ : array, shape (n_classes, n_features) Mirrors ``feature_log_prob_`` for interpreting MultinomialNB as a linear model. - + feature_count_ : array, shape (n_classes, n_features) Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when @@ -711,7 +712,7 @@ class MultinomialNB(BaseDiscreteNB): intercept_ : array, shape (n_classes, ) Mirrors ``class_log_prior_`` for interpreting MultinomialNB as a linear model. - + n_features_ : int Number of features of each sample. @@ -805,14 +806,14 @@ class ComplementNB(BaseDiscreteNB): class_log_prior_ : array, shape (n_classes, ) Smoothed empirical log probability for each class. Only used in edge case with a single class in the training set. - + classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_all_ : array, shape (n_features,) Number of samples encountered for each feature during fitting. This value is weighted by the sample weight when provided. - + feature_count_ : array, shape (n_classes, n_features) Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided. @@ -821,7 +822,7 @@ class ComplementNB(BaseDiscreteNB): Empirical weights for class complements. n_features_ : int - Number of features of each sample. + Number of features of each sample. Examples -------- @@ -913,18 +914,18 @@ class BernoulliNB(BaseDiscreteNB): class_count_ : array, shape = [n_classes] Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. - + class_log_prior_ : array, shape = [n_classes] Log probability of each class (smoothed). classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_count_ : array, shape = [n_classes, n_features] Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided. - + feature_log_prob_ : array, shape = [n_classes, n_features] Empirical log probability of features given a class, P(x_i|y). @@ -1042,18 +1043,18 @@ class CategoricalNB(BaseDiscreteNB): class_count_ : array, shape (n_classes,) Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. - + class_log_prior_ : array, shape (n_classes, ) Smoothed empirical log probability for each class. classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_log_prob_ : list of arrays, len n_features Holds arrays of shape (n_classes, n_categories of respective feature) for each feature. Each array provides the empirical log probability of categories given the respective feature and class, ``P(x_i|y)``. - + n_features_ : int Number of features of each sample. From 04d407873f3841093cb2fd82e744ba643e896e01 Mon Sep 17 00:00:00 2001 From: Jigna Panchal Date: Sat, 2 Nov 2019 16:02:05 -0700 Subject: [PATCH 5/6] Revert "Updated documentation for RandomTreesEmbedding attributes" This reverts commit 7b87aefbd104abf5430f684f4a571ae6cd9f166e. --- sklearn/ensemble/_forest.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index ef1947943066a..39af503c43279 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -2094,25 +2094,9 @@ class RandomTreesEmbedding(BaseForest): Attributes ---------- - base_estimator_ : DecisionTreeClassifier - The child estimator template used to create the collection of fitted - sub-estimators. - estimators_ : list of DecisionTreeClassifier The collection of fitted sub-estimators. - feature_importances_ : ndarray of shape (n_features,) - The feature importances (the higher, the more important the feature). - - n_features_ : int - The number of features when ``fit`` is performed. - - n_outputs_ : int - The number of outputs when ``fit`` is performed. - - one_hot_encoder_ : OneHotEncoder - Returns the one-hot encoder used to create the sparse matrix. - References ---------- .. [1] P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized trees", From ac6fa1878888a9007b746471400366e6f1264b45 Mon Sep 17 00:00:00 2001 From: Jigna Panchal Date: Sun, 3 Nov 2019 08:56:23 -0800 Subject: [PATCH 6/6] Cleared flake8 errors for naive_bayes.py --- sklearn/naive_bayes.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 03f669b9ee7e8..8c2c09c38fbba 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -21,7 +21,6 @@ import numpy as np -from scipy.sparse import issparse from .base import BaseEstimator, ClassifierMixin from .preprocessing import binarize @@ -141,7 +140,7 @@ class GaussianNB(BaseNB): ---------- class_count_ : array, shape (n_classes,) number of training samples observed in each class. - + class_prior_ : array, shape (n_classes,) probability of each class. @@ -150,10 +149,10 @@ class labels known to the classifier epsilon_ : float absolute additive value to variances - + sigma_ : array, shape (n_classes, n_features) variance of each feature per class - + theta_ : array, shape (n_classes, n_features) mean of each feature per class @@ -699,7 +698,7 @@ class MultinomialNB(BaseDiscreteNB): coef_ : array, shape (n_classes, n_features) Mirrors ``feature_log_prob_`` for interpreting MultinomialNB as a linear model. - + feature_count_ : array, shape (n_classes, n_features) Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when @@ -712,7 +711,7 @@ class MultinomialNB(BaseDiscreteNB): intercept_ : array, shape (n_classes, ) Mirrors ``class_log_prior_`` for interpreting MultinomialNB as a linear model. - + n_features_ : int Number of features of each sample. @@ -806,14 +805,14 @@ class ComplementNB(BaseDiscreteNB): class_log_prior_ : array, shape (n_classes, ) Smoothed empirical log probability for each class. Only used in edge case with a single class in the training set. - + classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_all_ : array, shape (n_features,) Number of samples encountered for each feature during fitting. This value is weighted by the sample weight when provided. - + feature_count_ : array, shape (n_classes, n_features) Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided. @@ -822,7 +821,7 @@ class ComplementNB(BaseDiscreteNB): Empirical weights for class complements. n_features_ : int - Number of features of each sample. + Number of features of each sample. Examples -------- @@ -914,18 +913,18 @@ class BernoulliNB(BaseDiscreteNB): class_count_ : array, shape = [n_classes] Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. - + class_log_prior_ : array, shape = [n_classes] Log probability of each class (smoothed). classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_count_ : array, shape = [n_classes, n_features] Number of samples encountered for each (class, feature) during fitting. This value is weighted by the sample weight when provided. - + feature_log_prob_ : array, shape = [n_classes, n_features] Empirical log probability of features given a class, P(x_i|y). @@ -1043,18 +1042,18 @@ class CategoricalNB(BaseDiscreteNB): class_count_ : array, shape (n_classes,) Number of samples encountered for each class during fitting. This value is weighted by the sample weight when provided. - + class_log_prior_ : array, shape (n_classes, ) Smoothed empirical log probability for each class. classes_ : array, shape (n_classes,) Class labels known to the classifier - + feature_log_prob_ : list of arrays, len n_features Holds arrays of shape (n_classes, n_categories of respective feature) for each feature. Each array provides the empirical log probability of categories given the respective feature and class, ``P(x_i|y)``. - + n_features_ : int Number of features of each sample.