From 7c54edc6310b18c55bf4f61b7ab61aab49bfd392 Mon Sep 17 00:00:00 2001
From: Jigna Panchal <panchal.jh@gmail.com>
Date: Sat, 2 Nov 2019 14:19:42 -0700
Subject: [PATCH 1/6] alphabetized and added attributes to naive_bayes.py

---
 sklearn/naive_bayes.py | 121 +++++++++++++++++++----------------------
 1 file changed, 57 insertions(+), 64 deletions(-)

diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index be9450d7334f0..03f669b9ee7e8 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -139,26 +139,23 @@ class GaussianNB(BaseNB):
 
     Attributes
     ----------
-    class_prior_ : array, shape (n_classes,)
-        probability of each class.
-
     class_count_ : array, shape (n_classes,)
         number of training samples observed in each class.
+        
+    class_prior_ : array, shape (n_classes,)
+        probability of each class.
 
     classes_ : array, shape (n_classes,)
         class labels known to the classifier
 
-    theta_ : array, shape (n_classes, n_features)
-        mean of each feature per class
-
-    sigma_ : array, shape (n_classes, n_features)
-        variance of each feature per class
-
     epsilon_ : float
         absolute additive value to variances
-
-    classes_ : array-like, shape (n_classes,)
-        Unique class labels.
+        
+    sigma_ : array, shape (n_classes, n_features)
+        variance of each feature per class
+        
+    theta_ : array, shape (n_classes, n_features)
+        mean of each feature per class
 
     Examples
     --------
@@ -689,38 +686,36 @@ class MultinomialNB(BaseDiscreteNB):
 
     Attributes
     ----------
-    class_log_prior_ : array, shape (n_classes, )
-        Smoothed empirical log probability for each class.
-
-    intercept_ : array, shape (n_classes, )
-        Mirrors ``class_log_prior_`` for interpreting MultinomialNB
-        as a linear model.
-
-    feature_log_prob_ : array, shape (n_classes, n_features)
-        Empirical log probability of features
-        given a class, ``P(x_i|y)``.
-
-    coef_ : array, shape (n_classes, n_features)
-        Mirrors ``feature_log_prob_`` for interpreting MultinomialNB
-        as a linear model.
-
     class_count_ : array, shape (n_classes,)
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
 
+    class_log_prior_ : array, shape (n_classes, )
+        Smoothed empirical log probability for each class.
+
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
 
+    coef_ : array, shape (n_classes, n_features)
+        Mirrors ``feature_log_prob_`` for interpreting MultinomialNB
+        as a linear model.
+    
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature)
         during fitting. This value is weighted by the sample weight when
         provided.
 
+    feature_log_prob_ : array, shape (n_classes, n_features)
+        Empirical log probability of features
+        given a class, ``P(x_i|y)``.
+
+    intercept_ : array, shape (n_classes, )
+        Mirrors ``class_log_prior_`` for interpreting MultinomialNB
+        as a linear model.
+    
     n_features_ : int
         Number of features of each sample.
 
-    classes_ : array-like, shape (n_classes,)
-        Unique class labels.
 
     Examples
     --------
@@ -804,33 +799,30 @@ class ComplementNB(BaseDiscreteNB):
 
     Attributes
     ----------
-    class_log_prior_ : array, shape (n_classes, )
-        Smoothed empirical log probability for each class. Only used in edge
-        case with a single class in the training set.
-
-    feature_log_prob_ : array, shape (n_classes, n_features)
-        Empirical weights for class complements.
-
     class_count_ : array, shape (n_classes,)
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
 
+    class_log_prior_ : array, shape (n_classes, )
+        Smoothed empirical log probability for each class. Only used in edge
+        case with a single class in the training set.
+    
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-
+        
+    feature_all_ : array, shape (n_features,)
+        Number of samples encountered for each feature during fitting. This
+        value is weighted by the sample weight when provided.
+        
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature) during fitting.
         This value is weighted by the sample weight when provided.
 
-    n_features_ : int
-        Number of features of each sample.
-
-    feature_all_ : array, shape (n_features,)
-        Number of samples encountered for each feature during fitting. This
-        value is weighted by the sample weight when provided.
+    feature_log_prob_ : array, shape (n_classes, n_features)
+        Empirical weights for class complements.
 
-    classes_ : array of shape (n_classes,)
-        The classes labels.
+    n_features_ : int
+        Number of features of each sample.    
 
     Examples
     --------
@@ -919,29 +911,27 @@ class BernoulliNB(BaseDiscreteNB):
 
     Attributes
     ----------
-    class_log_prior_ : array, shape = [n_classes]
-        Log probability of each class (smoothed).
-
-    feature_log_prob_ : array, shape = [n_classes, n_features]
-        Empirical log probability of features given a class, P(x_i|y).
-
     class_count_ : array, shape = [n_classes]
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
+        
+    class_log_prior_ : array, shape = [n_classes]
+        Log probability of each class (smoothed).
 
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-
+        
     feature_count_ : array, shape = [n_classes, n_features]
         Number of samples encountered for each (class, feature)
         during fitting. This value is weighted by the sample weight when
         provided.
+        
+    feature_log_prob_ : array, shape = [n_classes, n_features]
+        Empirical log probability of features given a class, P(x_i|y).
 
     n_features_ : int
         Number of features of each sample.
 
-    classes_ : array of shape (n_classes,)
-        The classes labels.
 
     Examples
     --------
@@ -1045,23 +1035,26 @@ class CategoricalNB(BaseDiscreteNB):
 
     Attributes
     ----------
-    class_log_prior_ : array, shape (n_classes, )
-        Smoothed empirical log probability for each class.
-
-    feature_log_prob_ : list of arrays, len n_features
+    category_count_ : list of arrays, len n_features
         Holds arrays of shape (n_classes, n_categories of respective feature)
-        for each feature. Each array provides the empirical log probability
-        of categories given the respective feature and class, ``P(x_i|y)``.
+        for each feature. Each array provides the number of samples
+        encountered for each class and category of the specific feature.
 
     class_count_ : array, shape (n_classes,)
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
+        
+    class_log_prior_ : array, shape (n_classes, )
+        Smoothed empirical log probability for each class.
 
-    category_count_ : list of arrays, len n_features
+    classes_ : array, shape (n_classes,)
+        Class labels known to the classifier
+        
+    feature_log_prob_ : list of arrays, len n_features
         Holds arrays of shape (n_classes, n_categories of respective feature)
-        for each feature. Each array provides the number of samples
-        encountered for each class and category of the specific feature.
-
+        for each feature. Each array provides the empirical log probability
+        of categories given the respective feature and class, ``P(x_i|y)``.
+    
     n_features_ : int
         Number of features of each sample.
 

From 0ed036b1eddf140dd03495ea4e3087b854566c12 Mon Sep 17 00:00:00 2001
From: Jigna Panchal <panchal.jh@gmail.com>
Date: Sat, 2 Nov 2019 14:55:46 -0700
Subject: [PATCH 2/6] removed extra whitespace and scipy.sparse.issparse (never
 used) from naive_bayes.py

---
 sklearn/naive_bayes.py | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 03f669b9ee7e8..8c2c09c38fbba 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -21,7 +21,6 @@
 
 
 import numpy as np
-from scipy.sparse import issparse
 
 from .base import BaseEstimator, ClassifierMixin
 from .preprocessing import binarize
@@ -141,7 +140,7 @@ class GaussianNB(BaseNB):
     ----------
     class_count_ : array, shape (n_classes,)
         number of training samples observed in each class.
-        
+
     class_prior_ : array, shape (n_classes,)
         probability of each class.
 
@@ -150,10 +149,10 @@ class labels known to the classifier
 
     epsilon_ : float
         absolute additive value to variances
-        
+
     sigma_ : array, shape (n_classes, n_features)
         variance of each feature per class
-        
+
     theta_ : array, shape (n_classes, n_features)
         mean of each feature per class
 
@@ -699,7 +698,7 @@ class MultinomialNB(BaseDiscreteNB):
     coef_ : array, shape (n_classes, n_features)
         Mirrors ``feature_log_prob_`` for interpreting MultinomialNB
         as a linear model.
-    
+
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature)
         during fitting. This value is weighted by the sample weight when
@@ -712,7 +711,7 @@ class MultinomialNB(BaseDiscreteNB):
     intercept_ : array, shape (n_classes, )
         Mirrors ``class_log_prior_`` for interpreting MultinomialNB
         as a linear model.
-    
+
     n_features_ : int
         Number of features of each sample.
 
@@ -806,14 +805,14 @@ class ComplementNB(BaseDiscreteNB):
     class_log_prior_ : array, shape (n_classes, )
         Smoothed empirical log probability for each class. Only used in edge
         case with a single class in the training set.
-    
+
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-        
+
     feature_all_ : array, shape (n_features,)
         Number of samples encountered for each feature during fitting. This
         value is weighted by the sample weight when provided.
-        
+
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature) during fitting.
         This value is weighted by the sample weight when provided.
@@ -822,7 +821,7 @@ class ComplementNB(BaseDiscreteNB):
         Empirical weights for class complements.
 
     n_features_ : int
-        Number of features of each sample.    
+        Number of features of each sample.
 
     Examples
     --------
@@ -914,18 +913,18 @@ class BernoulliNB(BaseDiscreteNB):
     class_count_ : array, shape = [n_classes]
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
-        
+
     class_log_prior_ : array, shape = [n_classes]
         Log probability of each class (smoothed).
 
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-        
+
     feature_count_ : array, shape = [n_classes, n_features]
         Number of samples encountered for each (class, feature)
         during fitting. This value is weighted by the sample weight when
         provided.
-        
+
     feature_log_prob_ : array, shape = [n_classes, n_features]
         Empirical log probability of features given a class, P(x_i|y).
 
@@ -1043,18 +1042,18 @@ class CategoricalNB(BaseDiscreteNB):
     class_count_ : array, shape (n_classes,)
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
-        
+
     class_log_prior_ : array, shape (n_classes, )
         Smoothed empirical log probability for each class.
 
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-        
+
     feature_log_prob_ : list of arrays, len n_features
         Holds arrays of shape (n_classes, n_categories of respective feature)
         for each feature. Each array provides the empirical log probability
         of categories given the respective feature and class, ``P(x_i|y)``.
-    
+
     n_features_ : int
         Number of features of each sample.
 

From 7b87aefbd104abf5430f684f4a571ae6cd9f166e Mon Sep 17 00:00:00 2001
From: Jigna Panchal <panchal.jh@gmail.com>
Date: Sat, 2 Nov 2019 15:54:15 -0700
Subject: [PATCH 3/6] Updated documentation for RandomTreesEmbedding attributes

---
 sklearn/ensemble/_forest.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 39af503c43279..ef1947943066a 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -2094,9 +2094,25 @@ class RandomTreesEmbedding(BaseForest):
 
     Attributes
     ----------
+    base_estimator_ : DecisionTreeClassifier
+        The child estimator template used to create the collection of fitted
+        sub-estimators.
+
     estimators_ : list of DecisionTreeClassifier
         The collection of fitted sub-estimators.
 
+    feature_importances_ : ndarray of shape (n_features,)
+        The feature importances (the higher, the more important the feature).
+
+    n_features_ : int
+        The number of features when ``fit`` is performed.
+
+    n_outputs_ : int
+        The number of outputs when ``fit`` is performed.
+
+    one_hot_encoder_ : OneHotEncoder
+        Returns the one-hot encoder used to create the sparse matrix.
+
     References
     ----------
     .. [1] P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized trees",

From d590ff73b52e61b620825f0702b3ecc790470fab Mon Sep 17 00:00:00 2001
From: Jigna Panchal <panchal.jh@gmail.com>
Date: Sat, 2 Nov 2019 16:00:39 -0700
Subject: [PATCH 4/6] Revert "removed extra whitespace and
 scipy.sparse.issparse (never used) from naive_bayes.py"

This reverts commit 0ed036b1eddf140dd03495ea4e3087b854566c12.
---
 sklearn/naive_bayes.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 8c2c09c38fbba..03f669b9ee7e8 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -21,6 +21,7 @@
 
 
 import numpy as np
+from scipy.sparse import issparse
 
 from .base import BaseEstimator, ClassifierMixin
 from .preprocessing import binarize
@@ -140,7 +141,7 @@ class GaussianNB(BaseNB):
     ----------
     class_count_ : array, shape (n_classes,)
         number of training samples observed in each class.
-
+        
     class_prior_ : array, shape (n_classes,)
         probability of each class.
 
@@ -149,10 +150,10 @@ class labels known to the classifier
 
     epsilon_ : float
         absolute additive value to variances
-
+        
     sigma_ : array, shape (n_classes, n_features)
         variance of each feature per class
-
+        
     theta_ : array, shape (n_classes, n_features)
         mean of each feature per class
 
@@ -698,7 +699,7 @@ class MultinomialNB(BaseDiscreteNB):
     coef_ : array, shape (n_classes, n_features)
         Mirrors ``feature_log_prob_`` for interpreting MultinomialNB
         as a linear model.
-
+    
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature)
         during fitting. This value is weighted by the sample weight when
@@ -711,7 +712,7 @@ class MultinomialNB(BaseDiscreteNB):
     intercept_ : array, shape (n_classes, )
         Mirrors ``class_log_prior_`` for interpreting MultinomialNB
         as a linear model.
-
+    
     n_features_ : int
         Number of features of each sample.
 
@@ -805,14 +806,14 @@ class ComplementNB(BaseDiscreteNB):
     class_log_prior_ : array, shape (n_classes, )
         Smoothed empirical log probability for each class. Only used in edge
         case with a single class in the training set.
-
+    
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-
+        
     feature_all_ : array, shape (n_features,)
         Number of samples encountered for each feature during fitting. This
         value is weighted by the sample weight when provided.
-
+        
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature) during fitting.
         This value is weighted by the sample weight when provided.
@@ -821,7 +822,7 @@ class ComplementNB(BaseDiscreteNB):
         Empirical weights for class complements.
 
     n_features_ : int
-        Number of features of each sample.
+        Number of features of each sample.    
 
     Examples
     --------
@@ -913,18 +914,18 @@ class BernoulliNB(BaseDiscreteNB):
     class_count_ : array, shape = [n_classes]
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
-
+        
     class_log_prior_ : array, shape = [n_classes]
         Log probability of each class (smoothed).
 
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-
+        
     feature_count_ : array, shape = [n_classes, n_features]
         Number of samples encountered for each (class, feature)
         during fitting. This value is weighted by the sample weight when
         provided.
-
+        
     feature_log_prob_ : array, shape = [n_classes, n_features]
         Empirical log probability of features given a class, P(x_i|y).
 
@@ -1042,18 +1043,18 @@ class CategoricalNB(BaseDiscreteNB):
     class_count_ : array, shape (n_classes,)
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
-
+        
     class_log_prior_ : array, shape (n_classes, )
         Smoothed empirical log probability for each class.
 
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-
+        
     feature_log_prob_ : list of arrays, len n_features
         Holds arrays of shape (n_classes, n_categories of respective feature)
         for each feature. Each array provides the empirical log probability
         of categories given the respective feature and class, ``P(x_i|y)``.
-
+    
     n_features_ : int
         Number of features of each sample.
 

From 04d407873f3841093cb2fd82e744ba643e896e01 Mon Sep 17 00:00:00 2001
From: Jigna Panchal <panchal.jh@gmail.com>
Date: Sat, 2 Nov 2019 16:02:05 -0700
Subject: [PATCH 5/6] Revert "Updated documentation for RandomTreesEmbedding
 attributes"

This reverts commit 7b87aefbd104abf5430f684f4a571ae6cd9f166e.
---
 sklearn/ensemble/_forest.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index ef1947943066a..39af503c43279 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -2094,25 +2094,9 @@ class RandomTreesEmbedding(BaseForest):
 
     Attributes
     ----------
-    base_estimator_ : DecisionTreeClassifier
-        The child estimator template used to create the collection of fitted
-        sub-estimators.
-
     estimators_ : list of DecisionTreeClassifier
         The collection of fitted sub-estimators.
 
-    feature_importances_ : ndarray of shape (n_features,)
-        The feature importances (the higher, the more important the feature).
-
-    n_features_ : int
-        The number of features when ``fit`` is performed.
-
-    n_outputs_ : int
-        The number of outputs when ``fit`` is performed.
-
-    one_hot_encoder_ : OneHotEncoder
-        Returns the one-hot encoder used to create the sparse matrix.
-
     References
     ----------
     .. [1] P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized trees",

From ac6fa1878888a9007b746471400366e6f1264b45 Mon Sep 17 00:00:00 2001
From: Jigna Panchal <panchal.jh@gmail.com>
Date: Sun, 3 Nov 2019 08:56:23 -0800
Subject: [PATCH 6/6] Cleared flake8 errors for naive_bayes.py

---
 sklearn/naive_bayes.py | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 03f669b9ee7e8..8c2c09c38fbba 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -21,7 +21,6 @@
 
 
 import numpy as np
-from scipy.sparse import issparse
 
 from .base import BaseEstimator, ClassifierMixin
 from .preprocessing import binarize
@@ -141,7 +140,7 @@ class GaussianNB(BaseNB):
     ----------
     class_count_ : array, shape (n_classes,)
         number of training samples observed in each class.
-        
+
     class_prior_ : array, shape (n_classes,)
         probability of each class.
 
@@ -150,10 +149,10 @@ class labels known to the classifier
 
     epsilon_ : float
         absolute additive value to variances
-        
+
     sigma_ : array, shape (n_classes, n_features)
         variance of each feature per class
-        
+
     theta_ : array, shape (n_classes, n_features)
         mean of each feature per class
 
@@ -699,7 +698,7 @@ class MultinomialNB(BaseDiscreteNB):
     coef_ : array, shape (n_classes, n_features)
         Mirrors ``feature_log_prob_`` for interpreting MultinomialNB
         as a linear model.
-    
+
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature)
         during fitting. This value is weighted by the sample weight when
@@ -712,7 +711,7 @@ class MultinomialNB(BaseDiscreteNB):
     intercept_ : array, shape (n_classes, )
         Mirrors ``class_log_prior_`` for interpreting MultinomialNB
         as a linear model.
-    
+
     n_features_ : int
         Number of features of each sample.
 
@@ -806,14 +805,14 @@ class ComplementNB(BaseDiscreteNB):
     class_log_prior_ : array, shape (n_classes, )
         Smoothed empirical log probability for each class. Only used in edge
         case with a single class in the training set.
-    
+
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-        
+
     feature_all_ : array, shape (n_features,)
         Number of samples encountered for each feature during fitting. This
         value is weighted by the sample weight when provided.
-        
+
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature) during fitting.
         This value is weighted by the sample weight when provided.
@@ -822,7 +821,7 @@ class ComplementNB(BaseDiscreteNB):
         Empirical weights for class complements.
 
     n_features_ : int
-        Number of features of each sample.    
+        Number of features of each sample.
 
     Examples
     --------
@@ -914,18 +913,18 @@ class BernoulliNB(BaseDiscreteNB):
     class_count_ : array, shape = [n_classes]
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
-        
+
     class_log_prior_ : array, shape = [n_classes]
         Log probability of each class (smoothed).
 
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-        
+
     feature_count_ : array, shape = [n_classes, n_features]
         Number of samples encountered for each (class, feature)
         during fitting. This value is weighted by the sample weight when
         provided.
-        
+
     feature_log_prob_ : array, shape = [n_classes, n_features]
         Empirical log probability of features given a class, P(x_i|y).
 
@@ -1043,18 +1042,18 @@ class CategoricalNB(BaseDiscreteNB):
     class_count_ : array, shape (n_classes,)
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
-        
+
     class_log_prior_ : array, shape (n_classes, )
         Smoothed empirical log probability for each class.
 
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
-        
+
     feature_log_prob_ : list of arrays, len n_features
         Holds arrays of shape (n_classes, n_categories of respective feature)
         for each feature. Each array provides the empirical log probability
         of categories given the respective feature and class, ``P(x_i|y)``.
-    
+
     n_features_ : int
         Number of features of each sample.