diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 585ba69fbb1ce..cebc428e17b12 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -139,23 +139,23 @@ class GaussianNB(_BaseNB):
 
     Attributes
     ----------
-    class_prior_ : array, shape (n_classes,)
-        probability of each class.
-
     class_count_ : array, shape (n_classes,)
         number of training samples observed in each class.
 
+    class_prior_ : array, shape (n_classes,)
+        probability of each class.
+
     classes_ : array, shape (n_classes,)
         class labels known to the classifier
 
-    theta_ : array, shape (n_classes, n_features)
-        mean of each feature per class
+    epsilon_ : float
+        absolute additive value to variances
 
     sigma_ : array, shape (n_classes, n_features)
         variance of each feature per class
 
-    epsilon_ : float
-        absolute additive value to variances
+    theta_ : array, shape (n_classes, n_features)
+        mean of each feature per class
 
     Examples
     --------
@@ -685,33 +685,33 @@ class MultinomialNB(_BaseDiscreteNB):
 
     Attributes
     ----------
+    class_count_ : array, shape (n_classes,)
+        Number of samples encountered for each class during fitting. This
+        value is weighted by the sample weight when provided.
+
     class_log_prior_ : array, shape (n_classes, )
         Smoothed empirical log probability for each class.
 
-    intercept_ : array, shape (n_classes, )
-        Mirrors ``class_log_prior_`` for interpreting MultinomialNB
-        as a linear model.
-
-    feature_log_prob_ : array, shape (n_classes, n_features)
-        Empirical log probability of features
-        given a class, ``P(x_i|y)``.
+    classes_ : array, shape (n_classes,)
+        Class labels known to the classifier.
 
     coef_ : array, shape (n_classes, n_features)
         Mirrors ``feature_log_prob_`` for interpreting MultinomialNB
         as a linear model.
 
-    class_count_ : array, shape (n_classes,)
-        Number of samples encountered for each class during fitting. This
-        value is weighted by the sample weight when provided.
-
-    classes_ : array, shape (n_classes,)
-        Class labels known to the classifier
-
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature)
         during fitting. This value is weighted by the sample weight when
         provided.
 
+    feature_log_prob_ : array, shape (n_classes, n_features)
+        Empirical log probability of features
+        given a class, ``P(x_i|y)``.
+
+    intercept_ : array, shape (n_classes, )
+        Mirrors ``class_log_prior_`` for interpreting MultinomialNB
+        as a linear model.
+
     n_features_ : int
         Number of features of each sample.
 
@@ -797,31 +797,31 @@ class ComplementNB(_BaseDiscreteNB):
 
     Attributes
     ----------
-    class_log_prior_ : array, shape (n_classes, )
-        Smoothed empirical log probability for each class. Only used in edge
-        case with a single class in the training set.
-
-    feature_log_prob_ : array, shape (n_classes, n_features)
-        Empirical weights for class complements.
-
     class_count_ : array, shape (n_classes,)
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
 
+    class_log_prior_ : array, shape (n_classes, )
+        Smoothed empirical log probability for each class. Only used in edge
+        case with a single class in the training set.
+
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
 
+    feature_all_ : array, shape (n_features,)
+        Number of samples encountered for each feature during fitting. This
+        value is weighted by the sample weight when provided.
+
     feature_count_ : array, shape (n_classes, n_features)
         Number of samples encountered for each (class, feature) during fitting.
         This value is weighted by the sample weight when provided.
 
+    feature_log_prob_ : array, shape (n_classes, n_features)
+        Empirical weights for class complements.
+
     n_features_ : int
         Number of features of each sample.
 
-    feature_all_ : array, shape (n_features,)
-        Number of samples encountered for each feature during fitting. This
-        value is weighted by the sample weight when provided.
-
     Examples
     --------
     >>> import numpy as np
@@ -909,16 +909,13 @@ class BernoulliNB(_BaseDiscreteNB):
 
     Attributes
     ----------
-    class_log_prior_ : array, shape = [n_classes]
-        Log probability of each class (smoothed).
-
-    feature_log_prob_ : array, shape = [n_classes, n_features]
-        Empirical log probability of features given a class, P(x_i|y).
-
     class_count_ : array, shape = [n_classes]
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
 
+    class_log_prior_ : array, shape (n_classes,)
+        Log probability of each class (smoothed).
+
     classes_ : array, shape (n_classes,)
         Class labels known to the classifier
 
@@ -927,26 +924,12 @@ class BernoulliNB(_BaseDiscreteNB):
         during fitting. This value is weighted by the sample weight when
         provided.
 
+    feature_log_prob_ : array, shape (n_classes, n_features)
+        Empirical log probability of features given a class, ``P(x_i|y)``.
+
     n_features_ : int
         Number of features of each sample.
 
-    See Also
-    ----------
-    MultinomialNB: The multinomial Naive Bayes classifier is \
-        suitable for classification with discrete features.
-
-    References
-    ----------
-    C.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to
-    Information Retrieval. Cambridge University Press, pp. 234-265.
-    https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
-
-    A. McCallum and K. Nigam (1998). A comparison of event models for naive
-    Bayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for
-    Text Categorization, pp. 41-48.
-
-    V. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with
-    naive Bayes -- Which naive Bayes? 3rd Conf. on Email and Anti-Spam (CEAS).
 
     Examples
     --------
@@ -960,6 +943,19 @@ class BernoulliNB(_BaseDiscreteNB):
     BernoulliNB()
     >>> print(clf.predict(X[2:3]))
     [3]
+
+    References
+    ----------
+    C.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to
+    Information Retrieval. Cambridge University Press, pp. 234-265.
+    https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
+
+    A. McCallum and K. Nigam (1998). A comparison of event models for naive
+    Bayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for
+    Text Categorization, pp. 41-48.
+
+    V. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with
+    naive Bayes -- Which naive Bayes? 3rd Conf. on Email and Anti-Spam (CEAS).
     """
 
     def __init__(self, alpha=1.0, binarize=.0, fit_prior=True,
@@ -1036,22 +1032,25 @@ class CategoricalNB(_BaseDiscreteNB):
 
     Attributes
     ----------
-    class_log_prior_ : array, shape (n_classes, )
-        Smoothed empirical log probability for each class.
-
-    feature_log_prob_ : list of arrays, len n_features
+    category_count_ : list of arrays, len n_features
         Holds arrays of shape (n_classes, n_categories of respective feature)
-        for each feature. Each array provides the empirical log probability
-        of categories given the respective feature and class, ``P(x_i|y)``.
+        for each feature. Each array provides the number of samples
+        encountered for each class and category of the specific feature.
 
     class_count_ : array, shape (n_classes,)
         Number of samples encountered for each class during fitting. This
         value is weighted by the sample weight when provided.
 
-    category_count_ : list of arrays, len n_features
+    class_log_prior_ : array, shape (n_classes, )
+        Smoothed empirical log probability for each class.
+
+    classes_ : array, shape (n_classes,)
+        Class labels known to the classifier.
+
+    feature_log_prob_ : list of arrays, len n_features
         Holds arrays of shape (n_classes, n_categories of respective feature)
-        for each feature. Each array provides the number of samples
-        encountered for each class and category of the specific feature.
+        for each feature. Each array provides the empirical log probability
+        of categories given the respective feature and class, ``P(x_i|y)``.
 
     n_features_ : int
         Number of features of each sample.