raghavrv
diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst
Lines changed: 4 additions & 5 deletions
diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py
Lines changed: 1 addition & 1 deletion
diff --git a/examples/text/document_classification_20newsgroups.py b/examples/text/document_classification_20newsgroups.py
Lines changed: 1 addition & 1 deletion
diff --git a/examples/text/document_clustering.py b/examples/text/document_clustering.py
Lines changed: 2 additions & 2 deletions
@@ -125,11 +125,10 @@ Since the hash function might cause collisions between (unrelated) features,
 a signed hash function is used and the sign of the hash value
 determines the sign of the value stored in the output matrix for a feature.
 This way, collisions are likely to cancel out rather than accumulate error,
-and the expected mean of any output feature's value is zero.
-
-If ``non_negative=True`` is passed to the constructor, the absolute
-value is taken.  This undoes some of the collision handling, but allows
-the output to be passed to estimators like
+and the expected mean of any output feature's value is zero. This mechanism
+is enabled by default with ``alternate_sign=True`` and is particularly useful
+for small hash table sizes (``n_features < 10000``). For large hash table
+sizes, it can be disabled to allow the output to be passed to estimators like
 :class:`sklearn.naive_bayes.MultinomialNB` or
 :class:`sklearn.feature_selection.chi2`
 feature selectors that expect non-negative inputs.
 
@@ -194,7 +194,7 @@ def progress(blocknum, bs, size):
 # maximum
 
 vectorizer = HashingVectorizer(decode_error='ignore', n_features=2 ** 18,
-                               non_negative=True)
+                               alternate_sign=False)
 
 
 # Iterator over parsed Reuters SGML files.
 
@@ -152,7 +152,7 @@ def size_mb(docs):
 print("Extracting features from the training data using a sparse vectorizer")
 t0 = time()
 if opts.use_hashing:
-    vectorizer = HashingVectorizer(stop_words='english', non_negative=True,
+    vectorizer = HashingVectorizer(stop_words='english', alternate_sign=False,
                                    n_features=opts.n_features)
     X_train = vectorizer.transform(data_train.data)
 else:
 
@@ -144,13 +144,13 @@ def is_interactive():
     if opts.use_idf:
         # Perform an IDF normalization on the output of HashingVectorizer
         hasher = HashingVectorizer(n_features=opts.n_features,
-                                   stop_words='english', non_negative=True,
+                                   stop_words='english', alternate_sign=False,
                                    norm=None, binary=False)
         vectorizer = make_pipeline(hasher, TfidfTransformer())
     else:
         vectorizer = HashingVectorizer(n_features=opts.n_features,
                                        stop_words='english',
-                                       non_negative=False, norm='l2',
+                                       alternate_sign=False, norm='l2',
                                        binary=False)
 else:
     vectorizer = TfidfVectorizer(max_df=0.5, max_features=opts.n_features,