[MRG + 1] Fix reference in fetch_kddcup99 (#8071) · scikit-learn/scikit-learn@f61d96c · GitHub
[go: up one dir, main page]

Skip to content

Commit f61d96c

Browse files
b-carter authored and lesteve committed
[MRG + 1] Fix reference in fetch_kddcup99 (#8071)
* Fix the 'percent10' parameter default in the fetch_kddcup99 docstring
* Make the default 'percent10' value in _fetch_brute_kddcup99 consistent with fetch_kddcup99
1 parent 072eefe commit f61d96c

File tree

1 file changed

+9
-7
lines changed

1 file changed

+9
-7
lines changed

sklearn/datasets/kddcup99.py

Lines changed: 9 additions & 7 deletions
-
shuffle=False, percent10=False):
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
4545
4646
The KDD Cup '99 dataset was created by processing the tcpdump portions
4747
of the 1998 DARPA Intrusion Detection System (IDS) Evaluation dataset,
48-
created by MIT Lincoln Lab [1] . The artificial data was generated using
48+
created by MIT Lincoln Lab [1]. The artificial data was generated using
4949
a closed network and hand-injected attacks to produce a large number of
5050
different types of attack with normal activity in the background.
5151
As the initial goal was to produce a large training set for supervised
@@ -134,7 +134,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
134134
shuffle : bool, default=False
135135
Whether to shuffle dataset.
136136
137-
percent10 : bool, default=False
137+
percent10 : bool, default=True
138138
Whether to load only 10 percent of the data.
139139
140140
download_if_missing : bool, default=True
@@ -155,9 +155,11 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
155155
Detection Evaluation Richard Lippmann, Joshua W. Haines,
156156
David J. Fried, Jonathan Korba, Kumar Das
157157
158-
.. [2] A Geometric Framework for Unsupervised Anomaly Detection: Detecting
159-
Intrusions in Unlabeled Data (2002) by Eleazar Eskin, Andrew Arnold,
160-
Michael Prerau, Leonid Portnoy, Sal Stolfo
158+
.. [2] K. Yamanishi, J.-I. Takeuchi, G. Williams, and P. Milne. Online
159+
unsupervised outlier detection using finite mixtures with
160+
discounting learning algorithms. In Proceedings of the sixth
161+
ACM SIGKDD international conference on Knowledge discovery
162+
and data mining, pages 320-324. ACM Press, 2000.
161163
162164
"""
163165
kddcup99 = _fetch_brute_kddcup99(shuffle=shuffle, percent10=percent10,
@@ -214,7 +216,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
214216

215217
def _fetch_brute_kddcup99(subset=None, data_home=None,
216218
download_if_missing=True, random_state=None,
217-
shuffle=False, percent10=False):
219+
shuffle=False, percent10=True):
218220

219221
"""Load the kddcup99 dataset, downloading it if necessary.
220222
@@ -242,7 +244,7 @@ def _fetch_brute_kddcup99(subset=None, data_home=None,
242244
shuffle : bool, default=False
243245
Whether to shuffle dataset.
244246
245-
percent10 : bool, default=False
247+
percent10 : bool, default=True
246248
Whether to load only 10 percent of the data.
247249
248250
Returns

0 commit comments

Comments
 (0)
0