@@ -45,7 +45,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
45
45
46
46
The KDD Cup '99 dataset was created by processing the tcpdump portions
47
47
of the 1998 DARPA Intrusion Detection System (IDS) Evaluation dataset,
48
- created by MIT Lincoln Lab [1] . The artificial data was generated using
48
+ created by MIT Lincoln Lab [1]. The artificial data was generated using
49
49
a closed network and hand-injected attacks to produce a large number of
50
50
different types of attack with normal activity in the background.
51
51
As the initial goal was to produce a large training set for supervised
@@ -134,7 +134,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
134
134
shuffle : bool, default=False
135
135
Whether to shuffle the dataset.
136
136
137
- percent10 : bool, default=False
137
+ percent10 : bool, default=True
138
138
Whether to load only 10 percent of the data.
139
139
140
140
download_if_missing : bool, default=True
@@ -155,9 +155,11 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
155
155
Detection Evaluation Richard Lippmann, Joshua W. Haines,
156
156
David J. Fried, Jonathan Korba, Kumar Das
157
157
158
- .. [2] A Geometric Framework for Unsupervised Anomaly Detection: Detecting
159
- Intrusions in Unlabeled Data (2002) by Eleazar Eskin, Andrew Arnold,
160
- Michael Prerau, Leonid Portnoy, Sal Stolfo
158
+ .. [2] K. Yamanishi, J.-I. Takeuchi, G. Williams, and P. Milne. Online
159
+ unsupervised outlier detection using finite mixtures with
160
+ discounting learning algorithms. In Proceedings of the sixth
161
+ ACM SIGKDD international conference on Knowledge discovery
162
+ and data mining, pages 320-324. ACM Press, 2000.
161
163
162
164
"""
163
165
kddcup99 = _fetch_brute_kddcup99 (shuffle = shuffle , percent10 = percent10 ,
@@ -214,7 +216,7 @@ def fetch_kddcup99(subset=None, shuffle=False, random_state=None,
214
216
215
217
def _fetch_brute_kddcup99 (subset = None , data_home = None ,
216
218
download_if_missing = True , random_state = None ,
217
- shuffle = False , percent10 = False ):
219
+ shuffle = False , percent10 = True ):
218
220
219
221
"""Load the kddcup99 dataset, downloading it if necessary.
220
222
@@ -242,7 +244,7 @@ def _fetch_brute_kddcup99(subset=None, data_home=None,
242
244
shuffle : bool, default=False
243
245
Whether to shuffle the dataset.
244
246
245
- percent10 : bool, default=False
247
+ percent10 : bool, default=True
246
248
Whether to load only 10 percent of the data.
247
249
248
250
Returns
0 commit comments