Merge branch 'master' of https://github.com/scikit-learn/scikit-learn… · scikit-learn/scikit-learn@2f78ea7 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2f78ea7

Browse files
committed
Merge branch 'master' of https://github.com/scikit-learn/scikit-learn into pr/10427
2 parents bb89d20 + ddaafa6 commit 2f78ea7

File tree

15 files changed

+79
-76
lines changed

15 files changed

+79
-76
lines changed

COPYING

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
New BSD License
22

3-
Copyright (c) 2007–2017 The scikit-learn developers.
3+
Copyright (c) 2007–2018 The scikit-learn developers.
44
All rights reserved.
55

66

build_tools/circle/build_doc.sh

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,13 @@ get_build_type() {
5757
echo QUICK BUILD: no changed filenames for $git_range
5858
return
5959
fi
60-
if echo "$filenames" | grep -q -e ^examples/
60+
changed_examples=$(echo "$filenames" | grep -e ^examples/)
61+
if [[ -n "$changed_examples" ]]
6162
then
62-
echo BUILD: detected examples/ filename modified in $git_range: $(echo "$filenames" | grep -e ^examples/ | head -n1)
63+
echo BUILD: detected examples/ filename modified in $git_range: $changed_examples
64+
pattern=$(echo "$changed_examples" | paste -sd '|')
65+
# pattern for examples to run is the last line of output
66+
echo "$pattern"
6367
return
6468
fi
6569
echo QUICK BUILD: no examples/ filename modified in $git_range:
@@ -75,12 +79,17 @@ fi
7579
if [[ "$CIRCLE_BRANCH" =~ ^master$|^[0-9]+\.[0-9]+\.X$ && -z "$CI_PULL_REQUEST" ]]
7680
then
7781
# PDF linked into HTML
78-
MAKE_TARGET="dist LATEXMKOPTS=-halt-on-error"
82+
make_args="dist LATEXMKOPTS=-halt-on-error"
7983
elif [[ "$build_type" =~ ^QUICK ]]
8084
then
81-
MAKE_TARGET=html-noplot
85+
make_args=html-noplot
86+
elif [[ "$build_type" =~ ^'BUILD: detected examples' ]]
87+
then
88+
# pattern for examples to run is the last line of output
89+
pattern=$(echo "$build_type" | tail -n 1)
90+
make_args="html EXAMPLES_PATTERN=$pattern"
8291
else
83-
MAKE_TARGET=html
92+
make_args=html
8493
fi
8594

8695
# Installing required system packages to support the rendering of math
@@ -124,7 +133,7 @@ then
124133
fi
125134

126135
# The pipefail is requested to propagate exit code
127-
set -o pipefail && cd doc && make $MAKE_TARGET 2>&1 | tee ~/log.txt
136+
set -o pipefail && cd doc && make $make_args 2>&1 | tee ~/log.txt
128137

129138
cd -
130139
set +o pipefail

doc/Makefile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@ SPHINXOPTS =
66
SPHINXBUILD ?= sphinx-build
77
PAPER =
88
BUILDDIR = _build
9+
ifneq ($(EXAMPLES_PATTERN),)
10+
EXAMPLES_PATTERN_OPTS := -D sphinx_gallery_conf.filename_pattern="$(EXAMPLES_PATTERN)"
11+
endif
912

1013
# Internal variables.
1114
PAPEROPT_a4 = -D latex_paper_size=a4
1215
PAPEROPT_letter = -D latex_paper_size=letter
13-
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
16+
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\
17+
$(EXAMPLES_PATTERN_OPTS) .
18+
1419

1520
.PHONY: help clean html dirhtml pickle json latex latexpdf changes linkcheck doctest optipng
1621

doc/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575

7676
# General information about the project.
7777
project = u('scikit-learn')
78-
copyright = u('2007 - 2017, scikit-learn developers (BSD License)')
78+
copyright = u('2007 - 2018, scikit-learn developers (BSD License)')
7979

8080
# The version info for the project you're documenting, acts as replacement for
8181
# |version| and |release|, also used in various other places throughout the

doc/developers/contributing.rst

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -444,15 +444,17 @@ to the documentation, e.g.::
444444

445445
pip install --editable ..
446446

447-
To generate the full web site, including the example gallery (this might take a
448-
while)::
447+
To generate the full web site, including the example gallery::
449448

450449
make html
451450

452-
Or, if you'd rather quickly generate the documentation without the example
453-
gallery::
454-
455-
make html-noplot
451+
Generating the example gallery will run all our examples which takes a
452+
while. To save some time, you can use:
453+
- ``make html-noplot``: this will generate the documentation without the
454+
example gallery. This is useful when changing a docstring for example.
455+
- ``EXAMPLES_PATTERN=your_regex_goes_here make html``: only the examples
456+
matching ``your_regex_goes_here`` will be run. This is particularly
457+
useful if you are modifying a few examples.
456458

457459
That should create all the documentation in the ``_build/html/stable`` directory.
458460

doc/whats_new/v0.20.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,11 @@ Classifiers and regressors
210210
callable and b) the input to the NearestNeighbors model is sparse.
211211
:issue:`9579` by :user:`Thomas Kober <tttthomasssss>`.
212212

213+
- Fixed a bug in :class:`svm.SVC` where, when the argument ``kernel`` is
214+
unicode in Python2, the ``predict_proba`` method was raising an
215+
unexpected TypeError given dense inputs.
216+
:issue:`10412` by :user:`Jiongyan Zhang <qmick>`.
217+
213218
Decomposition, manifold learning and clustering
214219

215220
- Fix for uninformative error in :class:`decomposition.IncrementalPCA`:
@@ -274,6 +279,10 @@ Metrics
274279
:issue:`10093` by :user:`alexryndin <alexryndin>`
275280
and :user:`Hanmin Qin <qinhanmin2014>`.
276281

282+
- Fixed an integer overflow bug by casting the product to a 64-bit integer in
283+
:func:`metrics.mutual_info_score`.
284+
:issue:`9772` by :user:`Kumar Ashutosh <thechargedneutron>`.
285+
277286
Neighbors
278287

279288
- Fixed a bug so ``predict`` in :class:`neighbors.RadiusNeighborsRegressor` can

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ test = pytest
77
addopts =
88
--doctest-modules
99
--disable-pytest-warnings
10+
--ignore sklearn/externals
1011

1112
[wheelhouse_uploader]
1213
artifact_indexes=

setup.py

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ def configuration(parent_package='', top_path=None):
123123
os.remove('MANIFEST')
124124

125125
from numpy.distutils.misc_util import Configuration
126+
126127
config = Configuration(None, parent_package, top_path)
127128

128129
# Avoid non-useful msg:
@@ -137,26 +138,6 @@ def configuration(parent_package='', top_path=None):
137138
return config
138139

139140

140-
def get_scipy_status():
141-
"""
142-
Returns a dictionary containing a boolean specifying whether SciPy
143-
is up-to-date, along with the version string (empty string if
144-
not installed).
145-
"""
146-
scipy_status = {}
147-
try:
148-
import scipy
149-
scipy_version = scipy.__version__
150-
scipy_status['up_to_date'] = parse_version(
151-
scipy_version) >= parse_version(SCIPY_MIN_VERSION)
152-
scipy_status['version'] = scipy_version
153-
except ImportError:
154-
traceback.print_exc()
155-
scipy_status['up_to_date'] = False
156-
scipy_status['version'] = ""
157-
return scipy_status
158-
159-
160141
def get_numpy_status():
161142
"""
162143
Returns a dictionary containing a boolean specifying whether NumPy
@@ -206,6 +187,10 @@ def setup_package():
206187
'Programming Language :: Python :: 3.6',
207188
],
208189
cmdclass=cmdclass,
190+
install_requires=[
191+
'numpy>={0}'.format(NUMPY_MIN_VERSION),
192+
'scipy>={0}'.format(SCIPY_MIN_VERSION)
193+
],
209194
**extra_setuptools_args)
210195

211196
if len(sys.argv) == 1 or (
@@ -229,9 +214,6 @@ def setup_package():
229214
numpy_status = get_numpy_status()
230215
numpy_req_str = "scikit-learn requires NumPy >= {0}.\n".format(
231216
NUMPY_MIN_VERSION)
232-
scipy_status = get_scipy_status()
233-
scipy_req_str = "scikit-learn requires SciPy >= {0}.\n".format(
234-
SCIPY_MIN_VERSION)
235217

236218
instructions = ("Installation instructions are available on the "
237219
"scikit-learn website: "
@@ -247,16 +229,6 @@ def setup_package():
247229
raise ImportError("Numerical Python (NumPy) is not "
248230
"installed.\n{0}{1}"
249231
.format(numpy_req_str, instructions))
250-
if scipy_status['up_to_date'] is False:
251-
if scipy_status['version']:
252-
raise ImportError("Your installation of Scientific Python "
253-
"(SciPy) {0} is out-of-date.\n{1}{2}"
254-
.format(scipy_status['version'],
255-
scipy_req_str, instructions))
256-
else:
257-
raise ImportError("Scientific Python (SciPy) is not "
258-
"installed.\n{0}{1}"
259-
.format(scipy_req_str, instructions))
260232

261233
from numpy.distutils.core import setup
262234

sklearn/calibration.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,8 @@ def predict(self, T):
517517
def calibration_curve(y_true, y_prob, normalize=False, n_bins=5):
518518
"""Compute true and predicted probabilities for a calibration curve.
519519
520+
The method assumes the inputs come from a binary classifier.
521+
520522
Calibration curves may also be referred to as reliability diagrams.
521523
522524
Read more in the :ref:`User Guide <calibration>`.

sklearn/metrics/cluster/supervised.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,7 @@ def mutual_info_score(labels_true, labels_pred, contingency=None):
601601
log_contingency_nm = np.log(nz_val)
602602
contingency_nm = nz_val / contingency_sum
603603
# Don't need to calculate the full outer product, just for non-zeroes
604-
outer = pi.take(nzx) * pj.take(nzy)
604+
outer = pi.take(nzx).astype(np.int64) * pj.take(nzy).astype(np.int64)
605605
log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
606606
mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
607607
contingency_nm * log_outer)

sklearn/metrics/cluster/tests/test_supervised.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from sklearn.metrics.cluster import normalized_mutual_info_score
1414
from sklearn.metrics.cluster import v_measure_score
1515

16+
from sklearn.utils import assert_all_finite
1617
from sklearn.utils.testing import (
1718
assert_equal, assert_almost_equal, assert_raise_message,
1819
)
@@ -172,6 +173,17 @@ def test_expected_mutual_info_overflow():
172173
assert expected_mutual_information(np.array([[70000]]), 70000) <= 1
173174

174175

176+
def test_int_overflow_mutual_info_score():
177+
# Test that mutual_info_score stays finite (no integer overflow) on large counts
178+
x = np.array([1] * (52632 + 2529) + [2] * (14660 + 793) + [3] * (3271 +
179+
204) + [4] * (814 + 39) + [5] * (316 + 20))
180+
y = np.array([0] * 52632 + [1] * 2529 + [0] * 14660 + [1] * 793 +
181+
[0] * 3271 + [1] * 204 + [0] * 814 + [1] * 39 + [0] * 316 +
182+
[1] * 20)
183+
184+
assert_all_finite(mutual_info_score(x.ravel(), y.ravel()))
185+
186+
175187
def test_entropy():
176188
ent = entropy([0, 0, 42.])
177189
assert_almost_equal(ent, 0.6365141, 5)

sklearn/svm/base.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -228,15 +228,6 @@ def _dense_fit(self, X, y, sample_weight, solver_type, kernel,
228228

229229
libsvm.set_verbosity_wrap(self.verbose)
230230

231-
if six.PY2:
232-
# In python2 ensure kernel is ascii bytes to prevent a TypeError
233-
if isinstance(kernel, six.types.UnicodeType):
234-
kernel = str(kernel)
235-
if six.PY3:
236-
# In python3 ensure kernel is utf8 unicode to prevent a TypeError
237-
if isinstance(kernel, bytes):
238-
kernel = str(kernel, 'utf8')
239-
240231
# we don't pass **self.get_params() to allow subclasses to
241232
# add other parameters to __init__
242233
self.support_, self.support_vectors_, self.n_support_, \

sklearn/svm/libsvm.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ LIBSVM_KERNEL_TYPES = ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']
5454
def fit(
5555
np.ndarray[np.float64_t, ndim=2, mode='c'] X,
5656
np.ndarray[np.float64_t, ndim=1, mode='c'] Y,
57-
int svm_type=0, str kernel='rbf', int degree=3,
57+
int svm_type=0, kernel='rbf', int degree=3,
5858
double gamma=0.1, double coef0=0., double tol=1e-3,
5959
double C=1., double nu=0.5, double epsilon=0.1,
6060
np.ndarray[np.float64_t, ndim=1, mode='c']
@@ -342,7 +342,7 @@ def predict_proba(
342342
np.ndarray[np.float64_t, ndim=1, mode='c'] intercept,
343343
np.ndarray[np.float64_t, ndim=1, mode='c'] probA=np.empty(0),
344344
np.ndarray[np.float64_t, ndim=1, mode='c'] probB=np.empty(0),
345-
int svm_type=0, str kernel='rbf', int degree=3,
345+
int svm_type=0, kernel='rbf', int degree=3,
346346
double gamma=0.1, double coef0=0.,
347347
np.ndarray[np.float64_t, ndim=1, mode='c']
348348
class_weight=np.empty(0),
@@ -462,7 +462,7 @@ def decision_function(
462462
def cross_validation(
463463
np.ndarray[np.float64_t, ndim=2, mode='c'] X,
464464
np.ndarray[np.float64_t, ndim=1, mode='c'] Y,
465-
int n_fold, svm_type=0, str kernel='rbf', int degree=3,
465+
int n_fold, svm_type=0, kernel='rbf', int degree=3,
466466
double gamma=0.1, double coef0=0., double tol=1e-3,
467467
double C=1., double nu=0.5, double epsilon=0.1,
468468
np.ndarray[np.float64_t, ndim=1, mode='c']

sklearn/svm/tests/test_svm.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -505,27 +505,25 @@ def test_bad_input():
505505

506506

507507
def test_unicode_kernel():
508-
# Test that a unicode kernel name does not cause a TypeError on clf.fit
508+
# Test that a unicode kernel name does not cause a TypeError
509509
if six.PY2:
510510
# Test unicode (same as str on python3)
511-
clf = svm.SVC(kernel=unicode('linear'))
512-
clf.fit(X, Y)
513-
514-
# Test ascii bytes (str is bytes in python2)
515-
clf = svm.SVC(kernel=str('linear'))
516-
clf.fit(X, Y)
517-
else:
518-
# Test unicode (str is unicode in python3)
519-
clf = svm.SVC(kernel=str('linear'))
520-
clf.fit(X, Y)
521-
522-
# Test ascii bytes (same as str on python2)
523-
clf = svm.SVC(kernel=bytes('linear', 'ascii'))
511+
clf = svm.SVC(kernel=u'linear', probability=True)
524512
clf.fit(X, Y)
513+
clf.predict_proba(T)
514+
svm.libsvm.cross_validation(iris.data,
515+
iris.target.astype(np.float64), 5,
516+
kernel=u'linear',
517+
random_seed=0)
525518

526519
# Test default behavior on both versions
527-
clf = svm.SVC(kernel='linear')
520+
clf = svm.SVC(kernel='linear', probability=True)
528521
clf.fit(X, Y)
522+
clf.predict_proba(T)
523+
svm.libsvm.cross_validation(iris.data,
524+
iris.target.astype(np.float64), 5,
525+
kernel='linear',
526+
random_seed=0)
529527

530528

531529
def test_sparse_precomputed():

sklearn/tree/_splitter.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,8 @@ cdef class BestSplitter(BaseDenseSplitter):
535535
# Sort n-element arrays pointed to by Xf and samples, simultaneously,
536536
# by the values in Xf. Algorithm: Introsort (Musser, SP&E, 1997).
537537
cdef inline void sort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil:
538+
if n == 0:
539+
return
538540
cdef int maxd = 2 * <int>log(n)
539541
introsort(Xf, samples, n, maxd)
540542

0 commit comments

Comments
 (0)
0