FIX make_circles() now works with odd number of samples, test added (#10045) · scikit-learn/scikit-learn@4bead39 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4bead39

Browse files
christianbraune79 authored and jnothman committed
FIX make_circles() now works with odd number of samples, test added (#10045)
1 parent 9092347 commit 4bead39

File tree

3 files changed

+47
-12
lines changed

3 files changed

+47
-12
lines changed

doc/whats_new/v0.20.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ Decomposition, manifold learning and clustering
156156
wrapped estimator and its parameter. :issue:`9999` by :user:`Marcus Voss
157157
<marcus-voss>` and `Joel Nothman`_.
158158

159+
- Fixed a bug in :func:`datasets.make_circles`, where an odd number of data
160+
points could not be generated. :issue:`10037` by :user:`Christian Braune
161+
<christianbraune79>`.
162+
159163
Metrics
160164
161165
- Fixed a bug due to floating point error in :func:`metrics.roc_auc_score` with

sklearn/datasets/samples_generator.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,8 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None,
585585
Parameters
586586
----------
587587
n_samples : int, optional (default=100)
588-
The total number of points generated.
588+
The total number of points generated. If odd, the inner circle will
589+
have one point more than the outer circle.
589590
590591
shuffle : bool, optional (default=True)
591592
Whether to shuffle the samples.
@@ -599,7 +600,7 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None,
599600
If None, the random number generator is the RandomState instance used
600601
by `np.random`.
601602
602-
factor : double < 1 (default=.8)
603+
factor : 0 <= double < 1 (default=.8)
603604
Scale factor between inner and outer circle.
604605
605606
Returns
@@ -611,22 +612,25 @@ def make_circles(n_samples=100, shuffle=True, noise=None, random_state=None,
611612
The integer labels (0 or 1) for class membership of each sample.
612613
"""
613614

614-
if factor > 1 or factor < 0:
615+
if factor >= 1 or factor < 0:
615616
raise ValueError("'factor' has to be between 0 and 1.")
616617

618+
n_samples_out = n_samples // 2
619+
n_samples_in = n_samples - n_samples_out
620+
617621
generator = check_random_state(random_state)
618-
# so as not to have the first point = last point, we add one and then
619-
# remove it.
620-
linspace = np.linspace(0, 2 * np.pi, n_samples // 2 + 1)[:-1]
621-
outer_circ_x = np.cos(linspace)
622-
outer_circ_y = np.sin(linspace)
623-
inner_circ_x = outer_circ_x * factor
624-
inner_circ_y = outer_circ_y * factor
622+
# so as not to have the first point = last point, we set endpoint=False
623+
linspace_out = np.linspace(0, 2 * np.pi, n_samples_out, endpoint=False)
624+
linspace_in = np.linspace(0, 2 * np.pi, n_samples_in, endpoint=False)
625+
outer_circ_x = np.cos(linspace_out)
626+
outer_circ_y = np.sin(linspace_out)
627+
inner_circ_x = np.cos(linspace_in) * factor
628+
inner_circ_y = np.sin(linspace_in) * factor
625629

626630
X = np.vstack((np.append(outer_circ_x, inner_circ_x),
627631
np.append(outer_circ_y, inner_circ_y))).T
628-
y = np.hstack([np.zeros(n_samples // 2, dtype=np.intp),
629-
np.ones(n_samples // 2, dtype=np.intp)])
632+
y = np.hstack([np.zeros(n_samples_out, dtype=np.intp),
633+
np.ones(n_samples_in, dtype=np.intp)])
630634
if shuffle:
631635
X, y = util_shuffle(X, y, random_state=generator)
632636

sklearn/datasets/tests/test_samples_generator.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from sklearn.datasets import make_friedman3
2626
from sklearn.datasets import make_low_rank_matrix
2727
from sklearn.datasets import make_moons
28+
from sklearn.datasets import make_circles
2829
from sklearn.datasets import make_sparse_coded_signal
2930
from sklearn.datasets import make_sparse_uncorrelated
3031
from sklearn.datasets import make_spd_matrix
@@ -385,3 +386,29 @@ def test_make_moons():
385386
dist_sqr = ((x - center) ** 2).sum()
386387
assert_almost_equal(dist_sqr, 1.0,
387388
err_msg="Point is not on expected unit circle")
389+
390+
391+
def test_make_circles():
392+
factor = 0.3
393+
394+
for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]:
395+
# Testing odd and even case, because in the past make_circles always
396+
# created an even number of samples.
397+
X, y = make_circles(n_samples, shuffle=False, noise=None,
398+
factor=factor)
399+
assert_equal(X.shape, (n_samples, 2), "X shape mismatch")
400+
assert_equal(y.shape, (n_samples,), "y shape mismatch")
401+
center = [0.0, 0.0]
402+
for x, label in zip(X, y):
403+
dist_sqr = ((x - center) ** 2).sum()
404+
dist_exp = 1.0 if label == 0 else factor**2
405+
assert_almost_equal(dist_sqr, dist_exp,
406+
err_msg="Point is not on expected circle")
407+
408+
assert_equal(X[y == 0].shape, (n_outer, 2),
409+
"Samples not correctly distributed across circles.")
410+
assert_equal(X[y == 1].shape, (n_inner, 2),
411+
"Samples not correctly distributed across circles.")
412+
413+
assert_raises(ValueError, make_circles, factor=-0.01)
414+
assert_raises(ValueError, make_circles, factor=1.)

0 commit comments

Comments
 (0)
0