Merge branch 'main' into pca-on-sparse-noncentered-data · scikit-learn/scikit-learn@600f2f1 · GitHub
[go: up one dir, main page]

Skip to content

Commit 600f2f1

Browse files
committed
Merge branch 'main' into pca-on-sparse-noncentered-data
2 parents d9ca26d + 080bf81 commit 600f2f1

File tree

20 files changed

+94
-145
lines changed

20 files changed

+94
-145
lines changed

.github/workflows/artifact-redirector.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
name: CircleCI artifacts redirector
22
on: [status]
3+
4+
# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
5+
# github actions workflow:
6+
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
7+
permissions:
8+
statuses: write
9+
310
jobs:
411
circleci_artifacts_redirector_job:
512
runs-on: ubuntu-latest

.github/workflows/assign.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ on:
44
issue_comment:
55
types: created
66

7+
# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
8+
# github actions workflow:
9+
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
10+
permissions:
11+
issues: write
12+
713
jobs:
814
one:
915
runs-on: ubuntu-latest

.github/workflows/labeler-module.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@ on:
33
pull_request_target:
44
types: [opened]
55

6+
# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
7+
# github actions workflow:
8+
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
9+
permissions:
10+
contents: read
11+
pull-requests: write
12+
613
jobs:
714
triage:
815
runs-on: ubuntu-latest

.github/workflows/labeler-title-regex.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ on:
33
pull_request_target:
44
types: [opened, edited]
55

6+
# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
7+
# github actions workflow:
8+
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
69
permissions:
710
contents: read
811
pull-requests: write

.github/workflows/unassign.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ on:
44
issues:
55
types: unassigned
66

7+
# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
8+
# github actions workflow:
9+
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
10+
permissions:
11+
issues: write
12+
713
jobs:
814
one:
915
runs-on: ubuntu-latest

doc/themes/scikit-learn-modern/javascript.html

Lines changed: 0 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -67,78 +67,6 @@
6767
this.getAttribute('id') +
6868
'" title="Permalink to this term">¶</a>');
6969
});
70-
71-
{%- if pagename != 'index' and pagename != 'documentation' %}
72-
/*** Hide navbar when scrolling down ***/
73-
// Returns true when headerlink target matches hash in url
74-
(function() {
75-
hashTargetOnTop = function() {
76-
var hash = window.location.hash;
77-
if ( hash.length < 2 ) { return false; }
78-
79-
var target = document.getElementById( hash.slice(1) );
80-
if ( target === null ) { return false; }
81-
82-
var top = target.getBoundingClientRect().top;
83-
return (top < 2) && (top > -2);
84-
};
85-
86-
// Hide navbar on load if hash target is on top
87-
var navBar = document.getElementById("navbar");
88-
var navBarToggler = document.getElementById("sk-navbar-toggler");
89-
var navBarHeightHidden = "-" + navBar.getBoundingClientRect().height + "px";
90-
var $window = $(window);
91-
92-
hideNavBar = function() {
93-
navBar.style.top = navBarHeightHidden;
94-
};
95-
96-
showNavBar = function() {
97-
navBar.style.top = "0";
98-
}
99-
100-
if (hashTargetOnTop()) {
101-
hideNavBar()
102-
}
103-
104-
var prevScrollpos = window.pageYOffset;
105-
hideOnScroll = function(lastScrollTop) {
106-
if (($window.width() < 768) && (navBarToggler.getAttribute("aria-expanded") === 'true')) {
107-
return;
108-
}
109-
if (lastScrollTop > 2 && (prevScrollpos <= lastScrollTop) || hashTargetOnTop()){
110-
hideNavBar()
111-
} else {
112-
showNavBar()
113-
}
114-
prevScrollpos = lastScrollTop;
115-
};
116-
117-
/*** high performance scroll event listener***/
118-
var raf = window.requestAnimationFrame ||
119-
window.webkitRequestAnimationFrame ||
120-
window.mozRequestAnimationFrame ||
121-
window.msRequestAnimationFrame ||
122-
window.oRequestAnimationFrame;
123-
var lastScrollTop = $window.scrollTop();
124-
125-
if (raf) {
126-
loop();
127-
}
128-
129-
function loop() {
130-
var scrollTop = $window.scrollTop();
131-
if (lastScrollTop === scrollTop) {
132-
raf(loop);
133-
return;
134-
} else {
135-
lastScrollTop = scrollTop;
136-
hideOnScroll(lastScrollTop);
137-
raf(loop);
138-
}
139-
}
140-
})();
141-
{%- endif %}
14270
});
14371

14472
</script>

doc/themes/scikit-learn-modern/layout.html

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,6 @@
4646
<label id="sk-sidemenu-toggle" class="sk-btn-toggle-toc btn sk-btn-primary" for="sk-toggle-checkbox">Toggle Menu</label>
4747
<div id="sk-sidebar-wrapper" class="border-right">
4848
<div class="sk-sidebar-toc-wrapper">
49-
<div class="sk-sidebar-toc-logo">
50-
{%- if logo %}
51-
<a href="{{ pathto('index') }}">
52-
<img
53-
class="sk-brand-img"
54-
src="{{ pathto('_static/' + logo, 1) }}"
55-
alt="logo"/>
56-
</a>
57-
{%- endif %}
58-
</div>
5949
<div class="btn-group w-100 mb-2" role="group" aria-label="rellinks">
6050
{%- if prev %}
6151
<a href="{{ prev.link|e }}" role="button" class="btn sk-btn-rellink py-1" sk-rellink-tooltip="{{ prev.title|striptags }}">Prev</a>

doc/themes/scikit-learn-modern/static/css/theme.css

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -534,10 +534,6 @@ a.sk-documentation-index-anchor:hover {
534534

535535
/* toc */
536536

537-
div.sk-sidebar-toc-logo {
538-
height: 52px;
539-
}
540-
541537
.sk-toc-active {
542538
font-weight: bold;
543539
}
@@ -549,6 +545,7 @@ div.sk-sidebar-toc-wrapper {
549545
overflow-y: scroll;
550546
height: 100vh;
551547
padding-right: 1.75rem;
548+
padding-top: 52px;
552549

553550
/* Hide scrollbar for IE and Edge */
554551
-ms-overflow-style: none;

doc/whats_new/v1.2.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,10 @@ Changelog
204204
no longer raise warnings when fitting data with feature names.
205205
:pr:`24873` by :user:`Tim Head <betatim>`.
206206

207+
- |Fix| Improves error message in :class:`neural_network.MLPClassifier` and
208+
:class:`neural_network.MLPRegressor`, when `early_stopping=True` and
209+
:meth:`partial_fit` is called. :pr:`25694` by `Thomas Fan`_.
210+
207211
:mod:`sklearn.preprocessing`
208212
............................
209213

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
"sklearn.manifold._barnes_hut_tsne",
9696
"sklearn.manifold._utils",
9797
"sklearn.metrics.cluster._expected_mutual_info_fast",
98+
"sklearn.metrics._dist_metrics",
9899
"sklearn.metrics._pairwise_distances_reduction._datasets_pair",
99100
"sklearn.metrics._pairwise_distances_reduction._middle_term_computer",
100101
"sklearn.metrics._pairwise_distances_reduction._base",
@@ -201,7 +202,7 @@ def build_extensions(self):
201202
print(f"Using old NumPy C API (version 1.7) for extension {ext.name}")
202203

203204
if sklearn._OPENMP_SUPPORTED:
204-
openmp_flag = get_openmp_flag(self.compiler)
205+
openmp_flag = get_openmp_flag()
205206

206207
for e in self.extensions:
207208
e.extra_compile_args += openmp_flag

sklearn/_build_utils/openmp_helpers.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,7 @@
1212
from .pre_build_helpers import compile_test_program
1313

1414

15-
def get_openmp_flag(compiler):
16-
if hasattr(compiler, "compiler"):
17-
compiler = compiler.compiler[0]
18-
else:
19-
compiler = compiler.__class__.__name__
20-
15+
def get_openmp_flag():
2116
if sys.platform == "win32":
2217
return ["/openmp"]
2318
elif sys.platform == "darwin" and "openmp" in os.getenv("CPPFLAGS", ""):
@@ -66,7 +61,7 @@ def check_openmp_support():
6661
if flag.startswith(("-L", "-Wl,-rpath", "-l", "-Wl,--sysroot=/"))
6762
]
6863

69-
extra_postargs = get_openmp_flag
64+
extra_postargs = get_openmp_flag()
7065

7166
openmp_exception = None
7267
try:

sklearn/_build_utils/pre_build_helpers.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,11 @@
1010
from setuptools.command.build_ext import customize_compiler, new_compiler
1111

1212

13-
def compile_test_program(code, extra_preargs=[], extra_postargs=[]):
13+
def compile_test_program(code, extra_preargs=None, extra_postargs=None):
1414
"""Check that some C code can be compiled and run"""
1515
ccompiler = new_compiler()
1616
customize_compiler(ccompiler)
1717

18-
# extra_(pre/post)args can be a callable to make it possible to get its
19-
# value from the compiler
20-
if callable(extra_preargs):
21-
extra_preargs = extra_preargs(ccompiler)
22-
if callable(extra_postargs):
23-
extra_postargs = extra_postargs(ccompiler)
24-
2518
start_dir = os.path.abspath(".")
2619

2720
with tempfile.TemporaryDirectory() as tmp_dir:

sklearn/datasets/_svmlight_format_io.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from .. import __version__
2626

2727
from ..utils import check_array, IS_PYPY
28+
from ..utils._param_validation import validate_params, HasMethods
2829

2930
if not IS_PYPY:
3031
from ._svmlight_format_fast import (
@@ -404,6 +405,17 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):
404405
)
405406

406407

408+
@validate_params(
409+
{
410+
"X": ["array-like", "sparse matrix"],
411+
"y": ["array-like", "sparse matrix"],
412+
"f": [str, HasMethods(["write"])],
413+
"zero_based": ["boolean"],
414+
"comment": [str, bytes, None],
415+
"query_id": ["array-like", None],
416+
"multilabel": ["boolean"],
417+
}
418+
)
407419
def dump_svmlight_file(
408420
X,
409421
y,
@@ -428,7 +440,7 @@ def dump_svmlight_file(
428440
Training vectors, where `n_samples` is the number of samples and
429441
`n_features` is the number of features.
430442
431-
y : {array-like, sparse matrix}, shape = [n_samples (, n_labels)]
443+
y : {array-like, sparse matrix}, shape = (n_samples,) or (n_samples, n_labels)
432444
Target values. Class labels must be an
433445
integer or float, or array-like objects of integer or float for
434446
multilabel classifications.
@@ -442,7 +454,7 @@ def dump_svmlight_file(
442454
Whether column indices should be written zero-based (True) or one-based
443455
(False).
444456
445-
comment : str, default=None
457+
comment : str or bytes, default=None
446458
Comment to insert at the top of the file. This should be either a
447459
Unicode string, which will be encoded as UTF-8, or an ASCII byte
448460
string.
@@ -459,7 +471,7 @@ def dump_svmlight_file(
459471
https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).
460472
461473
.. versionadded:: 0.17
462-
parameter *multilabel* to support multilabel datasets.
474+
parameter `multilabel` to support multilabel datasets.
463475
"""
464476
if comment is not None:
465477
# Convert comment string to list of lines in UTF-8.

sklearn/metrics/_dist_metrics.pyx.tp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ METRIC_MAPPING{{name_suffix}} = {
115115
'pyfunc': PyFuncDistance{{name_suffix}},
116116
}
117117

118-
cdef inline cnp.ndarray _buffer_to_ndarray{{name_suffix}}(const {{INPUT_DTYPE_t}}* x, cnp.npy_intp n):
118+
cdef inline object _buffer_to_ndarray{{name_suffix}}(const {{INPUT_DTYPE_t}}* x, cnp.npy_intp n):
119119
# Wrap a memory buffer with an ndarray. Warning: this is not robust.
120120
# In particular, if x is deallocated before the returned array goes
121121
# out of scope, this could cause memory errors. Since there is not
@@ -620,9 +620,9 @@ cdef class DistanceMetric{{name_suffix}}:
620620
return dist
621621

622622
def _pairwise_dense_dense(self, X, Y):
623-
cdef cnp.ndarray[{{INPUT_DTYPE_t}}, ndim=2, mode='c'] Xarr
624-
cdef cnp.ndarray[{{INPUT_DTYPE_t}}, ndim=2, mode='c'] Yarr
625-
cdef cnp.ndarray[DTYPE_t, ndim=2, mode='c'] Darr
623+
cdef const {{INPUT_DTYPE_t}}[:, ::1] Xarr
624+
cdef const {{INPUT_DTYPE_t}}[:, ::1] Yarr
625+
cdef DTYPE_t[:, ::1] Darr
626626

627627
Xarr = np.asarray(X, dtype={{INPUT_DTYPE}}, order='C')
628628
self._validate_data(Xarr)
@@ -2806,10 +2806,9 @@ cdef class PyFuncDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
28062806
const {{INPUT_DTYPE_t}}* x2,
28072807
ITYPE_t size,
28082808
) except -1 with gil:
2809-
cdef cnp.ndarray x1arr
2810-
cdef cnp.ndarray x2arr
2811-
x1arr = _buffer_to_ndarray{{name_suffix}}(x1, size)
2812-
x2arr = _buffer_to_ndarray{{name_suffix}}(x2, size)
2809+
cdef:
2810+
object x1arr = _buffer_to_ndarray{{name_suffix}}(x1, size)
2811+
object x2arr = _buffer_to_ndarray{{name_suffix}}(x2, size)
28132812
d = self.func(x1arr, x2arr, **self.kwargs)
28142813
try:
28152814
# Cython generates code here that results in a TypeError

sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ from sklearn import get_config
3232
from sklearn.utils import check_scalar
3333
from ...utils._openmp_helpers import _openmp_effective_n_threads
3434
from ...utils._typedefs import DTYPE, SPARSE_INDEX_TYPE
35+
from ...utils.sparsefuncs_fast import _sqeuclidean_row_norms_sparse
3536

3637
cnp.import_array()
3738

@@ -103,23 +104,6 @@ cdef DTYPE_t[::1] _sqeuclidean_row_norms32_dense(
103104
return squared_row_norms
104105

105106

106-
cdef DTYPE_t[::1] _sqeuclidean_row_norms64_sparse(
107-
const DTYPE_t[:] X_data,
108-
const SPARSE_INDEX_TYPE_t[:] X_indptr,
109-
ITYPE_t num_threads,
110-
):
111-
cdef:
112-
ITYPE_t n = X_indptr.shape[0] - 1
113-
SPARSE_INDEX_TYPE_t X_i_ptr, idx = 0
114-
DTYPE_t[::1] squared_row_norms = np.zeros(n, dtype=DTYPE)
115-
116-
for idx in prange(n, schedule='static', nogil=True, num_threads=num_threads):
117-
for X_i_ptr in range(X_indptr[idx], X_indptr[idx+1]):
118-
squared_row_norms[idx] += X_data[X_i_ptr] * X_data[X_i_ptr]
119-
120-
return squared_row_norms
121-
122-
123107
{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
124108

125109
from ._datasets_pair cimport DatasetsPair{{name_suffix}}
@@ -131,10 +115,10 @@ cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}(
131115
):
132116
if issparse(X):
133117
# TODO: remove this instruction which is a cast in the float32 case
134-
# by moving squared row norms computations in MiddleTermComputer.
118+
# by moving squared row norms computations in MiddleTermComputer.
135119
X_data = np.asarray(X.data, dtype=DTYPE)
136120
X_indptr = np.asarray(X.indptr, dtype=SPARSE_INDEX_TYPE)
137-
return _sqeuclidean_row_norms64_sparse(X_data, X_indptr, num_threads)
121+
return _sqeuclidean_row_norms_sparse(X_data, X_indptr, num_threads)
138122
else:
139123
return _sqeuclidean_row_norms{{name_suffix}}_dense(X, num_threads)
140124

sklearn/neural_network/_multilayer_perceptron.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,9 @@ def _fit_stochastic(
575575
)
576576

577577
# early_stopping in partial_fit doesn't make sense
578-
early_stopping = self.early_stopping and not incremental
578+
if self.early_stopping and incremental:
579+
raise ValueError("partial_fit does not support early_stopping=True")
580+
early_stopping = self.early_stopping
579581
if early_stopping:
580582
# don't stratify in multilabel classification
581583
should_stratify = is_classifier(self) and self.n_outputs_ == 1

0 commit comments

Comments
 (0)
0