8000 MNT Enforce ruff/Perflint rules (PERF) by DimitriPapadopoulos · Pull Request #30693 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

MNT Enforce ruff/Perflint rules (PERF) #30693

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions build_tools/generate_authors_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,7 @@ def generate_table(contributors):


 def generate_list(contributors):
-    lines = []
-    for contributor in contributors:
-        lines.append("- %s" % (contributor["name"],))
+    lines = ["- %s" % (contributor["name"],) for contributor in contributors]
     return "\n".join(lines) + "\n"


Expand Down
14 changes: 7 additions & 7 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,14 +1039,14 @@ def infer_next_release_versions():
]

# Convert each module API reference page
-for module in API_REFERENCE:
-    rst_templates.append(
-        (
-            "api/module",
-            f"api/{module}",
-            {"module": module, "module_info": API_REFERENCE[module]},
-        )
-    )
+rst_templates.extend(
+    (
+        "api/module",
+        f"api/{module}",
+        {"module": module, "module_info": API_REFERENCE[module]},
+    )
+    for module in API_REFERENCE
+)

# Convert the deprecated API reference page (if there exists any)
if DEPRECATED_API_REFERENCE:
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ preview = true
# This enables us to use the explicit preview rules that we want only
explicit-preview-rules = true
# all rules can be found here: https://docs.astral.sh/ruff/rules/
-select = ["E", "F", "W", "I", "CPY001", "RUF"]
+select = ["E", "F", "W", "I", "CPY001", "PERF", "RUF"]
ignore=[
# space before : (needed for how black formats slicing)
"E203",
Expand All @@ -161,6 +161,8 @@ ignore=[
# We don't care much about F841.
# Local variable ... is assigned to but never used
"F841",
+    # `try`-`except` within a loop incurs performance overhead
+    "PERF203",
# some RUF rules trigger too many changes
"RUF002",
"RUF003",
Expand Down
44 changes: 21 additions & 23 deletions sklearn/covariance/_robust_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,35 +314,33 @@ def select_candidates(
)

# compute `n_trials` location and shape estimates candidates in the subset
-    all_estimates = []
     if not run_from_estimates:
         # perform `n_trials` computations from random initial supports
-        for j in range(n_trials):
-            all_estimates.append(
-                _c_step(
-                    X,
-                    n_support,
-                    remaining_iterations=n_iter,
-                    verbose=verbose,
-                    cov_computation_method=cov_computation_method,
-                    random_state=random_state,
-                )
-            )
+        all_estimates = [
+            _c_step(
+                X,
+                n_support,
+                remaining_iterations=n_iter,
+                verbose=verbose,
+                cov_computation_method=cov_computation_method,
+                random_state=random_state,
+            )
+            for j in range(n_trials)
+        ]
     else:
         # perform computations from every given initial estimates
-        for j in range(n_trials):
-            initial_estimates = (estimates_list[0][j], estimates_list[1][j])
-            all_estimates.append(
-                _c_step(
-                    X,
-                    n_support,
-                    remaining_iterations=n_iter,
-                    initial_estimates=initial_estimates,
-                    verbose=verbose,
-                    cov_computation_method=cov_computation_method,
-                    random_state=random_state,
-                )
-            )
+        all_estimates = [
+            _c_step(
+                X,
+                n_support,
+                remaining_iterations=n_iter,
+                initial_estimates=(estimates_list[0][j], estimates_list[1][j]),
+                verbose=verbose,
+                cov_computation_method=cov_computation_method,
+                random_state=random_state,
+            )
+            for j in range(n_trials)
+        ]
all_locs_sub, all_covs_sub, all_dets_sub, all_supports_sub, all_ds_sub = zip(
*all_estimates
)
Expand Down
8 changes: 4 additions & 4 deletions sklearn/datasets/_arff_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,10 @@ def _io_to_generator(gzip_file):
# read arff data with chunks
columns_to_keep = [col for col in columns_names if col in columns_to_select]
dfs = [first_df[columns_to_keep]]
-    for data in chunk_generator(arff_container["data"], chunksize):
-        dfs.append(
-            pd.DataFrame(data, columns=columns_names, copy=False)[columns_to_keep]
-        )
+    dfs.extend(
+        pd.DataFrame(data, columns=columns_names, copy=False)[columns_to_keep]
+        for data in chunk_generator(arff_container["data"], chunksize)
+    )
Comment on lines +199 to +202
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

most of the changes in this PR are insignificant. But in places like this one, it can actually add quite a bit of value.

Therefore I don't mind having these rules enabled. In reality, once this PR is merged, we won't have much of an issue with the rule itself, since it affects a tiny portion of the code we write, and very often for the better.

# dfs[0] contains only one row, which may not have enough data to infer to
# column's dtype. Here we use `dfs[1]` to configure the dtype in dfs[0]
if len(dfs) >= 2:
Expand Down
4 changes: 1 addition & 3 deletions sklearn/datasets/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,7 @@ def load_files(
target = target[indices]

if load_content:
-        data = []
-        for filename in filenames:
-            data.append(Path(filename).read_bytes())
+        data = [Path(filename).read_bytes() for filename in filenames]
if encoding is not None:
data = [d.decode(encoding, decode_error) for d in data]
return Bunch(
Expand Down
10 changes: 5 additions &amp; 5 deletions sklearn/datasets/_openml.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,15 +741,15 @@ def _valid_data_column_names(features_list, target_columns):
# OpenML guide follows that columns that have the `is_row_identifier` or
# `is_ignore` flag, these can not be learned on. Also target columns are
# excluded.
-    valid_data_column_names = []
-    for feature in features_list:
-        if (
-            feature["name"] not in target_columns
-            and feature["is_ignore"] != "true"
-            and feature["is_row_identifier"] != "true"
-        ):
-            valid_data_column_names.append(feature["name"])
-    return valid_data_column_names
+    return [
+        feature["name"]
+        for feature in features_list
+        if (
+            feature["name"] not in target_columns
+            and feature["is_ignore"] != "true"
+            and feature["is_row_identifier"] != "true"
+        )
+    ]


@validate_params(
Expand Down
4 changes: 3 additions & 1 deletion sklearn/decomposition/_incremental_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,9 @@ def transform(self, X):
for batch in gen_batches(
n_samples, self.batch_size_, min_batch_size=self.n_components or 0
):
-            output.append(super().transform(X[batch].toarray()))
+            output.append(  # noqa: PERF401 # FIXME
+                super().transform(X[batch].toarray())
+            )
return np.vstack(output)
else:
return super().transform(X)
Expand Down
3 changes: 1 addition & 2 deletions sklearn/ensemble/_stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,7 @@ def _concatenate_predictions(self, X, predictions):
# Since those probabilities must sum to one for each sample,
# we can work with probabilities of `n_classes - 1` classes.
# Hence we drop the first column.
-            for pred in preds:
-                X_meta.append(pred[:, 1:])
+            X_meta.extend(pred[:, 1:] for pred in preds)
elif preds.ndim == 1:
# Some estimator return a 1D array for predictions
# which must be 2-dimensional arrays.
Expand Down
44 changes: 22 additions & 22 deletions sklearn/gaussian_process/kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,11 +296,12 @@ def theta(self):
theta : ndarray of shape (n_dims,)
The non-fixed, log-transformed hyperparameters of the kernel
"""
-        theta = []
         params = self.get_params()
-        for hyperparameter in self.hyperparameters:
-            if not hyperparameter.fixed:
-                theta.append(params[hyperparameter.name])
+        theta = [
+            params[hyperparameter.name]
+            for hyperparameter in self.hyperparameters
+            if not hyperparameter.fixed
+        ]
if len(theta) > 0:
return np.log(np.hstack(theta))
else:
Expand Down Expand Up @@ -719,15 +720,16 @@ def hyperparameters(self):
for hyperparameter in self.k1.hyperparameters
]

-        for hyperparameter in self.k2.hyperparameters:
-            r.append(
-                Hyperparameter(
-                    "k2__" + hyperparameter.name,
-                    hyperparameter.value_type,
-                    hyperparameter.bounds,
-                    hyperparameter.n_elements,
-                )
-            )
+        r.extend(
+            Hyperparameter(
+                "k2__" + hyperparameter.name,
+                hyperparameter.value_type,
+                hyperparameter.bounds,
+                hyperparameter.n_elements,
+            )
+            for hyperparameter in self.k2.hyperparameters
+        )

return r

@property
Expand Down Expand Up @@ -1056,17 +1058,15 @@ def get_params(self, deep=True):
@property
def hyperparameters(self):
"""Returns a list of all hyperparameter."""
-        r = []
-        for hyperparameter in self.kernel.hyperparameters:
-            r.append(
-                Hyperparameter(
-                    "kernel__" + hyperparameter.name,
-                    hyperparameter.value_type,
-                    hyperparameter.bounds,
-                    hyperparameter.n_elements,
-                )
-            )
-        return r
+        return [
+            Hyperparameter(
+                "kernel__" + hyperparameter.name,
+                hyperparameter.value_type,
+                hyperparameter.bounds,
+                hyperparameter.n_elements,
+            )
+            for hyperparameter in self.kernel.hyperparameters
+        ]

@property
def theta(self):
Expand Down
3 changes: 1 addition & 2 deletions sklearn/manifold/tests/test_spectral_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ def test_sparse_graph_connected_component(coo_container):
group = p[start:stop]
# Connect all elements within the group at least once via an
# arbitrary path that spans the group.
-        for i in range(len(group) - 1):
-            connections.append((group[i], group[i + 1]))
+        connections.extend((group[i], group[i + 1]) for i in range(len(group) - 1))

# Add some more random connections within the group
min_idx, max_idx = 0, len(group) - 1
Expand Down
8 changes: 6 additions & 2 deletions sklearn/model_selection/_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1175,8 +1175,12 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
_store("fit_time", out["fit_time"])
_store("score_time", out["score_time"])
# Store a list of param dicts at the key 'params'
-        for param, ma in _yield_masked_array_for_each_param(candidate_params):
-            results[param] = ma
+        results.update(
+            {
+                param: ma
+                for param, ma in _yield_masked_array_for_each_param(candidate_params)
+            }
+        )
results["params"] = candidate_params

test_scores_dict = _normalize_score_results(out["test_scores"])
Expand Down
9 changes: 5 additions & 4 deletions sklearn/model_selection/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2091,10 +2091,11 @@ def learning_curve(
)
out = np.asarray(out).transpose((2, 1, 0))
else:
-        train_test_proportions = []
-        for train, test in cv_iter:
-            for n_train_samples in train_sizes_abs:
-                train_test_proportions.append((train[:n_train_samples], test))
+        train_test_proportions = [
+            (train[:n_train_samples], test)
+            for train, test in cv_iter
+            for n_train_samples in train_sizes_abs
+        ]

results = parallel(
delayed(_fit_and_score)(
Expand Down
9 changes: 5 additions & 4 deletions sklearn/preprocessing/_polynomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,10 +816,11 @@ def get_feature_names_out(self, input_features=None):
n_splines = self.bsplines_[0].c.shape[1]

input_features = _check_feature_names_in(self, input_features)
-        feature_names = []
-        for i in range(self.n_features_in_):
-            for j in range(n_splines - 1 + self.include_bias):
-                feature_names.append(f"{input_features[i]}_sp_{j}")
+        feature_names = [
+            f"{input_features[i]}_sp_{j}"
+            for i in range(self.n_features_in_)
+            for j in range(n_splines - 1 + self.include_bias)
+        ]
return np.asarray(feature_names, dtype=object)

@_fit_context(prefer_skip_nested_validation=True)
Expand Down
Loading
0