STY Run black · scikit-learn/scikit-learn@b9478e0 · GitHub

Commit b9478e0

STY Run black
1 parent 9b2ae4e commit b9478e0
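
This commit applies the black auto-formatter across the repository, so the changes below are purely stylistic: single quotes become double quotes, hand-wrapped call signatures are exploded onto one argument per line with trailing commas, and manually aligned continuations are collapsed. As a rough illustration only (not part of the commit), black can be invoked through its Python API to see the kind of rewrite it performs; the snippet below assumes the black package is installed and uses its default Mode, which may differ from the configuration this repository actually uses:

# Illustrative sketch: what black does to one of the lines touched by this commit.
# Assumes `pip install black`; format_str and Mode are part of black's public Python API.
import black

src = "params = (['dense', 'sparse'], ['full', 'elkan'], ['random', 'k-means++'])\n"

# black.Mode() holds black's default settings (88-character lines, double quotes).
print(black.format_str(src, mode=black.Mode()))
# params = (["dense", "sparse"], ["full", "elkan"], ["random", "k-means++"])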

File tree

512 files changed

+59892
-42633
lines changed
  • compose
  • covariance
  • cross_decomposition
  • datasets
  • decomposition
  • ensemble
  • experimental
  • feature_extraction
  • feature_selection
  • gaussian_process
  • impute
  • inspection
  • linear_model
  • manifold
  • metrics
  • mixture
  • model_selection
  • neighbors
  • neural_network
  • preprocessing
  • semi_supervised
  • svm
  • tests
  • tree
  • utils

    .github/scripts/label_title_regex.py

    Lines changed: 2 additions & 8 deletions
@@ -15,15 +15,9 @@
 title = issue.title
 
 
-regex_to_labels = [
-    (r"\bDOC\b", "Documentation"),
-    (r"\bCI\b", "Build / CI")
-]
+regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]
 
-labels_to_add = [
-    label for regex, label in regex_to_labels
-    if re.search(regex, title)
-]
+labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)]
 
 if labels_to_add:
     issue.add_to_labels(*labels_to_add)
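
The reformatting above does not change behavior: the mapping and the comprehension are only collapsed onto single lines. A minimal standalone sketch of that logic, using a hypothetical title string in place of the GitHub issue object:

# Standalone sketch of the labeling logic, with a made-up title standing in
# for issue.title (the real script reads it from the GitHub event payload).
import re

title = "DOC improve the KMeans docstring"

regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]
labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)]

print(labels_to_add)  # ['Documentation']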

    asv_benchmarks/benchmarks/cluster.py

    Lines changed: 43 additions & 39 deletions
@@ -10,16 +10,16 @@ class KMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
     Benchmarks for KMeans.
     """
 
-    param_names = ['representation', 'algorithm', 'init']
-    params = (['dense', 'sparse'], ['full', 'elkan'], ['random', 'k-means++'])
+    param_names = ["representation", "algorithm", "init"]
+    params = (["dense", "sparse"], ["full", "elkan"], ["random", "k-means++"])
 
     def setup_cache(self):
         super().setup_cache()
 
     def make_data(self, params):
         representation, algorithm, init = params
 
-        if representation == 'sparse':
+        if representation == "sparse":
             data = _20newsgroups_highdim_dataset(n_samples=8000)
         else:
             data = _blobs_dataset(n_clusters=20)
@@ -29,44 +29,46 @@ def make_data(self, params):
     def make_estimator(self, params):
         representation, algorithm, init = params
 
-        max_iter = 30 if representation == 'sparse' else 100
+        max_iter = 30 if representation == "sparse" else 100
 
-        estimator = KMeans(n_clusters=20,
-                           algorithm=algorithm,
-                           init=init,
-                           n_init=1,
-                           max_iter=max_iter,
-                           tol=-1,
-                           random_state=0)
+        estimator = KMeans(
+            n_clusters=20,
+            algorithm=algorithm,
+            init=init,
+            n_init=1,
+            max_iter=max_iter,
+            tol=-1,
+            random_state=0,
+        )
 
         return estimator
 
     def make_scorers(self):
-        self.train_scorer = (
-            lambda _, __: neg_mean_inertia(self.X,
-                                           self.estimator.predict(self.X),
-                                           self.estimator.cluster_centers_))
-        self.test_scorer = (
-            lambda _, __: neg_mean_inertia(self.X_val,
-                                           self.estimator.predict(self.X_val),
-                                           self.estimator.cluster_centers_))
+        self.train_scorer = lambda _, __: neg_mean_inertia(
+            self.X, self.estimator.predict(self.X), self.estimator.cluster_centers_
+        )
+        self.test_scorer = lambda _, __: neg_mean_inertia(
+            self.X_val,
+            self.estimator.predict(self.X_val),
+            self.estimator.cluster_centers_,
+        )
 
 
 class MiniBatchKMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
     """
     Benchmarks for MiniBatchKMeans.
     """
 
-    param_names = ['representation', 'init']
-    params = (['dense', 'sparse'], ['random', 'k-means++'])
+    param_names = ["representation", "init"]
+    params = (["dense", "sparse"], ["random", "k-means++"])
 
     def setup_cache(self):
         super().setup_cache()
 
     def make_data(self, params):
         representation, init = params
 
-        if representation == 'sparse':
+        if representation == "sparse":
             data = _20newsgroups_highdim_dataset()
         else:
             data = _blobs_dataset(n_clusters=20)
@@ -76,25 +78,27 @@ def make_data(self, params):
     def make_estimator(self, params):
         representation, init = params
 
-        max_iter = 5 if representation == 'sparse' else 2
+        max_iter = 5 if representation == "sparse" else 2
 
-        estimator = MiniBatchKMeans(n_clusters=20,
-                                    init=init,
-                                    n_init=1,
-                                    max_iter=max_iter,
-                                    batch_size=1000,
-                                    max_no_improvement=None,
-                                    compute_labels=False,
-                                    random_state=0)
+        estimator = MiniBatchKMeans(
+            n_clusters=20,
+            init=init,
+            n_init=1,
+            max_iter=max_iter,
+            batch_size=1000,
+            max_no_improvement=None,
+            compute_labels=False,
+            random_state=0,
+        )
 
         return estimator
 
     def make_scorers(self):
-        self.train_scorer = (
-            lambda _, __: neg_mean_inertia(self.X,
-                                           self.estimator.predict(self.X),
-                                           self.estimator.cluster_centers_))
-        self.test_scorer = (
-            lambda _, __: neg_mean_inertia(self.X_val,
-                                           self.estimator.predict(self.X_val),
-                                           self.estimator.cluster_centers_))
+        self.train_scorer = lambda _, __: neg_mean_inertia(
+            self.X, self.estimator.predict(self.X), self.estimator.cluster_centers_
+        )
+        self.test_scorer = lambda _, __: neg_mean_inertia(
+            self.X_val,
+            self.estimator.predict(self.X_val),
+            self.estimator.cluster_centers_,
+        )
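
The neg_mean_inertia(X, labels, centers) calls that black re-wrapped above refer to a helper defined elsewhere in the benchmark suite and not shown in this diff. As a hedged sketch of what such a scorer plausibly computes (the negative mean squared distance of each sample to its assigned center, so that higher is better), assuming dense NumPy inputs:

# Hypothetical sketch of a neg_mean_inertia-style scorer; the real helper in
# the benchmark utilities may differ in details (e.g. sparse input handling).
import numpy as np

def neg_mean_inertia(X, labels, centers):
    # Squared distance from each sample to the center it is assigned to,
    # averaged over samples and negated so that larger values are better.
    diffs = X - centers[labels]
    return -np.mean(np.einsum("ij,ij->i", diffs, diffs))

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))
centers = rng.normal(size=(3, 5))
labels = rng.integers(0, 3, size=100)
print(neg_mean_inertia(X, labels, centers))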

    asv_benchmarks/benchmarks/common.py

    Lines changed: 73 additions & 52 deletions
@@ -14,86 +14,102 @@ def get_from_config():
     """Get benchmarks configuration from the config.json file"""
     current_path = Path(__file__).resolve().parent
 
-    config_path = current_path / 'config.json'
-    with open(config_path, 'r') as config_file:
-        config_file = ''.join(line for line in config_file
-                              if line and '//' not in line)
+    config_path = current_path / "config.json"
+    with open(config_path, "r") as config_file:
+        config_file = "".join(line for line in config_file if line and "//" not in line)
         config = json.loads(config_file)
 
-    profile = os.getenv('SKLBENCH_PROFILE', config['profile'])
+    profile = os.getenv("SKLBENCH_PROFILE", config["profile"])
 
-    n_jobs_vals_env = os.getenv('SKLBENCH_NJOBS')
+    n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS")
     if n_jobs_vals_env:
         n_jobs_vals = eval(n_jobs_vals_env)
     else:
-        n_jobs_vals = config['n_jobs_vals']
+        n_jobs_vals = config["n_jobs_vals"]
     if not n_jobs_vals:
         n_jobs_vals = list(range(1, 1 + cpu_count()))
 
-    cache_path = current_path / 'cache'
+    cache_path = current_path / "cache"
     cache_path.mkdir(exist_ok=True)
-    (cache_path / 'estimators').mkdir(exist_ok=True)
-    (cache_path / 'tmp').mkdir(exist_ok=True)
+    (cache_path / "estimators").mkdir(exist_ok=True)
+    (cache_path / "tmp").mkdir(exist_ok=True)
 
-    save_estimators = os.getenv('SKLBENCH_SAVE_ESTIMATORS',
-                                config['save_estimators'])
-    save_dir = os.getenv('ASV_COMMIT', 'new')[:8]
+    save_estimators = os.getenv("SKLBENCH_SAVE_ESTIMATORS", config["save_estimators"])
+    save_dir = os.getenv("ASV_COMMIT", "new")[:8]
 
     if save_estimators:
-        (cache_path / 'estimators' / save_dir).mkdir(exist_ok=True)
+        (cache_path / "estimators" / save_dir).mkdir(exist_ok=True)
 
-    base_commit = os.getenv('SKLBENCH_BASE_COMMIT', config['base_commit'])
+    base_commit = os.getenv("SKLBENCH_BASE_COMMIT", config["base_commit"])
 
-    bench_predict = os.getenv('SKLBENCH_PREDICT', config['bench_predict'])
-    bench_transform = os.getenv('SKLBENCH_TRANSFORM',
-                                config['bench_transform'])
+    bench_predict = os.getenv("SKLBENCH_PREDICT", config["bench_predict"])
+    bench_transform = os.getenv("SKLBENCH_TRANSFORM", config["bench_transform"])
 
-    return (profile, n_jobs_vals, save_estimators, save_dir, base_commit,
-            bench_predict, bench_transform)
+    return (
+        profile,
+        n_jobs_vals,
+        save_estimators,
+        save_dir,
+        base_commit,
+        bench_predict,
+        bench_transform,
+    )
 
 
 def get_estimator_path(benchmark, directory, params, save=False):
     """Get path of pickled fitted estimator"""
-    path = Path(__file__).resolve().parent / 'cache'
-    path = (path / 'estimators' / directory) if save else (path / 'tmp')
+    path = Path(__file__).resolve().parent / "cache"
+    path = (path / "estimators" / directory) if save else (path / "tmp")
 
-    filename = (benchmark.__class__.__name__
-                + '_estimator_' + '_'.join(list(map(str, params))) + '.pkl')
+    filename = (
+        benchmark.__class__.__name__
+        + "_estimator_"
+        + "_".join(list(map(str, params)))
+        + ".pkl"
+    )
 
     return path / filename
 
 
 def clear_tmp():
     """Clean the tmp directory"""
-    path = Path(__file__).resolve().parent / 'cache' / 'tmp'
+    path = Path(__file__).resolve().parent / "cache" / "tmp"
     for child in path.iterdir():
         child.unlink()
 
 
 class Benchmark(ABC):
     """Abstract base class for all the benchmarks"""
+
     timer = timeit.default_timer  # wall time
     processes = 1
     timeout = 500
 
-    (profile, n_jobs_vals, save_estimators, save_dir, base_commit,
-     bench_predict, bench_transform) = get_from_config()
-
-    if profile == 'fast':
+    (
+        profile,
+        n_jobs_vals,
+        save_estimators,
+        save_dir,
+        base_commit,
+        bench_predict,
+        bench_transform,
+    ) = get_from_config()
+
+    if profile == "fast":
         warmup_time = 0
         repeat = 1
         number = 1
         min_run_count = 1
-        data_size = 'small'
-    elif profile == 'regular':
+        data_size = "small"
+    elif profile == "regular":
         warmup_time = 1
         repeat = (3, 100, 30)
-        data_size = 'small'
-    elif profile == 'large_scale':
+        data_size = "small"
+    elif profile == "large_scale":
         warmup_time = 1
         repeat = 3
         number = 1
-        data_size = 'large'
+        data_size = "large"
 
     @property
     @abstractmethod
@@ -103,6 +119,7 @@ def params(self):
 
 class Estimator(ABC):
     """Abstract base class for all benchmarks of estimators"""
+
     @abstractmethod
     def make_data(self, params):
         """Return the dataset for a combination of parameters"""
@@ -112,8 +129,7 @@ def make_data(self, params):
 
     @abstractmethod
     def make_estimator(self, params):
-        """Return an instance of the estimator for a combination of parameters
-        """
+        """Return an instance of the estimator for a combination of parameters"""
        pass
 
     def skip(self, params):
@@ -137,9 +153,10 @@ def setup_cache(self):
 
             estimator.fit(X, y)
 
-            est_path = get_estimator_path(self, Benchmark.save_dir,
-                                          params, Benchmark.save_estimators)
-            with est_path.open(mode='wb') as f:
+            est_path = get_estimator_path(
+                self, Benchmark.save_dir, params, Benchmark.save_estimators
+            )
+            with est_path.open(mode="wb") as f:
                 pickle.dump(estimator, f)
 
     def setup(self, *params):
@@ -152,9 +169,10 @@ def setup(self, *params):
 
         self.X, self.X_val, self.y, self.y_val = self.make_data(params)
 
-        est_path = get_estimator_path(self, Benchmark.save_dir,
-                                      params, Benchmark.save_estimators)
-        with est_path.open(mode='rb') as f:
+        est_path = get_estimator_path(
+            self, Benchmark.save_dir, params, Benchmark.save_estimators
+        )
+        with est_path.open(mode="rb") as f:
             self.estimator = pickle.load(f)
 
         self.make_scorers()
@@ -166,14 +184,14 @@ def peakmem_fit(self, *args):
         self.estimator.fit(self.X, self.y)
 
     def track_train_score(self, *args):
-        if hasattr(self.estimator, 'predict'):
+        if hasattr(self.estimator, "predict"):
            y_pred = self.estimator.predict(self.X)
         else:
             y_pred = None
         return float(self.train_scorer(self.y, y_pred))
 
     def track_test_score(self, *args):
-        if hasattr(self.estimator, 'predict'):
+        if hasattr(self.estimator, "predict"):
             y_val_pred = self.estimator.predict(self.X_val)
         else:
             y_val_pred = None
@@ -182,18 +200,20 @@ def track_test_score(self, *args):
 
 class Predictor(ABC):
     """Abstract base class for benchmarks of estimators implementing predict"""
+
     if Benchmark.bench_predict:
+
         def time_predict(self, *args):
             self.estimator.predict(self.X)
 
         def peakmem_predict(self, *args):
             self.estimator.predict(self.X)
 
         if Benchmark.base_commit is not None:
+
             def track_same_prediction(self, *args):
-                est_path = get_estimator_path(self, Benchmark.base_commit,
-                                              args, True)
-                with est_path.open(mode='rb') as f:
+                est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
+                with est_path.open(mode="rb") as f:
                     estimator_base = pickle.load(f)
 
                 y_val_pred_base = estimator_base.predict(self.X_val)
@@ -208,20 +228,21 @@ def params(self):
 
 
 class Transformer(ABC):
-    """Abstract base class for benchmarks of estimators implementing transform
-    """
+    """Abstract base class for benchmarks of estimators implementing transform"""
+
     if Benchmark.bench_transform:
+
         def time_transform(self, *args):
             self.estimator.transform(self.X)
 
         def peakmem_transform(self, *args):
             self.estimator.transform(self.X)
 
         if Benchmark.base_commit is not None:
+
             def track_same_transform(self, *args):
-                est_path = get_estimator_path(self, Benchmark.base_commit,
-                                              args, True)
-                with est_path.open(mode='rb') as f:
+                est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
+                with est_path.open(mode="rb") as f:
                     estimator_base = pickle.load(f)
 
                 X_val_t_base = estimator_base.transform(self.X_val)
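
One detail that black merely re-wrapped in get_from_config is how config.json is read: every line containing // is dropped before json.loads, so the config file can carry //-style comments even though JSON itself has none. A small self-contained sketch of that technique, using an inline string instead of the real config file:

# Sketch of the comment-stripping JSON load used above, with an inline string
# standing in for config.json.
import json

raw = """\
{
    // benchmark profile: fast, regular or large_scale
    "profile": "regular",
    "n_jobs_vals": [1]
}
"""

stripped = "".join(
    line for line in raw.splitlines(keepends=True) if line and "//" not in line
)
config = json.loads(stripped)
print(config["profile"])  # regular

Note that this filter is deliberately naive: any line whose value happens to contain // (for example a URL) would be dropped as well, which is acceptable for this particular config file.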
