Lazily import parallelized format modules · IBMZ-Linux-OSS-Python/black@fe632e6 · GitHub
[go: up one dir, main page]

Skip to content

Commit fe632e6

Browse files
ichard26 and fabioz committed
Lazily import parallelized format modules
`black.reformat_many` depends on a lot of slow-to-import modules. When formatting just a single file, the time spent importing those modules is totally wasted. So I moved `black.reformat_many` and its helpers to `black.concurrency`, which is now *only* imported if there's more than one file to reformat. This way, running Black over a single file is snappier.

Here are the numbers before and after this patch running `python -m black --version`:

- interpreted: 411 ms +- 9 ms -> 342 ms +- 7 ms: 1.20x faster
- compiled: 365 ms +- 15 ms -> 304 ms +- 7 ms: 1.20x faster

Co-authored-by: Fabio Zadrozny <fabiofz@gmail.com>
1 parent 507234c commit fe632e6

File tree

5 files changed

+172
-145
lines changed

5 files changed

+172
-145
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@
7575

7676
<!-- Changes that improve Black's performance. -->
7777

78+
- Reduce Black's startup time when formatting a single file by 15-30% (#3208)
79+
7880
## 22.6.0
7981

8082
### Style

docs/contributing/reference/reference_functions.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ Formatting
5252

5353
.. autofunction:: black.reformat_one
5454

55-
.. autofunction:: black.schedule_formatting
55+
.. autofunction:: black.concurrency.schedule_formatting
5656

5757
File operations
5858
---------------
@@ -173,7 +173,7 @@ Utilities
173173

174174
.. autofunction:: black.linegen.should_split_line
175175

176-
.. autofunction:: black.shutdown
176+
.. autofunction:: black.concurrency.shutdown
177177

178178
.. autofunction:: black.strings.sub_twice
179179

src/black/__init__.py

Lines changed: 19 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1-
import asyncio
21
import io
32
import json
43
import os
54
import platform
65
import re
7-
import signal
86
import sys
97
import tokenize
108
import traceback
@@ -13,10 +11,8 @@
1311
from datetime import datetime
1412
from enum import Enum
1513
from json.decoder import JSONDecodeError
16-
from multiprocessing import Manager, freeze_support
1714
from pathlib import Path
1815
from typing import (
19-
TYPE_CHECKING,
2016
Any,
2117
Dict,
2218
Generator,
@@ -32,15 +28,19 @@
3228
Union,
3329
)
3430

31+
if sys.version_info >= (3, 8):
32+
from typing import Final
33+
else:
34+
from typing_extensions import Final
35+
3536
import click
3637
from click.core import ParameterSource
3738
from mypy_extensions import mypyc_attr
3839
from pathspec.patterns.gitwildmatch import GitWildMatchPatternError
3940

4041
from _black_version import version as __version__
41-
from black.cache import Cache, filter_cached, get_cache_info, read_cache, write_cache
42+
from black.cache import Cache, get_cache_info, read_cache, write_cache
4243
from black.comments import normalize_fmt_off
43-
from black.concurrency import cancel, maybe_install_uvloop, shutdown
4444
from black.const import (
4545
DEFAULT_EXCLUDES,
4646
DEFAULT_INCLUDES,
@@ -90,10 +90,10 @@
9090
from blib2to3.pgen2 import token
9191
from blib2to3.pytree import Leaf, Node
9292

93-
if TYPE_CHECKING:
94-
from concurrent.futures import Executor
95-
9693
COMPILED = Path(__file__).suffix in (".pyd", ".so")
94+
DEFAULT_WORKERS: Final = os.cpu_count()
95+
# Used to know whether uvloop should be installed.
96+
__BLACK_MAIN_CALLED__ = False
9797

9898
# types
9999
FileContent = str
@@ -124,8 +124,6 @@ def from_configuration(
124124
# Legacy name, left for integrations.
125125
FileMode = Mode
126126

127-
DEFAULT_WORKERS = os.cpu_count()
128-
129127

130128
def read_pyproject_toml(
131129
ctx: click.Context, param: click.Parameter, value: Optional[str]
@@ -587,6 +585,8 @@ def main( # noqa: C901
587585
report=report,
588586
)
589587
else:
588+
from black.concurrency import reformat_many
589+
590590
reformat_many(
591591
sources=sources,
592592
fast=fast,
@@ -771,132 +771,6 @@ def reformat_one(
771771
report.failed(src, str(exc))
772772

773773

774-
# diff-shades depends on being able to monkeypatch this function to operate. I know it's
775-
# not ideal, but this shouldn't cause any issues ... hopefully. ~ichard26
776-
@mypyc_attr(patchable=True)
777-
def reformat_many(
778-
sources: Set[Path],
779-
fast: bool,
780-
write_back: WriteBack,
781-
mode: Mode,
782-
report: "Report",
783-
workers: Optional[int],
784-
) -> None:
785-
"""Reformat multiple files using a ProcessPoolExecutor."""
786-
from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
787-
788-
executor: Executor
789-
worker_count = workers if workers is not None else DEFAULT_WORKERS
790-
if sys.platform == "win32":
791-
# Work around https://bugs.python.org/issue26903
792-
assert worker_count is not None
793-
worker_count = min(worker_count, 60)
794-
try:
795-
executor = ProcessPoolExecutor(max_workers=worker_count)
796-
except (ImportError, NotImplementedError, OSError):
797-
# we arrive here if the underlying system does not support multi-processing
798-
# like in AWS Lambda or Termux, in which case we gracefully fallback to
799-
# a ThreadPoolExecutor with just a single worker (more workers would not do us
800-
# any good due to the Global Interpreter Lock)
801-
executor = ThreadPoolExecutor(max_workers=1)
802-
803-
loop = asyncio.new_event_loop()
804-
asyncio.set_event_loop(loop)
805-
try:
806-
loop.run_until_complete(
807-
schedule_formatting(
808-
sources=sources,
809-
fast=fast,
810-
write_back=write_back,
811-
mode=mode,
812-
report=report,
813-
loop=loop,
814-
executor=executor,
815-
)
816-
)
817-
finally:
818-
try:
819-
shutdown(loop)
820-
finally:
821-
asyncio.set_event_loop(None)
822-
if executor is not None:
823-
executor.shutdown()
824-
825-
826-
async def schedule_formatting(
827-
sources: Set[Path],
828-
fast: bool,
829-
write_back: WriteBack,
830-
mode: Mode,
831-
report: "Report",
832-
loop: asyncio.AbstractEventLoop,
833-
executor: "Executor",
834-
) -> None:
835-
"""Run formatting of `sources` in parallel using the provided `executor`.
836-
837-
(Use ProcessPoolExecutors for actual parallelism.)
838-
839-
`write_back`, `fast`, and `mode` options are passed to
840-
:func:`format_file_in_place`.
841-
"""
842-
cache: Cache = {}
843-
if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
844-
cache = read_cache(mode)
845-
sources, cached = filter_cached(cache, sources)
846-
for src in sorted(cached):
847-
report.done(src, Changed.CACHED)
848-
if not sources:
849-
return
850-
851-
cancelled = []
852-
sources_to_cache = []
853-
lock = None
854-
if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
855-
# For diff output, we need locks to ensure we don't interleave output
856-
# from different processes.
857-
manager = Manager()
858-
lock = manager.Lock()
859-
tasks = {
860-
asyncio.ensure_future(
861-
loop.run_in_executor(
862-
executor, format_file_in_place, src, fast, mode, write_back, lock
863-
)
864-
): src
865-
for src in sorted(sources)
866-
}
867-
pending = tasks.keys()
868-
try:
869-
loop.add_signal_handler(signal.SIGINT, cancel, pending)
870-
loop.add_signal_handler(signal.SIGTERM, cancel, pending)
871-
except NotImplementedError:
872-
# There are no good alternatives for these on Windows.
873-
pass
874-
while pending:
875-
done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
876-
for task in done:
877-
src = tasks.pop(task)
878-
if task.cancelled():
879-
cancelled.append(task)
880-
elif task.exception():
881-
report.failed(src, str(task.exception()))
882-
else:
883-
changed = Changed.YES if task.result() else Changed.NO
884-
# If the file was written back or was successfully checked as
885-
# well-formatted, store this information in the cache.
886-
if write_back is WriteBack.YES or (
887-
write_back is WriteBack.CHECK and changed is Changed.NO
888-
):
889-
sources_to_cache.append(src)
890-
report.done(src, changed)
891-
if cancelled:
892-
if sys.version_info >= (3, 7):
893-
await asyncio.gather(*cancelled, return_exceptions=True)
894-
else:
895-
await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
896-
if sources_to_cache:
897-
write_cache(cache, sources_to_cache, mode)
898-
899-
900774
def format_file_in_place(
901775
src: Path,
902776
fast: bool,
@@ -1495,8 +1369,14 @@ def patch_click() -> None:
14951369

14961370

14971371
def patched_main() -> None:
1498-
maybe_install_uvloop()
1499-
freeze_support()
1372+
global __BLACK_MAIN_CALLED__
1373+
__BLACK_MAIN_CALLED__ = True
1374+
1375+
if sys.platform == "win32" and getattr(sys, "frozen", False):
1376+
from multiprocessing import freeze_support
1377+
1378+
freeze_support()
1379+
15001380
patch_click()
15011381
main()
15021382

0 commit comments

Comments
 (0)
0