8000 gh-101282: Apply BOLT optimisations to libpython for shared builds by indygreg · Pull Request #104709 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-101282: Apply BOLT optimisations to libpython for shared builds #104709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
*.gc??
*.profclang?
*.profraw
# Copies of binaries before BOLT optimizations.
*.prebolt
# BOLT profile data.
*.fdata
*.dyn
.gdb_history
.purify
Expand Down Expand Up @@ -124,6 +128,7 @@ Tools/unicode/data/
/platform
/profile-clean-stamp
/profile-run-stamp
/profile-bolt-stamp
/Python/deepfreeze/*.c
/pybuilddir.txt
/pyconfig.h
Expand Down
7 changes: 7 additions & 0 deletions Doc/using/configure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,13 @@ also be used to improve performance.
is dependent on a combination of the build environment + the other
optimization configure args + the CPU architecture, and not all combinations
are supported.
BOLT versions before LLVM 16 are known to crash BOLT under some scenarios.
Use of LLVM 16 or newer for BOLT optimization is strongly encouraged.

The :envvar:`!BOLT_INSTRUMENT_FLAGS` and :envvar:`!BOLT_APPLY_FLAGS`
:program:`configure` variables can be defined to override the default set of
arguments for :program:`llvm-bolt` to instrument and apply BOLT data to
binaries, respectively.

.. versionadded:: 3.12

Expand Down
65 changes: 50 additions & 15 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -672,21 +672,55 @@ profile-opt: profile-run-stamp
-rm -f profile-clean-stamp
$(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)"

.PHONY: bolt-opt
bolt-opt: @PREBOLT_RULE@
# List of binaries that BOLT runs on.
BOLT_BINARIES := @BOLT_BINARIES@

BOLT_INSTRUMENT_FLAGS := @BOLT_INSTRUMENT_FLAGS@
BOLT_APPLY_FLAGS := @BOLT_APPLY_FLAGS@

.PHONY: clean-bolt
clean-bolt:
# Profile data.
rm -f *.fdata
@if $(READELF) -p .note.bolt_info $(BUILDPYTHON) | grep BOLT > /dev/null; then\
echo "skip: $(BUILDPYTHON) is already BOLTed."; \
else \
@LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst; \
./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true; \
@MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata; \
@LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot; \
rm -f *.fdata; \
rm -f $(BUILDPYTHON).bolt_inst; \
mv $(BUILDPYTHON).bolt $(BUILDPYTHON); \
fi
# Pristine binaries before BOLT optimization.
rm -f *.prebolt
# BOLT instrumented binaries.
rm -f *.bolt_inst

profile-bolt-stamp: $(BUILDPYTHON)
# Ensure a pristine, pre-BOLT copy of the binary and no profile data from last run.
for bin in $(BOLT_BINARIES); do \
prebolt="$${bin}.prebolt"; \
if [ -e "$${prebolt}" ]; then \
echo "Restoring pre-BOLT binary $${prebolt}"; \
mv "$${bin}.prebolt" "$${bin}"; \
fi; \
cp "$${bin}" "$${prebolt}"; \
rm -f $${bin}.bolt.*.fdata $${bin}.fdata; \
done
# Instrument each binary.
for bin in $(BOLT_BINARIES); do \
@LLVM_BOLT@ "$${bin}" -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $${bin}.bolt) -o $${bin}.bolt_inst $(BOLT_INSTRUMENT_FLAGS); \
mv "$${bin}.bolt_inst" "$${bin}"; \
done
# Run instrumented binaries to collect data.
$(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true
# Merge all the data files together.
for bin in $(BOLT_BINARIES); do \
@MERGE_FDATA@ $${bin}.*.fdata > "$${bin}.fdata"; \
rm -f $${bin}.*.fdata; \
done
# Run bolt against the merged data to produce an optimized binary.
for bin in $(BOLT_BINARIES); do \
@LLVM_BOLT@ "$${bin}.prebolt" -o "$${bin}.bolt" -data="$${bin}.fdata" $(BOLT_APPLY_FLAGS); \
mv "$${bin}.bolt" "$${bin}"; \
done
touch $@

.PHONY: bolt-opt
bolt-opt:
$(MAKE) @PREBOLT_RULE@
$(MAKE) profile-bolt-stamp

# Compile and run with gcov
.PHONY: coverage
Expand Down Expand Up @@ -2623,10 +2657,11 @@ profile-removal:
rm -f $(COVERAGE_INFO)
rm -rf $(COVERAGE_REPORT)
rm -f profile-run-stamp
rm -f profile-bolt-stamp

.PHONY: clean
clean: clean-retain-profile
@if test @DEF_MAKE_ALL_RULE@ = profile-opt; then \
clean: clean-retain-profile clean-bolt
@if test @DEF_MAKE_ALL_RULE@ = profile-opt -o @DEF_MAKE_ALL_RULE@ = bolt-opt; then \
rm -f profile-gen-stamp profile-clean-stamp; \
$(MAKE) profile-removal; \
fi
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
BOLT optimization is now applied to the libpython shared library if building
a shared library. BOLT instrumentation and application settings can now be
influenced via the ``BOLT_INSTRUMENT_FLAGS`` and ``BOLT_APPLY_FLAGS``
configure variables.
147 changes: 39 additions & 108 deletions configure

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 48 additions & 7 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2028,13 +2028,6 @@ if test "$Py_BOLT" = 'true' ; then
DEF_MAKE_ALL_RULE="bolt-opt"
DEF_MAKE_RULE="build_all"

AC_SUBST(READELF)
AC_CHECK_TOOLS(READELF, [readelf], "notfound")
if test "$READELF" == "notfound"
then
AC_MSG_ERROR([readelf is required for a --enable-bolt build but could not be found.])
fi

# -fno-reorder-blocks-and-partition is required for bolt to work.
# Possibly GCC only.
AX_CHECK_COMPILE_FLAG([-fno-reorder-blocks-and-partition],[
Expand Down Expand Up @@ -2067,6 +2060,54 @@ if test "$Py_BOLT" = 'true' ; then
fi
fi

dnl Enable BOLT of libpython if built.
AC_SUBST(BOLT_BINARIES)
BOLT_BINARIES='$(BUILDPYTHON)'
AS_VAR_IF([enable_shared], [yes], [
BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)"
])

AC_ARG_VAR(
[BOLT_INSTRUMENT_FLAGS],
[Arguments to llvm-bolt when instrumenting binaries]
)
AC_MSG_CHECKING([BOLT_INSTRUMENT_FLAGS])
if test -z "${BOLT_INSTRUMENT_FLAGS}"
then
BOLT_INSTRUMENT_FLAGS=
fi
AC_MSG_RESULT([$BOLT_INSTRUMENT_FLAGS])

AC_ARG_VAR(
[BOLT_APPLY_FLAGS],
[Arguments to llvm-bolt when creating a BOLT optimized binary]
)
AC_MSG_CHECKING([BOLT_APPLY_FLAGS])
if test -z "${BOLT_APPLY_FLAGS}"
then
AS_VAR_SET(
[BOLT_APPLY_FLAGS],
[m4_join([ ],
[-update-debug-sections],
[-reorder-blocks=ext-tsp],
[-reorder-functions=hfsort+],
[-split-functions],
[-icf=1],
[-inline-all],
[-split-eh],
[-reorder-functions-use-hot-size],
[-peepholes=none],
[-jump-tables=aggressive],
[-inline-ap],
[-indirect-call-promotion=all],
[-dyno-stats],
[-use-gnu-stack],
[-frame-opt=hot]
)]
)
fi
AC_MSG_RESULT([$BOLT_APPLY_FLAGS])

# XXX Shouldn't the code above that fiddles with BASECFLAGS and OPT be
# merged with this chunk of code?

Expand Down
0