bpo-40495: compileall option to hardlink duplicate pyc files by frenzymadness · Pull Request #19901 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

bpo-40495: compileall option to hardlink duplicate pyc files #19901

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
May 14, 2020
Merged
Prev Previous commit
Next Next commit
use subTest to parametrize three tests with different combinations of opt levels
  • Loading branch information
frenzymadness committed May 12, 2020
commit 4fb779a6a2cda448f22424c9421593ea92226fb0
96 changes: 27 additions & 69 deletions Lib/test/test_compileall.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import unittest
import io
import filecmp
import itertools

from unittest import mock, skipUnless
try:
Expand Down Expand Up @@ -374,88 +375,45 @@ def test_hardlink_deduplication_bad_args(self):
compileall.compile_dir(self.directory, quiet=True, optimize=0,
hardlink_dupes=True)

def test_hardlink_deduplication_same_bytecode_all_opt(self):
    # The one-line module 'a = 0' compiles to identical bytecode at
    # optimization levels 0, 1 and 2, so it is a good probe for the
    # hardlink_dupes switch of compileall.compile_dir().
    target_dir = os.path.join(self.directory, "test", "same_all")
    os.makedirs(target_dir)
    source = script_helper.make_script(
        target_dir, "test_same_bytecode", "a = 0"
    )

    # Expected pyc locations: the default one plus the two tagged ones.
    default_pyc = importlib.util.cache_from_source(source)
    opt1_pyc = importlib.util.cache_from_source(source, optimization=1)
    opt2_pyc = importlib.util.cache_from_source(source, optimization=2)
    neighbours = ((default_pyc, opt1_pyc), (opt1_pyc, opt2_pyc))

    # Deduplication enabled: neighbouring levels must be hardlinks of each
    # other, which chains all three files together.
    compileall.compile_dir(
        target_dir, quiet=True, optimize=[0, 1, 2], hardlink_dupes=True
    )
    for left, right in neighbours:
        self.assertTrue(self.is_hardlink(left, right))

    # Drop the compiled files before the second run. A set is used so a
    # path that appears twice is only unlinked once.
    for stale_pyc in {default_pyc, opt1_pyc, opt2_pyc}:
        os.unlink(stale_pyc)

    # Deduplication disabled: the same levels must now be separate files.
    compileall.compile_dir(
        target_dir, quiet=True, optimize=[0, 1, 2], hardlink_dupes=False
    )
    for left, right in neighbours:
        self.assertFalse(self.is_hardlink(left, right))

def test_hardlink_deduplication_same_bytecode(self):
    # 'a = 0' produces the same bytecode for all optimization levels, so
    # for every combination of levels the pyc files must be hardlinks of
    # one another when hardlink_dupes=True and separate files when it is
    # False.
    opt_combinations = ((0, 1, 2), (1, 2), (0, 2))

    for opt_combination in opt_combinations:
        with self.subTest(opt_combination=opt_combination):
            # Each combination gets its own directory and script.
            # compile_dir() is called without force=True, so pyc files
            # left behind by a previous combination would be treated as
            # up to date and the assertions below would inspect stale
            # files instead of freshly compiled ones.
            path = os.path.join(
                self.directory, "test", "same",
                "_".join(map(str, opt_combination)),
            )
            os.makedirs(path)

            simple_script = script_helper.make_script(
                path, "test_same_bytecode", "a = 0"
            )

            # compileall stores level 0 in the untagged pyc (it passes
            # optimization=''), whereas optimization=0 would name an
            # ".opt-0" file that is never written. Map 0 to '' so the
            # expected paths match what compile_dir() creates.
            pycs = {
                opt_level: importlib.util.cache_from_source(
                    simple_script, optimization=opt_level or ""
                )
                for opt_level in opt_combination
            }
            pyc_pairs = list(itertools.combinations(pycs.values(), 2))

            compileall.compile_dir(
                path, quiet=True, optimize=opt_combination,
                hardlink_dupes=True
            )

            # Every pyc in the combination should be the same file
            # (hardlinks of each other).
            for first_pyc, second_pyc in pyc_pairs:
                self.assertTrue(self.is_hardlink(first_pyc, second_pyc))

            # Remove the compiled files so the second run recompiles them.
            for pyc_file in pycs.values():
                os.unlink(pyc_file)

            compileall.compile_dir(
                path, quiet=True, optimize=opt_combination,
                hardlink_dupes=False
            )

            # Deduplication disabled, all pyc files should be distinct
            # files.
            for first_pyc, second_pyc in pyc_pairs:
                self.assertFalse(self.is_hardlink(first_pyc, second_pyc))

def test_hardlink_deduplication_different_bytecode_all_opt(self):
# "'''string'''\nassert 1" produces a different bytecode for
Expand Down
0