gh-51067: add ZipInfo.remove() (#51067) · python/cpython@314916e · GitHub
[go: up one dir, main page]

Skip to content

Commit 314916e

Browse files
committed
gh-51067: add ZipInfo.remove() (#51067)
1 parent 0708437 commit 314916e

File tree

4 files changed

+401
-0
lines changed

4 files changed

+401
-0
lines changed

Doc/library/zipfile.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,23 @@ ZipFile Objects
492492
.. versionadded:: 3.11
493493

494494

495+
.. method:: ZipFile.remove(zinfo_or_arcname)
496+
497+
Removes a member from the archive. *zinfo_or_arcname* is either the full
498+
path of the member, or a :class:`ZipInfo` instance.
499+
500+
The archive must be opened with mode ``'w'``, ``'x'`` or ``'a'``.
501+
502+
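Calling :meth:`remove` on a closed ZipFile, on a ZipFile opened with mode ``'r'``, or while a writing handle is still open will raise a :exc:`ValueError`. A :exc:`KeyError` is raised if the member is not in the archive.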
503+
504+
.. note::
505+
506+
Removing a member in an archive may involve a move of many internal data
507+
records, which can be I/O intensive for a large ZIP file.
508+
509+
.. versionadded:: 3.12
510+
511+
495512
The following data attributes are also available:
496513

497514
.. attribute:: ZipFile.filename

Lib/test/test_zipfile/test_core.py

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,234 @@ class LzmaWriterTests(AbstractWriterTests, unittest.TestCase):
11471147
compression = zipfile.ZIP_LZMA
11481148

11491149

1150+
class AbstractRemoveTests:
    """Shared tests for removing members from a ZIP archive.

    Concrete subclasses combine this mixin with ``unittest.TestCase`` and set
    ``compression`` to the ``zipfile.ZIP_*`` constant that is used for every
    archive written by these tests.
    """

    # (arcname, data) pairs used as the default fixture by several tests.
    _BASIC_FILES = [
        ('file0.txt', b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'),
        ('file1.txt', b'Duis aute irure dolor in reprehenderit in voluptate velit esse'),
        ('file2.txt', b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'),
    ]

    def tearDown(self):
        # Every test writes to TESTFN; do not leave it behind for other tests.
        try:
            os.remove(TESTFN)
        except FileNotFoundError:
            pass

    def _test_removing_indexes(self, test_files, indexes):
        """Test underlying _remove_members() for removing members at given
        indexes.

        *test_files* is a sequence of ``(arcname, data)`` pairs and *indexes*
        holds the positions in that sequence which are removed.
        """
        # calculate the expected results: an archive that only ever
        # contained the surviving members
        expected_files = []
        with zipfile.ZipFile(TESTFN, 'w', compression=self.compression) as zh:
            for i, (file, data) in enumerate(test_files):
                if i not in indexes:
                    zh.writestr(file, data)
                    expected_files.append(file)
        expected_size = os.path.getsize(TESTFN)

        # prepare the test zip
        with zipfile.ZipFile(TESTFN, 'w', compression=self.compression) as zh:
            for file, data in test_files:
                zh.writestr(file, data)

        # do the removal and check the result
        with zipfile.ZipFile(TESTFN, 'a', compression=self.compression) as zh:
            infolist = zh.infolist()
            members = {infolist[i] for i in indexes}
            zh._remove_members(members)

            # make sure internal caches have reflected the change
            # and are consistent
            remaining = zh.namelist()
            self.assertEqual(remaining, expected_files)
            for file, _ in test_files:
                if file in remaining:
                    self.assertEqual(zh.getinfo(file).filename, file)
                else:
                    with self.assertRaises(KeyError):
                        zh.getinfo(file)

            self.assertIsNone(zh.testzip())
        self.assertEqual(os.path.getsize(TESTFN), expected_size)

    def _test_removing_combinations(self, test_files, n=None):
        """Test underlying _remove_members() for removing every combination
        of members.

        With *n* given only combinations of exactly *n* members are tried;
        otherwise every size from 1 to ``len(test_files)`` is tried.
        """
        ln = len(test_files)
        sizes = range(1, ln + 1) if n is None else (n,)
        for size in sizes:
            for indexes in itertools.combinations(range(ln), size):
                with self.subTest(remove=indexes):
                    self._test_removing_indexes(test_files, indexes)

    def test_basic(self):
        # Test underlying _remove_members() for removing every combination
        # of members.
        self._test_removing_combinations(self._BASIC_FILES)

    def test_duplicated_arcname(self):
        # Test underlying _remove_members() for removing any one of several
        # members that share the same arcname.
        dupl_file = 'file.txt'
        test_files = self._BASIC_FILES

        ln = len(test_files)
        for n in range(2, ln + 1):
            for dups in itertools.combinations(range(ln), n):
                # rename the members at positions *dups* to the shared name
                files = [
                    (dupl_file if i in dups else file, data)
                    for i, (file, data) in enumerate(test_files)
                ]

                for index in dups:
                    indexes = [index]
                    with self.subTest(dups=dups, indexes=indexes):
                        self._test_removing_indexes(files, indexes)

    def test_non_physical(self):
        # Test underlying _remove_members() for non-physical removing.
        test_files = self._BASIC_FILES

        # prepare the test zip
        expected_offset = {}
        with zipfile.ZipFile(TESTFN, 'w', compression=self.compression) as zh:
            for file, data in test_files:
                zh.writestr(file, data)
                expected_offset[file] = zh.getinfo(file).header_offset

        # do the removal and check the result
        with zipfile.ZipFile(TESTFN, 'a', compression=self.compression) as zh:
            members = {zh.getinfo('file1.txt')}
            zh._remove_members(members, remove_physical=False)
            remaining = ['file0.txt', 'file2.txt']
            self.assertEqual(zh.namelist(), remaining)
            # no data is moved, so every surviving member keeps its offset
            for name in remaining:
                with self.subTest(member=name):
                    self.assertEqual(zh.getinfo(name).header_offset,
                                     expected_offset[name])
            self.assertIsNone(zh.testzip())

    def test_verify(self):
        # Test if params are passed to underlying _remove_members() correctly,
        # or never passed if conditions not met.
        file0 = 'file0.txt'
        file = 'datafile.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        target = 'zipfile.ZipFile._remove_members'

        # closed: error and do nothing
        with zipfile.ZipFile(TESTFN, 'w', compression=self.compression) as zh:
            zh.writestr(file, data)
        with zipfile.ZipFile(TESTFN, 'a', compression=self.compression) as zh:
            zh.close()
            with mock.patch(target) as mock_fn:
                with self.assertRaises(ValueError):
                    zh.remove(file)
            mock_fn.assert_not_called()

        # writing: error and do nothing
        with zipfile.ZipFile(TESTFN, 'w', compression=self.compression) as zh:
            zh.writestr(file, data)
        with zipfile.ZipFile(TESTFN, 'a', compression=self.compression) as zh:
            with mock.patch(target) as mock_fn:
                # keep a writing handle open while remove() is attempted
                with zh.open(file0, 'w'):
                    with self.assertRaises(ValueError):
                        zh.remove(file)
            mock_fn.assert_not_called()

        # mode 'r': error and do nothing
        with zipfile.ZipFile(TESTFN, 'r') as zh:
            with mock.patch(target) as mock_fn:
                with self.assertRaises(ValueError):
                    zh.remove(file)
            mock_fn.assert_not_called()

        # mode 'a': the most general use case
        with zipfile.ZipFile(TESTFN, 'w', compression=self.compression) as zh:
            zh.writestr(file, data)
        # -- remove with arcname
        with zipfile.ZipFile(TESTFN, 'a', compression=self.compression) as zh:
            with mock.patch(target) as mock_fn:
                zh.remove(file)
            mock_fn.assert_called_once_with({zh.getinfo(file)})
        # -- remove with zinfo
        with zipfile.ZipFile(TESTFN, 'a', compression=self.compression) as zh:
            with mock.patch(target) as mock_fn:
                zinfo = zh.getinfo(file)
                zh.remove(zinfo)
            mock_fn.assert_called_once_with({zinfo})
        # -- remove with nonexist arcname
        with zipfile.ZipFile(TESTFN, 'a', compression=self.compression) as zh:
            with mock.patch(target) as mock_fn:
                with self.assertRaises(KeyError):
                    zh.remove('nonexist.file')
            mock_fn.assert_not_called()
        # -- remove with nonexist zinfo (even if same name)
        with zipfile.ZipFile(TESTFN, 'a', compression=self.compression) as zh:
            with mock.patch(target) as mock_fn:
                zinfo = zipfile.ZipInfo(file)
                with self.assertRaises(KeyError):
                    zh.remove(zinfo)
            mock_fn.assert_not_called()

        # mode 'w': like 'a'; allows removing a just written member
        with zipfile.ZipFile(TESTFN, 'w', compression=self.compression) as zh:
            zh.writestr(file, data)
            with mock.patch(target) as mock_fn:
                zh.remove(file)
            mock_fn.assert_called_once_with({zh.getinfo(file)})

        # mode 'x': like 'w'
        os.remove(TESTFN)
        with zipfile.ZipFile(TESTFN, 'x', compression=self.compression) as zh:
            zh.writestr(file, data)
            with mock.patch(target) as mock_fn:
                zh.remove(file)
            mock_fn.assert_called_once_with({zh.getinfo(file)})

    def test_zip64(self):
        # Test if members use zip64.
        file = 'datafile.txt'
        file1 = 'pre.txt'
        file2 = 'post.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        data1 = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
        data2 = b'Duis aute irure dolor in reprehenderit in voluptate velit esse'

        # reference archive: only the two members that should survive
        with zipfile.ZipFile(TESTFN, 'w', compression=self.compression) as zh:
            with zh.open(file1, 'w', force_zip64=True) as fh:
                fh.write(data1)
            with zh.open(file2, 'w', force_zip64=True) as fh:
                fh.write(data2)
        expected_size = os.path.getsize(TESTFN)

        # archive under test: the member to remove sits between the others
        with zipfile.ZipFile(TESTFN, 'w', compression=self.compression) as zh:
            with zh.open(file1, 'w', force_zip64=True) as fh:
                fh.write(data1)
            with zh.open(file, 'w', force_zip64=True) as fh:
                fh.write(data)
            with zh.open(file2, 'w', force_zip64=True) as fh:
                fh.write(data2)
        with zipfile.ZipFile(TESTFN, 'a', compression=self.compression) as zh:
            zh.remove(file)
            self.assertIsNone(zh.testzip())
        self.assertEqual(os.path.getsize(TESTFN), expected_size)
1361+
1362+
class StoredRemoveTests(AbstractRemoveTests, unittest.TestCase):
    """Shared removal tests configured with ``compression = ZIP_STORED``."""

    compression = zipfile.ZIP_STORED


@requires_zlib()
class DeflateRemoveTests(AbstractRemoveTests, unittest.TestCase):
    """Shared removal tests configured with ``compression = ZIP_DEFLATED``."""

    compression = zipfile.ZIP_DEFLATED


@requires_bz2()
class Bzip2RemoveTests(AbstractRemoveTests, unittest.TestCase):
    """Shared removal tests configured with ``compression = ZIP_BZIP2``."""

    compression = zipfile.ZIP_BZIP2


@requires_lzma()
class LzmaRemoveTests(AbstractRemoveTests, unittest.TestCase):
    """Shared removal tests configured with ``compression = ZIP_LZMA``."""

    compression = zipfile.ZIP_LZMA
1376+
1377+
11501378
class PyZipFileTests(unittest.TestCase):
11511379
def assertCompiledIn(self, name, namelist):
11521380
if name + 'o' not in namelist:

Lib/test/test_zipfile64.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,69 @@ def tearDown(self):
8787
os_helper.unlink(TESTFN2)
8888

8989

90+
class TestRemove(unittest.TestCase):
    """Test ZipFile.remove() on archives holding an about 8 GiB member
    written with ``force_zip64=True``."""

    def setUp(self):
        # Create test data.
        line_gen = ("Test of zipfile line %d." % i for i in range(1000000))
        self.data = '\n'.join(line_gen).encode('ascii')

    def _write_large_file(self, fh):
        """Write copies of ``self.data`` to *fh* until about 8 GiB is reached."""
        # It will contain enough copies of self.data to reach about 8 GiB.
        filecount = 8*1024**3 // len(self.data)

        next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL
        for num in range(filecount):
            fh.write(self.data)
            # Print still working message since this test can be really slow
            if next_time <= time.monotonic():
                next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL
                print((
                    ' writing %d of %d, be patient...' %
                    (num, filecount)), file=sys.__stdout__)
                sys.__stdout__.flush()

    def test_remove_large_file(self):
        # Try the temp file.  If we do TESTFN2, then it hogs
        # gigabytes of disk space for the duration of the test.
        with TemporaryFile() as f:
            self._test_remove_large_file(f)
            # ZipFile must not close a file object it did not open itself
            self.assertFalse(f.closed)

    def _test_remove_large_file(self, f):
        """Remove the large member that precedes a small one in *f*."""
        file = 'datafile.txt'
        file1 = 'dummy.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        with zipfile.ZipFile(f, 'w') as zh:
            with zh.open(file1, 'w', force_zip64=True) as fh:
                self._write_large_file(fh)
            zh.writestr(file, data)

        with zipfile.ZipFile(f, 'a') as zh:
            zh.remove(file1)
            # only the small member may be left, and it must still be intact
            self.assertEqual(zh.namelist(), [file])
            self.assertIsNone(zh.testzip())

    def test_remove_before_large_file(self):
        # Try the temp file.  If we do TESTFN2, then it hogs
        # gigabytes of disk space for the duration of the test.
        with TemporaryFile() as f:
            self._test_remove_before_large_file(f)
            # ZipFile must not close a file object it did not open itself
            self.assertFalse(f.closed)

    def _test_remove_before_large_file(self, f):
        """Remove the small member that precedes the large one in *f*."""
        file = 'datafile.txt'
        file1 = 'dummy.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        with zipfile.ZipFile(f, 'w') as zh:
            zh.writestr(file, data)
            with zh.open(file1, 'w', force_zip64=True) as fh:
                self._write_large_file(fh)
            # recorded once the write handle is closed, for comparison below
            expected_size = zh.getinfo(file1).file_size

        with zipfile.ZipFile(f, 'a') as zh:
            zh.remove(file)
            # the large member must survive with its recorded size unchanged
            self.assertEqual(zh.namelist(), [file1])
            self.assertEqual(zh.getinfo(file1).file_size, expected_size)
            self.assertIsNone(zh.testzip())
151+
152+
90153
class OtherTests(unittest.TestCase):
91154
def testMoreThan64kFiles(self):
92155
# This test checks that more than 64k files can be added to an archive,

0 commit comments

Comments
 (0)
0