bpo-45863: tarfile: don't zero out header fields unnecessarily (GH-29693) · python/cpython@bf2d44f · GitHub
[go: up one dir, main page]

Skip to content

Commit bf2d44f

Browse files
authored
bpo-45863: tarfile: don't zero out header fields unnecessarily (GH-29693)
Numeric fields of type float, notably mtime, can't be represented exactly in the ustar header, so their full-precision values are stored in the pax header. However, it is helpful to also set them to the nearest int (i.e. second rather than nanosecond precision mtimes) in the ustar header, for the benefit of unarchivers that don't understand the pax header. Add a test for tarfile.TarInfo.create_pax_header to confirm correct behaviour.
1 parent c0a5ebe commit bf2d44f

File tree

3 files changed

+71
-6
lines changed

3 files changed

+71
-6
lines changed

Lib/tarfile.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -888,15 +888,24 @@ def create_pax_header(self, info, encoding):
888888
# Test number fields for values that exceed the field limit or values
889889
# that like to be stored as float.
890890
for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
891-
if name in pax_headers:
892-
# The pax header has priority. Avoid overflow.
893-
info[name] = 0
894-
continue
891+
needs_pax = False
895892

896893
val = info[name]
897-
if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
898-
pax_headers[name] = str(val)
894+
val_is_float = isinstance(val, float)
895+
val_int = round(val) if val_is_float else val
896+
if not 0 <= val_int < 8 ** (digits - 1):
897+
# Avoid overflow.
899898
info[name] = 0
899+
needs_pax = True
900+
elif val_is_float:
901+
# Put rounded value in ustar header, and full
902+
# precision value in pax header.
903+
info[name] = val_int
904+
needs_pax = True
905+
906+
# The existing pax header has priority.
907+
if needs_pax and name not in pax_headers:
908+
pax_headers[name] = str(val)
900909

901910
# Create a pax extended header if necessary.
902911
if pax_headers:

Lib/test/test_tarfile.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1911,6 +1911,61 @@ def test_pax_extended_header(self):
19111911
finally:
19121912
tar.close()
19131913

1914+
def test_create_pax_header(self):
1915+
# The ustar header should contain values that can be
1916+
# represented reasonably, even if a better (e.g. higher
1917+
# precision) version is set in the pax header.
1918+
# Issue #45863
1919+
1920+
# values that should be kept
1921+
t = tarfile.TarInfo()
1922+
t.name = "foo"
1923+
t.mtime = 1000.1
1924+
t.size = 100
1925+
t.uid = 123
1926+
t.gid = 124
1927+
info = t.get_info()
1928+
header = t.create_pax_header(info, encoding="iso8859-1")
1929+
self.assertEqual(info['name'], "foo")
1930+
# mtime should be rounded to nearest second
1931+
self.assertIsInstance(info['mtime'], int)
1932+
self.assertEqual(info['mtime'], 1000)
1933+
self.assertEqual(info['size'], 100)
1934+
self.assertEqual(info['uid'], 123)
1935+
self.assertEqual(info['gid'], 124)
1936+
self.assertEqual(header,
1937+
b'././@PaxHeader' + bytes(86) \
1938+
+ b'0000000\x000000000\x000000000\x0000000000020\x0000000000000\x00010205\x00 x' \
1939+
+ bytes(100) + b'ustar\x0000'+ bytes(247) \
1940+
+ b'16 mtime=1000.1\n' + bytes(496) + b'foo' + bytes(97) \
1941+
+ b'0000644\x000000173\x000000174\x0000000000144\x0000000001750\x00006516\x00 0' \
1942+
+ bytes(100) + b'ustar\x0000' + bytes(247))
1943+
1944+
# values that should be changed
1945+
t = tarfile.TarInfo()
1946+
t.name = "foo\u3374" # can't be represented in ascii
1947+
t.mtime = 10**10 # too big
1948+
t.size = 10**10 # too big
1949+
t.uid = 8**8 # too big
1950+
t.gid = 8**8+1 # too big
1951+
info = t.get_info()
1952+
header = t.create_pax_header(info, encoding="iso8859-1")
1953+
# name is kept as-is in info but should be added to pax header
1954+
self.assertEqual(info['name'], "foo\u3374")
1955+
self.assertEqual(info['mtime'], 0)
1956+
self.assertEqual(info['size'], 0)
1957+
self.assertEqual(info['uid'], 0)
1958+
self.assertEqual(info['gid'], 0)
1959+
self.assertEqual(header,
1960+
b'././@PaxHeader' + bytes(86) \
1961+
+ b'0000000\x000000000\x000000000\x0000000000130\x0000000000000\x00010207\x00 x' \
1962+
+ bytes(100) + b'ustar\x0000' + bytes(247) \
1963+
+ b'15 path=foo\xe3\x8d\xb4\n16 uid=16777216\n' \
1964+
+ b'16 gid=16777217\n20 size=10000000000\n' \
1965+
+ b'21 mtime=10000000000\n'+ bytes(424) + b'foo?' + bytes(96) \
1966+
+ b'0000644\x000000000\x000000000\x0000000000000\x0000000000000\x00006540\x00 0' \
1967+
+ bytes(100) + b'ustar\x0000' + bytes(247))
1968+
19141969

19151970
class UnicodeTest:
19161971

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
When the :mod:`tarfile` module creates a pax format archive, it will put an integer representation of timestamps in the ustar header (if possible) for the benefit of older unarchivers, in addition to the existing full-precision timestamps in the pax extended header.

0 commit comments

Comments
 (0)
0