[release/10.0-preview4] Tar: Adjust the way we write GNU longlink and… · dotnet/runtime@a4175df · GitHub
[go: up one dir, main page]

Skip to content

Commit a4175df

Browse files
[release/10.0-preview4] Tar: Adjust the way we write GNU longlink and longpath metadata (#114942)
* Fix 3 GNU longlink and longpath bugs The Tar entry types Longlink and Longpath, which are exclusive to the GNU format, are metadata entries used to store longer-than-expected link or name strings (respectively). They precede their actual entry, and they are not visible to the user; only the data they hold is visible. Most of the regular header metadata in these metadata entries is not relevant, but we need to make sure it is well formed so other tools can read the entries properly. There are 3 groups of fields that were not getting their values stored as expected by other tools: - The mtime, ctime and atime fields need to be set to the Unix Epoch so they show up as zeros (0x30) by default. We were storing them as null (0x0) characters instead. For consistency, if a regular entry happens to get its mtime set to the Epoch, it should also be shown as zeros (0x30). - The uid, gid, uname and gname fields were not being set. These fields aren't really important in a metadata entry, but other tools set them to the default values (uid=gid=0 and uname=gname=root). Based on the fact that 'a value' is preferred for these fields, we can set them to the same values that the real entry has (not root). - The long text value (either link or name) should be null terminated, and this extra character should be counted in the size of the longlink or longpath entry. The library libdpkg, used by the dpkg-deb tool in Debian, expects a null terminator in longlink and longpath data starting in Debian 12. Without the terminator, these entries are considered malformed. --------- Co-authored-by: Carlos Sánchez López <1175054+carlossanlop@users.noreply.github.com>
1 parent 31289ef commit a4175df

File tree

3 files changed

+891
-681
lines changed

3 files changed

+891
-681
lines changed

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -354,26 +354,33 @@ internal async Task WriteAsPaxAsync(Stream archiveStream, Memory<byte> buffer, C
354354
await WriteWithSeekableDataStreamAsync(TarEntryFormat.Pax, archiveStream, buffer, cancellationToken).ConfigureAwait(false);
355355
}
356356
}
357+
// Checks if the linkname string is too long to fit in the regular header field.
358+
// .NET strings do not include a null terminator by default, need to add it manually and also consider it for the length.
359+
private bool IsLinkNameTooLongForRegularField() => _linkName != null && (Encoding.UTF8.GetByteCount(_linkName) + 1) > FieldLengths.LinkName;
360+
361+
// Checks if the name string is too long to fit in the regular header field.
362+
// .NET strings do not include a null terminator by default, need to add it manually and also consider it for the length.
363+
private bool IsNameTooLongForRegularField() => (Encoding.UTF8.GetByteCount(_name) + 1) > FieldLengths.Name;
357364

358365
// Writes the current header as a Gnu entry into the archive stream.
359366
// Makes sure to add the preceding LongLink and/or LongPath entries if necessary, before the actual entry.
360367
internal void WriteAsGnu(Stream archiveStream, Span<byte> buffer)
361368
{
362369
Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek);
363370

364-
// First, we determine if we need a preceding LongLink, and write it if needed
365-
if (_linkName != null && Encoding.UTF8.GetByteCount(_linkName) > FieldLengths.LinkName)
371+
if (IsLinkNameTooLongForRegularField())
366372
{
367-
TarHeader longLinkHeader = GetGnuLongMetadataHeader(TarEntryType.LongLink, _linkName);
373+
// Linkname is too long for the regular header field, create a longlink entry where the linkname will be stored.
374+
TarHeader longLinkHeader = GetGnuLongLinkMetadataHeader();
368375
Debug.Assert(longLinkHeader._dataStream != null && longLinkHeader._dataStream.CanSeek); // We generate the long metadata data stream, should always be seekable
369376
longLinkHeader.WriteWithSeekableDataStream(TarEntryFormat.Gnu, archiveStream, buffer);
370377
buffer.Clear(); // Reset it to reuse it
371378
}
372379

373-
// Second, we determine if we need a preceding LongPath, and write it if needed
374-
if (Encoding.UTF8.GetByteCount(_name) > FieldLengths.Name)
380+
if (IsNameTooLongForRegularField())
375381
{
376-
TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name);
382+
// Name is too long for the regular header field, create a longpath entry where the name will be stored.
383+
TarHeader longPathHeader = GetGnuLongPathMetadataHeader();
377384
Debug.Assert(longPathHeader._dataStream != null && longPathHeader._dataStream.CanSeek); // We generate the long metadata data stream, should always be seekable
378385
longPathHeader.WriteWithSeekableDataStream(TarEntryFormat.Gnu, archiveStream, buffer);
379386
buffer.Clear(); // Reset it to reuse it
@@ -397,19 +404,19 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory<byte> buffer, C
397404
Debug.Assert(archiveStream.CanSeek || _dataStream == null || _dataStream.CanSeek);
398405
cancellationToken.ThrowIfCancellationRequested();
399406

400-
// First, we determine if we need a preceding LongLink, and write it if needed
401-
if (_linkName != null && Encoding.UTF8.GetByteCount(_linkName) > FieldLengths.LinkName)
407+
if (IsLinkNameTooLongForRegularField())
402408
{
403-
TarHeader longLinkHeader = GetGnuLongMetadataHeader(TarEntryType.LongLink, _linkName);
409+
// Linkname is too long for the regular header field, create a longlink entry where the linkname will be stored.
410+
TarHeader longLinkHeader = GetGnuLongLinkMetadataHeader();
404411
Debug.Assert(longLinkHeader._dataStream != null && longLinkHeader._dataStream.CanSeek); // We generate the long metadata data stream, should always be seekable
405412
await longLinkHeader.WriteWithSeekableDataStreamAsync(TarEntryFormat.Gnu, archiveStream, buffer, cancellationToken).ConfigureAwait(false);
406413
buffer.Span.Clear(); // Reset it to reuse it
407414
}
408415

409-
// Second, we determine if we need a preceding LongPath, and write it if needed
410-
if (Encoding.UTF8.GetByteCount(_name) > FieldLengths.Name)
416+
if (IsNameTooLongForRegularField())
411417
{
412-
TarHeader longPathHeader = GetGnuLongMetadataHeader(TarEntryType.LongPath, _name);
418+
// Name is too long for the regular header field, create a longpath entry where the name will be stored.
419+
TarHeader longPathHeader = GetGnuLongPathMetadataHeader();
413420
Debug.Assert(longPathHeader._dataStream != null && longPathHeader._dataStream.CanSeek); // We generate the long metadata data stream, should always be seekable
414421
await longPathHeader.WriteWithSeekableDataStreamAsync(TarEntryFormat.Gnu, archiveStream, buffer, cancellationToken).ConfigureAwait(false);
415422
buffer.Span.Clear(); // Reset it to reuse it
@@ -426,20 +433,46 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory<byte> buffer, C
426433
}
427434
}
428435

436+
private static MemoryStream GetLongMetadataStream(string text)
437+
{
438+
MemoryStream data = new MemoryStream();
439+
data.Write(Encoding.UTF8.GetBytes(text));
440+
data.WriteByte(0); // Add a null terminator at the end of the string, _size will be calculated later
441+
data.Position = 0;
442+
return data;
443+
}
444+
445+
private TarHeader GetGnuLongLinkMetadataHeader()
446+
{
447+
Debug.Assert(_linkName != null);
448+
MemoryStream dataStream = GetLongMetadataStream(_linkName);
449+
return GetGnuLongMetadataHeader(dataStream, TarEntryType.LongLink, _uid, _gid, _uName, _gName);
450+
}
451+
452+
private TarHeader GetGnuLongPathMetadataHeader()
453+
{
454+
MemoryStream dataStream = GetLongMetadataStream(_name);
455+
return GetGnuLongMetadataHeader(dataStream, TarEntryType.LongPath, _uid, _gid, _uName, _gName);
456+
}
457+
429458
// Creates and returns a GNU long metadata header, with the specified long text written into its data stream (seekable).
430-
private static TarHeader GetGnuLongMetadataHeader(TarEntryType entryType, string longText)
459+
private static TarHeader GetGnuLongMetadataHeader(MemoryStream dataStream, TarEntryType entryType, int mainEntryUid, int mainEntryGid, string? mainEntryUname, string? mainEntryGname)
431460
{
432461
Debug.Assert(entryType is TarEntryType.LongPath or TarEntryType.LongLink);
433462

434463
return new(TarEntryFormat.Gnu)
435464
{
436465
_name = GnuLongMetadataName, // Same name for both longpath or longlink
437466
_mode = TarHelpers.GetDefaultMode(entryType),
438-
_uid = 0,
439-
_gid = 0,
440-
_mTime = DateTimeOffset.MinValue, // 0
467+
_uid = mainEntryUid,
468+
_gid = mainEntryGid,
469+
_mTime = DateTimeOffset.UnixEpoch, // 0
441470
_typeFlag = entryType,
442-
_dataStream = new MemoryStream(Encoding.UTF8.GetBytes(longText))
471+
_dataStream = dataStream,
472+
_uName = mainEntryUname,
473+
_gName = mainEntryGname,
474+
_aTime = DateTimeOffset.UnixEpoch, // 0
475+
_cTime = DateTimeOffset.UnixEpoch, // 0
443476
};
444477
}
445478

@@ -614,17 +647,17 @@ private int WriteCommonFields(Span<byte> buffer, TarEntryType actualEntryType)
614647

615648
int checksum = 0;
616649

617-
if (_mode > 0)
650+
if (_mode >= 0)
618651
{
619652
checksum += FormatNumeric(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
620653
}
621654

622-
if (_uid > 0)
655+
if (_uid >= 0)
623656
{
624657
checksum += FormatNumeric(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
625658
}
626659

627-
if (_gid > 0)
660+
if (_gid >= 0)
628661
{
629662
checksum += FormatNumeric(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
630663
}
@@ -750,8 +783,8 @@ private int WritePosixAndGnuSharedFields(Span<byte> buffer)
750783
// Saves the gnu-specific fields into the specified spans.
751784
private int WriteGnuFields(Span<byte> buffer)
752785
{
753-
int checksum = WriteAsGnuTimestamp(_aTime, buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
754-
checksum += WriteAsGnuTimestamp(_cTime, buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
786+
int checksum = WriteAsTimestamp(_aTime, buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
787+
checksum += WriteAsTimestamp(_cTime, buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
755788

756789
if (_gnuUnusedBytes != null)
757790
{
@@ -1060,7 +1093,6 @@ private static int Checksum(ReadOnlySpan<byte> bytes)
10601093
}
10611094
return checksum;
10621095
}
1063-
10641096
private int FormatNumeric(int value, Span<byte> destination)
10651097
{
10661098
Debug.Assert(destination.Length == 8, "8 byte field expected.");
@@ -1118,6 +1150,7 @@ private static int FormatOctal(long value, Span<byte> destination)
11181150
Span<byte> digits = stackalloc byte[32]; // longer than any possible octal formatting of a ulong
11191151

11201152
int i = digits.Length - 1;
1153+
11211154
while (true)
11221155
{
11231156
digits[i] = (byte)('0' + (remaining % 8));
@@ -1136,24 +1169,6 @@ private int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
11361169
return FormatNumeric(unixTimeSeconds, destination);
11371170
}
11381171

1139-
// Writes the specified DateTimeOffset's Unix time seconds, and returns its checksum.
1140-
// If the timestamp is UnixEpoch, it writes 0s into the destination span.
1141-
private int WriteAsGnuTimestamp(DateTimeOffset timestamp, Span<byte> destination)
1142-
{
1143-
if (timestamp == DateTimeOffset.UnixEpoch)
1144-
{
1145-
#if DEBUG
1146-
for (int i = 0; i < destination.Length; i++)
1147-
{
1148-
Debug.Assert(destination[i] == 0, "Destination span should be zeroed.");
1149-
}
1150-
#endif
1151-
return 0;
1152-
}
1153-
1154-
return WriteAsTimestamp(timestamp, destination);
1155-
}
1156-
11571172
// Writes the specified text as an UTF8 string aligned to the left, and returns its checksum.
11581173
private static int WriteAsUtf8String(ReadOnlySpan<char> text, Span<byte> buffer)
11591174
{

0 commit comments

Comments
 (0)
0