Convert.FromHexString for raw UTF-8 bytes · Issue #117216 · dotnet/runtime · GitHub
[go: up one dir, main page]

Skip to content
Convert.FromHexString for raw UTF-8 bytes #117216
@sator-imaging

Description

@sator-imaging

I realized that Convert.FromHexString doesn't have an overload that accepts a span of UTF-8 bytes directly.
So I wrote a simple helper to address it.

using System;
using System.Text;
using System.Linq;

public class Program
{
    /// <summary>
    /// Decodes hexadecimal digits stored as ASCII/UTF-8 bytes into their binary form.
    /// </summary>
    /// <remarks>
    /// The characters themselves are NOT validated: any byte that is not
    /// <c>0-9</c>, <c>a-f</c> or <c>A-F</c> silently produces a meaningless value.
    /// Only the buffer lengths are checked, so that a malformed call fails before
    /// anything is written to <paramref name="result"/>.
    /// </remarks>
    /// <param name="utf8">Hex digits, two per output byte; upper and lower case may be mixed.</param>
    /// <param name="result">Destination buffer; must hold at least <c>utf8.Length / 2</c> bytes.</param>
    /// <param name="bytesWritten">Receives the number of bytes stored in <paramref name="result"/>.</param>
    /// <exception cref="FormatException"><paramref name="utf8"/> has an odd length.</exception>
    /// <exception cref="ArgumentException"><paramref name="result"/> is too short for the decoded data.</exception>
    public static void UnsafeConvertHexAsciiStringToBytes(ReadOnlySpan<byte> utf8, Span<byte> result, out int bytesWritten)
    {
        // Every output byte needs exactly two digits; reject a dangling nibble up front
        // instead of running off the end of the input halfway through the loop.
        if ((utf8.Length & 1) != 0)
        {
            throw new FormatException("The input length must be a multiple of 2.");
        }

        int byteCount = utf8.Length / 2;

        if (result.Length < byteCount)
        {
            throw new ArgumentException("The destination buffer is too short for the decoded data.", nameof(result));
        }

        for (int i = 0; i < byteCount; i++)
        {
            byte upper = DecodeNibbleUnchecked(utf8[2 * i]);
            byte lower = DecodeNibbleUnchecked(utf8[(2 * i) + 1]);

            // unchecked: for invalid digits the combined value may exceed a byte and is truncated.
            result[i] = unchecked((byte)((upper << 4) | lower));
        }

        bytesWritten = byteCount;
    }

    // Maps one hex digit to its value; performs no validation and wraps around on invalid input.
    private static byte DecodeNibbleUnchecked(byte digit)
    {
        // Subtracting this from a lower-case letter yields 10..15 for 'a'..'f'.
        const byte AlphabetOffset = (byte)'a' - 10;

        unchecked
        {
            if (digit <= (byte)'9')
            {
                return (byte)(digit - (byte)'0');
            }

            // Setting bit 0x20 folds 'A'..'F' onto 'a'..'f'.
            return (byte)((digit | 0x20) - AlphabetOffset);
        }
    }

    /// <summary>
    /// Demo: decodes a mixed-case SHA-256 hex string with the helper and compares
    /// the outcome with <see cref="Convert.FromHexString(string)"/>.
    /// </summary>
    public static void Main()
    {
        const string HASH = "9F86D081884C7d659a2feAa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08";
        var expected = Convert.FromHexString(HASH);

        Span<byte> test = stackalloc byte[HASH.Length / 2];
        UnsafeConvertHexAsciiStringToBytes(Encoding.UTF8.GetBytes(HASH), test, out var len);

        // Only the bytes actually written take part in the comparison and the dump.
        var decoded = test[..len];

        Console.WriteLine(decoded.SequenceEqual(expected));
        Console.WriteLine(string.Join(" ", expected.Select(x => x.ToString("x2")).ToArray()));
        Console.WriteLine(string.Join(" ", decoded.ToArray().Select(x => x.ToString("x2")).ToArray()));
    }
}

Is your feature request related to a problem? Please describe.

Not a problem. For convenience and usability.

Describe the solution you'd like

See above.

Additional context

I am trying to validate the hash of some content. The expected hash arrives as UTF-8 bytes from a cloud source.

SIMD version

As an exercise, I've also written a SIMD version.
I'm not sure it's worth doing for small inputs such as 256–512-bit hashes.

Anyway, I hope .NET supports UTF-8 and keeps UTF-8 lovers happy!!

using System;
using System.Runtime.Intrinsics;

public class Program
{
    /// <summary>
    /// Demo entry point: runs the vectorised decoder over the eight digits "0189aBEf".
    /// </summary>
    public static void Main()
    {
        byte[] sample =
        {
            (byte)'0', (byte)'1', (byte)'8', (byte)'9',
            (byte)'a', (byte)'B', (byte)'E', (byte)'f',
        };

        SIMD(sample);
    }

    /// <summary>
    /// Decodes ASCII hex digits eight at a time with <see cref="Vector64"/> operations
    /// and prints the intermediate and final vectors to the console.
    /// </summary>
    /// <param name="bytes">
    /// Hex digits as ASCII bytes, consumed in groups of eight. No validation is performed.
    /// </param>
    public static void SIMD(ReadOnlySpan<byte> bytes)
    {
        unchecked
        {
            // Shuffle index used for lanes that should end up empty
            // (presumably out-of-range indices select zero — confirm against the Shuffle docs).
            const sbyte Zeroed = (sbyte)0x80;

            // Spread every other nibble into the low byte of a 16-bit lane (sbyte x8 -> ushort x4).
            Vector64<sbyte> evenLanes = Vector64.Create((sbyte)0, Zeroed, 2, Zeroed, 4, Zeroed, 6, Zeroed);
            Vector64<sbyte> oddLanes = Vector64.Create((sbyte)1, Zeroed, 3, Zeroed, 5, Zeroed, 7, Zeroed);

            Vector64<sbyte> lowercaseBit = Vector64.Create((sbyte)0x20);
            Vector64<sbyte> firstNonDigit = Vector64.Create((sbyte)('9' + 1));
            Vector64<sbyte> digitBias = Vector64.Create((sbyte)'0');
            Vector64<sbyte> letterBias = Vector64.Create((sbyte)('a' - 10));

            int position = 0;
            while (position < bytes.Length)
            {
                Vector64<sbyte> chars = Vector64.Create(
                    (sbyte)bytes[position],
                    (sbyte)bytes[position + 1],
                    (sbyte)bytes[position + 2],
                    (sbyte)bytes[position + 3],
                    (sbyte)bytes[position + 4],
                    (sbyte)bytes[position + 5],
                    (sbyte)bytes[position + 6],
                    (sbyte)bytes[position + 7]);

                // Fold upper-case letters onto lower case; '0'..'9' already have this bit set.
                Vector64<sbyte> folded = chars | lowercaseBit;

                // A lane holds a digit when subtracting ('9' + 1) leaves it negative.
                Vector64<sbyte> isDigit = Vector64.LessThan(folded - firstNonDigit, Vector64<sbyte>.Zero);
                Vector64<sbyte> bias = Vector64.ConditionalSelect(isDigit, digitBias, letterBias);
                Vector64<sbyte> nibbles = folded - bias;
                Console.WriteLine(nibbles);

                // ushort x4: high nibbles moved into bits 4..7
                Vector64<ushort> high = Vector64.Shuffle(nibbles, evenLanes).AsUInt16() << 4;
                Console.WriteLine(high);

                Vector64<ushort> low = Vector64.Shuffle(nibbles, oddLanes).AsUInt16();
                Vector64<ushort> packed = high + low;

                Console.WriteLine();
                Console.WriteLine(packed);

                position += 8;
            }
        }
    }
}

Metadata

Metadata

Assignees

Labels

needs-area-label: An area label is needed to ensure this gets routed to the appropriate area owners

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions

    0