-
Notifications
You must be signed in to change notification settings - Fork 5.1k
Closed as duplicate of #114079
Labels
needs-area-labelAn area label is needed to ensure this gets routed to the appropriate area ownersAn area label is needed to ensure this gets routed to the appropriate area owners
Description
I noticed that Convert.FromHexString
doesn't have an overload that accepts a span of UTF-8 bytes directly,
so I wrote a simple helper to address it.
using System;
using System.Text;
using System.Linq;
/// <summary>
/// Sample program: decodes a hexadecimal string supplied as UTF-8/ASCII bytes and
/// compares the result with <see cref="Convert.FromHexString(string)"/>.
/// </summary>
public class Program
{
    /// <summary>
    /// Decodes pairs of ASCII hexadecimal digits (upper or lower case) into bytes.
    /// </summary>
    /// <remarks>
    /// Only the buffer lengths are checked. The characters themselves are NOT validated:
    /// bytes outside <c>0-9</c>, <c>a-f</c>, <c>A-F</c> silently produce meaningless output.
    /// </remarks>
    /// <param name="utf8">Hex digits encoded as UTF-8/ASCII; the length must be even.</param>
    /// <param name="result">Destination buffer; needs at least <c>utf8.Length / 2</c> bytes.</param>
    /// <param name="bytesWritten">Number of bytes stored in <paramref name="result"/>.</param>
    /// <exception cref="FormatException"><paramref name="utf8"/> has an odd length.</exception>
    /// <exception cref="ArgumentException"><paramref name="result"/> is too short.</exception>
    public static void UnsafeConvertHexAsciiStringToBytes(ReadOnlySpan<byte> utf8, Span<byte> result, out int bytesWritten)
    {
        // Reject bad lengths up front so nothing is written before failing.
        if ((utf8.Length & 1) != 0)
        {
            throw new FormatException("The input length must be a multiple of 2.");
        }

        int byteCount = utf8.Length / 2;
        if (result.Length < byteCount)
        {
            throw new ArgumentException("Destination is too short.", nameof(result));
        }

        for (int i = 0; i < byteCount; i++)
        {
            byte high = DecodeNibble(utf8[2 * i]);
            byte low = DecodeNibble(utf8[(2 * i) + 1]);
            result[i] = unchecked((byte)((high << 4) | low));
        }

        bytesWritten = byteCount;
    }

    /// <summary>
    /// Maps one ASCII hex digit to its value without validating it.
    /// </summary>
    private static byte DecodeNibble(byte c)
    {
        // 'a' - 10, so that 'a' maps to 10 once the letter is forced to lower case.
        const int AlphabetOffset = 'a' - 10;

        unchecked
        {
            // Wrap-around on invalid input is deliberate: no validation is performed here.
            return c <= (byte)'9'
                ? (byte)(c - '0')
                : (byte)((c | 0x20) - AlphabetOffset); // 0x20 folds 'A'-'F' onto 'a'-'f'
        }
    }

    /// <summary>
    /// Decodes a sample SHA-256 hash and prints whether it matches the framework's decoder,
    /// followed by both byte sequences in hex.
    /// </summary>
    public static void Main()
    {
        const string Hash = "9F86D081884C7d659a2feAa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08";
        byte[] expected = Convert.FromHexString(Hash);
        Span<byte> buffer = stackalloc byte[Hash.Length / 2];
        UnsafeConvertHexAsciiStringToBytes(Encoding.UTF8.GetBytes(Hash), buffer, out int len);

        // Compare only what was actually written.
        Span<byte> decoded = buffer[..len];
        Console.WriteLine(decoded.SequenceEqual(expected));
        Console.WriteLine(string.Join(" ", expected.Select(x => x.ToString("x2"))));
        Console.WriteLine(string.Join(" ", decoded.ToArray().Select(x => x.ToString("x2"))));
    }
}
Is your feature request related to a problem? Please describe.
Not a problem. For convenience and usability.
Describe the solution you'd like
See above.
Additional context
I am trying to validate a content hash. The expected hash arrives as UTF-8 bytes from a cloud source.
SIMD version
As an exercise, I've also written a SIMD version.
I'm not sure it's worth doing for small inputs such as 256–512-bit hashes.
Anyway, I hope .NET supports UTF-8 here and makes UTF-8 lovers happy!
using System;
using System.Runtime.Intrinsics;
/// <summary>
/// Sample program: decodes ASCII hex digits eight at a time with <see cref="Vector64"/>
/// and prints the intermediate and final vectors.
/// </summary>
public class Program
{
    /// <summary>
    /// Runs the demo on the eight characters <c>0189aBEf</c>.
    /// </summary>
    public static void Main()
    {
        byte[] sample =
        {
            (byte)'0', (byte)'1', (byte)'8', (byte)'9',
            (byte)'a', (byte)'B', (byte)'E', (byte)'f',
        };
        SIMD(sample);
    }

    /// <summary>
    /// Decodes <paramref name="bytes"/> in chunks of eight ASCII hex digits and writes, per chunk,
    /// the eight nibble values, the shifted high nibbles, an empty line and the four combined values
    /// to the console. No validation is performed on the input characters.
    /// </summary>
    /// <param name="bytes">ASCII hex digits; each chunk reads eight consecutive elements.</param>
    public static void SIMD(ReadOnlySpan<byte> bytes)
    {
        unchecked
        {
            // Shuffle index with the top bit set (0x80): out of range, so that lane becomes zero.
            const sbyte Zeroed = sbyte.MinValue;

            Vector64<sbyte> lowercaseBit = Vector64.Create((sbyte)0x20);
            Vector64<sbyte> firstAfterDigits = Vector64.Create((sbyte)('9' + 1));
            Vector64<sbyte> digitBase = Vector64.Create((sbyte)'0');
            Vector64<sbyte> letterBase = Vector64.Create((sbyte)('a' - 10));

            // Spread the even / odd bytes into the low byte of each 16-bit lane.
            Vector64<sbyte> evenLanes = Vector64.Create((sbyte)0, Zeroed, 2, Zeroed, 4, Zeroed, 6, Zeroed);
            Vector64<sbyte> oddLanes = Vector64.Create((sbyte)1, Zeroed, 3, Zeroed, 5, Zeroed, 7, Zeroed);

            int offset = 0;
            while (offset < bytes.Length)
            {
                // Element-wise load, then force letters to lower case (digits already have bit 0x20 set).
                Vector64<sbyte> chars = Vector64.Create(
                    (sbyte)bytes[offset],
                    (sbyte)bytes[offset + 1],
                    (sbyte)bytes[offset + 2],
                    (sbyte)bytes[offset + 3],
                    (sbyte)bytes[offset + 4],
                    (sbyte)bytes[offset + 5],
                    (sbyte)bytes[offset + 6],
                    (sbyte)bytes[offset + 7]) | lowercaseBit;

                // A lane is a digit when (char - ('9' + 1)) is negative.
                Vector64<sbyte> isDigit = Vector64.LessThan(chars - firstAfterDigits, Vector64<sbyte>.Zero);
                Vector64<sbyte> nibbles = chars - Vector64.ConditionalSelect(isDigit, digitBase, letterBase);
                Console.WriteLine(nibbles);

                // Four 16-bit lanes, each holding one high nibble shifted into place.
                Vector64<ushort> high = Vector64.ShiftLeft(Vector64.Shuffle(nibbles, evenLanes).AsUInt16(), 4);
                Console.WriteLine(high);

                Vector64<ushort> low = Vector64.Shuffle(nibbles, oddLanes).AsUInt16();
                Vector64<ushort> combined = high + low;
                Console.WriteLine();
                Console.WriteLine(combined);

                offset += 8;
            }
        }
    }
}
Metadata
Metadata
Assignees
Labels
needs-area-labelAn area label is needed to ensure this gets routed to the appropriate area ownersAn area label is needed to ensure this gets routed to the appropriate area owners