diff --git a/src/System.Management.Automation/engine/Utils.cs b/src/System.Management.Automation/engine/Utils.cs index 5ef3339eac1..56b12ecf072 100644 --- a/src/System.Management.Automation/engine/Utils.cs +++ b/src/System.Management.Automation/engine/Utils.cs @@ -5,17 +5,18 @@ using System.Collections.Concurrent; using System.Collections.Generic; using System.Collections.ObjectModel; +using System.ComponentModel; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; using System.Linq; -using System.ComponentModel; using System.Management.Automation.Configuration; using System.Management.Automation.Internal; using System.Management.Automation.Language; using System.Management.Automation.Runspaces; using System.Management.Automation.Security; +using System.Numerics; using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -38,6 +39,229 @@ namespace System.Management.Automation /// internal static class Utils { + /// + /// Converts a given double value to BigInteger via Math.Round(). + /// + /// The value to convert. + /// Returns a BigInteger value equivalent to the input value rounded to nearest integer. 
/// <summary>
/// Converts a given double value to BigInteger via Math.Round().
/// </summary>
/// <param name="d">The value to convert.</param>
/// <returns>Returns a BigInteger value equivalent to the input value rounded to nearest integer.</returns>
/// <exception cref="OverflowException">
/// Thrown by the BigInteger constructor when <paramref name="d"/> is NaN or infinite.
/// </exception>
internal static BigInteger AsBigInt(this double d) => new BigInteger(Math.Round(d));

/// <summary>
/// Attempts to narrow a BigInteger to <see cref="byte"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="b">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies within the range of the target type.</returns>
internal static bool TryCast(BigInteger value, out byte b)
{
    if (value < byte.MinValue || byte.MaxValue < value)
    {
        b = 0;
        return false;
    }

    b = (byte)value;
    return true;
}

/// <summary>
/// Attempts to narrow a BigInteger to <see cref="sbyte"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="sb">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies within the range of the target type.</returns>
internal static bool TryCast(BigInteger value, out sbyte sb)
{
    if (value < sbyte.MinValue || sbyte.MaxValue < value)
    {
        sb = 0;
        return false;
    }

    sb = (sbyte)value;
    return true;
}

/// <summary>
/// Attempts to narrow a BigInteger to <see cref="short"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="s">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies within the range of the target type.</returns>
internal static bool TryCast(BigInteger value, out short s)
{
    if (value < short.MinValue || short.MaxValue < value)
    {
        s = 0;
        return false;
    }

    s = (short)value;
    return true;
}

/// <summary>
/// Attempts to narrow a BigInteger to <see cref="ushort"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="us">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies within the range of the target type.</returns>
internal static bool TryCast(BigInteger value, out ushort us)
{
    if (value < ushort.MinValue || ushort.MaxValue < value)
    {
        us = 0;
        return false;
    }

    us = (ushort)value;
    return true;
}

/// <summary>
/// Attempts to narrow a BigInteger to <see cref="int"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="i">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies within the range of the target type.</returns>
internal static bool TryCast(BigInteger value, out int i)
{
    if (value < int.MinValue || int.MaxValue < value)
    {
        i = 0;
        return false;
    }

    i = (int)value;
    return true;
}

/// <summary>
/// Attempts to narrow a BigInteger to <see cref="uint"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="u">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies within the range of the target type.</returns>
internal static bool TryCast(BigInteger value, out uint u)
{
    if (value < uint.MinValue || uint.MaxValue < value)
    {
        u = 0;
        return false;
    }

    u = (uint)value;
    return true;
}

/// <summary>
/// Attempts to narrow a BigInteger to <see cref="long"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="l">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies within the range of the target type.</returns>
internal static bool TryCast(BigInteger value, out long l)
{
    if (value < long.MinValue || long.MaxValue < value)
    {
        l = 0;
        return false;
    }

    l = (long)value;
    return true;
}

/// <summary>
/// Attempts to narrow a BigInteger to <see cref="ulong"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="ul">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies within the range of the target type.</returns>
internal static bool TryCast(BigInteger value, out ulong ul)
{
    if (value < ulong.MinValue || ulong.MaxValue < value)
    {
        ul = 0;
        return false;
    }

    ul = (ulong)value;
    return true;
}

/// <summary>
/// Attempts to convert a BigInteger to <see cref="decimal"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="dm">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies between decimal.MinValue and decimal.MaxValue.</returns>
internal static bool TryCast(BigInteger value, out decimal dm)
{
    // decimal has no implicit conversion to BigInteger, so the bounds are converted explicitly.
    if (value < (BigInteger)decimal.MinValue || (BigInteger)decimal.MaxValue < value)
    {
        dm = 0;
        return false;
    }

    dm = (decimal)value;
    return true;
}

/// <summary>
/// Attempts to convert a BigInteger to <see cref="double"/>.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <param name="db">Receives the converted value, or 0 when the value is out of range.</param>
/// <returns>True when <paramref name="value"/> lies between double.MinValue and double.MaxValue.</returns>
internal static bool TryCast(BigInteger value, out double db)
{
    // double has no implicit conversion to BigInteger, so the bounds are converted explicitly.
    if (value < (BigInteger)double.MinValue || (BigInteger)double.MaxValue < value)
    {
        db = 0;
        return false;
    }

    db = (double)value;
    return true;
}

/// <summary>
/// Parses a span of characters to calculate its value as a binary number.
/// Assumes input has already been sanitized and only contains zeroes (0) or ones (1).
/// </summary>
/// <param name="digits">Span of binary digits. Assumes all digits are either 1 or 0.</param>
/// <param name="unsigned">
/// Whether to treat the number as unsigned. When false, the leading digit is treated as a sign bit
/// only when it is '1' and the input is 8, 16, 32, 64, 96, or at least 128 digits long; every other
/// input is parsed as unsigned.
/// </param>
/// <returns>
/// Returns the value of the binary string as a BigInteger. An empty span yields zero.
/// </returns>
internal static BigInteger ParseBinary(ReadOnlySpan<char> digits, bool unsigned)
{
    // Nothing to parse. Returning here also keeps the digits[0] access below in bounds.
    if (digits.IsEmpty)
    {
        return BigInteger.Zero;
    }

    if (!unsigned)
    {
        if (digits[0] == '0')
        {
            // A leading zero can never be a set sign bit.
            unsigned = true;
        }
        else
        {
            switch (digits.Length)
            {
                // Only accept sign bits at these lengths:
                case 8: // byte
                case 16: // short
                case 32: // int
                case 64: // long
                case 96: // decimal
                case int n when n >= 128: // BigInteger
                    break;
                default:
                    // If we do not flag these as unsigned, bigint assumes a sign bit for any (8 * n) string length
                    unsigned = true;
                    break;
            }
        }
    }

    // Only use heap allocation for very large numbers
    const int MaxStackAllocation = 512;

    // Calculate number of 8-bit bytes needed to hold the input, rounded up to next whole number.
    int outputByteCount = (digits.Length + 7) / 8;
    Span<byte> outputBytes = outputByteCount <= MaxStackAllocation ? stackalloc byte[outputByteCount] : new byte[outputByteCount];
    int outputByteIndex = outputBytes.Length - 1;

    // We need to be prepared for any partial leading bytes, (e.g., 010|00000011|00101100), or cases
    // where we only have less than 8 bits to work with from the beginning.
    //
    // Walk bytes right to left, stepping one whole byte at a time (if there are any whole bytes).
    int byteWalker;
    for (byteWalker = digits.Length - 1; byteWalker >= 7; byteWalker -= 8)
    {
        // Use bit shifts and binary-or to sum the values in each byte. The operands are the character
        // codes of '0' and '1', so these calculations create values higher than a single byte, but the
        // higher bits are stripped out when cast to byte.
        //
        // The low bits are added in separately to allow us to strip the higher 'noise' bits before we
        // sum the values using binary-or.
        //
        // Simplified representation of logic: (byte)( (7)|(6)|(5)|(4) ) | ( ( (3)|(2)|(1)|(0) ) & 0b1111 )
        //
        // N.B.: This code has been tested against a straight for loop iterating through the byte, and in no
        // circumstance was it faster or more effective than this unrolled version.
        outputBytes[outputByteIndex--] =
            (byte)(
                ( (digits[byteWalker - 7] << 7)
                | (digits[byteWalker - 6] << 6)
                | (digits[byteWalker - 5] << 5)
                | (digits[byteWalker - 4] << 4)
                )
              | (
                    ( (digits[byteWalker - 3] << 3)
                    | (digits[byteWalker - 2] << 2)
                    | (digits[byteWalker - 1] << 1)
                    | (digits[byteWalker])
                    ) & 0b1111
                )
            );
    }

    // With complete bytes parsed, byteWalker is either at the partial byte start index, or at -1
    if (byteWalker >= 0)
    {
        int currentByteValue = 0;
        for (int i = 0; i <= byteWalker; i++)
        {
            currentByteValue = (currentByteValue << 1) | (digits[i] - '0');
        }

        outputBytes[outputByteIndex] = (byte)currentByteValue;
    }

    return new BigInteger(outputBytes, isUnsigned: unsigned, isBigEndian: true);
}
/// <summary>
/// Bit flags describing how the tokenizer may treat a single character.
/// Each member other than None occupies its own bit; the lowest bit (0x0001) is not assigned.
/// </summary>
[Flags]
internal enum CharTraits
{
    /// <summary>
    /// No specific character traits.
    /// </summary>
    None = 0,

    /// <summary>
    /// For identifiers, the first character must be a letter or underscore.
    /// </summary>
    IdentifierStart = 1 << 1,

    /// <summary>
    /// The character is a valid first character of a multiplier.
    /// </summary>
    MultiplierStart = 1 << 2,

    /// <summary>
    /// The character is a valid type suffix for numeric literals.
    /// </summary>
    TypeSuffix = 1 << 3,

    /// <summary>
    /// The character is a whitespace character.
    /// </summary>
    Whitespace = 1 << 4,

    /// <summary>
    /// The character terminates a line.
    /// </summary>
    Newline = 1 << 5,

    /// <summary>
    /// The character is a hexadecimal digit.
    /// </summary>
    HexDigit = 1 << 6,

    /// <summary>
    /// The character is a decimal digit.
    /// </summary>
    Digit = 1 << 7,

    /// <summary>
    /// The character is allowed as the first character in an unbraced variable name.
    /// </summary>
    VarNameFirst = 1 << 8,

    /// <summary>
    /// The character is not part of the token being scanned.
    /// </summary>
    ForceStartNewToken = 1 << 9,

    /// <summary>
    /// The character is not part of the token being scanned, when the token is known to be part of an assembly name.
    /// </summary>
    ForceStartNewAssemblyNameSpecToken = 1 << 10,

    /// <summary>
    /// The character is the first character of some operator (and hence is not part of a token that starts a number).
    /// </summary>
    ForceStartNewTokenAfterNumber = 1 << 11,
}
internal static bool IsDecimalDigit(this char c)
{
    // Returns true if the character is a decimal digit ('0' through '9').
    //
    // The distance from '0' is compared as an unsigned value so that one comparison covers both
    // ends of the range: outside a checked context, a character below '0' converts to a very
    // large unsigned number and fails the test. This takes fewer operations overall than a more
    // readable check that compares against each digit or both bounds separately.
    uint offsetFromZero = (uint)(c - '0');
    return offsetFromZero <= 9;
}

internal static bool IsBinaryDigit(this char c)
{
    // Returns true if the character is a binary digit ('0' or '1').
    //
    // Same unsigned-distance check as IsDecimalDigit(); the readable equivalent would be
    // {(this char c) => c == '0' | c == '1'}.
    uint offsetFromZero = (uint)(c - '0');
    return offsetFromZero <= 1;
}
/// <summary>
/// Indicates the format of a numeric literal.
/// </summary>
internal enum NumberFormat
{
    /// <summary>
    /// Indicates standard decimal literal, no necessary prefix.
    /// </summary>
    Decimal = 0,

    /// <summary>
    /// Indicates hexadecimal literal, with '0x' prefix.
    /// </summary>
    Hex = 1,

    /// <summary>
    /// Indicates binary literal, with '0b' prefix.
    /// </summary>
    Binary = 2
}

/// <summary>
/// Appends a run of binary digits to <paramref name="sb"/>.
/// Characters are read with PeekChar() for as long as they satisfy IsBinaryDigit(); each accepted
/// character is passed over with SkipChar() before being appended. The first character that is
/// not a binary digit is left in place, so the builder may come back unchanged.
/// </summary>
/// <param name="sb">The builder that receives the scanned digits.</param>
private void ScanBinaryDigits(StringBuilder sb)
{
    for (char next = PeekChar(); next.IsBinaryDigit(); next = PeekChar())
    {
        SkipChar();
        sb.Append(next);
    }
}
@@ private static bool TryGetNumberValue(string strNum, bool hex, bool real, Number doubleValue = -0.0; } + doubleValue *= multiplier; switch (suffix) { case NumberSuffixFlags.None: - result = doubleValue * multiplier; + result = doubleValue; + return true; + case NumberSuffixFlags.SignedByte: + if (Utils.TryCast(doubleValue.AsBigInt(), out sbyte sb)) + { + result = sb; + return true; + } + break; - case NumberSuffixFlags.Long: - result = (long)Convert.ChangeType(doubleValue, typeof(long), CultureInfo.InvariantCulture) * multiplier; + case NumberSuffixFlags.UnsignedByte: + if (Utils.TryCast(doubleValue.AsBigInt(), out byte b)) + { + result = b; + return true; + } + break; case NumberSuffixFlags.Short: - result = (short)((short)Convert.ChangeType(doubleValue, typeof(short), CultureInfo.InvariantCulture) * multiplier); - break; - case NumberSuffixFlags.SignedByte: - result = (sbyte)((sbyte)Convert.ChangeType(doubleValue, typeof(sbyte), CultureInfo.InvariantCulture) * multiplier); + if (Utils.TryCast(doubleValue.AsBigInt(), out short s)) + { + result = s; + return true; + } + break; - case NumberSuffixFlags.UnsignedLong: - result = (ulong)Convert.ChangeType(doubleValue, typeof(ulong), CultureInfo.InvariantCulture) * (ulong)multiplier; + case NumberSuffixFlags.Long: + if (Utils.TryCast(doubleValue.AsBigInt(), out long l)) + { + result = l; + return true; + } + break; case NumberSuffixFlags.UnsignedShort: - result = (ushort)((ushort)Convert.ChangeType(doubleValue, typeof(ushort), CultureInfo.InvariantCulture) * multiplier); - break; - case NumberSuffixFlags.UnsignedByte: - result = (byte)((byte)Convert.ChangeType(doubleValue, typeof(byte), CultureInfo.InvariantCulture) * multiplier); + if (Utils.TryCast(doubleValue.AsBigInt(), out ushort us)) + { + result = us; + return true; + } + break; case NumberSuffixFlags.Unsigned: - ulong testresult = (ulong)Convert.ChangeType(doubleValue, typeof(ulong), CultureInfo.InvariantCulture) * (ulong)multiplier; - if (testresult < 
uint.MaxValue) + BigInteger testValue = doubleValue.AsBigInt(); + if (Utils.TryCast(testValue, out uint u)) { - result = (uint)testresult; + result = u; + return true; } - else + else if (Utils.TryCast(testValue, out ulong ul)) { - result = testresult; + result = ul; + return true; } break; - default: - result = null; - return false; + case NumberSuffixFlags.UnsignedLong: + if (Utils.TryCast(doubleValue.AsBigInt(), out ulong ulValue)) + { + result = ulValue; + return true; + } + + break; + case NumberSuffixFlags.BigInteger: + result = doubleValue.AsBigInt(); + return true; } - return true; + // Invalid NumberSuffixFlags combination, or outside bounds of specified type. + result = null; + return false; } - // TryParse on (real) number fails. + // TryParse for real numeric literal failed result = null; return false; } - if (hex && !strNum[0].IsHexDigit()) + BigInteger bigValue; + + switch (format) { - if (strNum[0] == '-') - { - multiplier = -multiplier; - } + case NumberFormat.Hex: + if (!strNum[0].IsHexDigit()) + { + if (strNum[0] == '-') + { + multiplier = -multiplier; + } + + // Remove leading char (expected: - or +) + strNum = strNum.Slice(1); + } + + // If we're expecting a sign bit, remove the leading 0 added in ScanNumberHelper + if (!suffix.HasFlag(NumberSuffixFlags.Unsigned) && ((strNum.Length - 1) & 7) == 0) + { + strNum = strNum.Slice(1); + } + + style = NumberStyles.AllowHexSpecifier; + if (!BigInteger.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out bigValue)) + { + result = null; + return false; + } + + // If we have a hex literal denoting (u)int64, treat it as such, even if the value is low + if (strNum.Length == 16 && (suffix == NumberSuffixFlags.None || suffix == NumberSuffixFlags.Unsigned)) + { + suffix |= NumberSuffixFlags.Long; + } + + break; + case NumberFormat.Binary: + if (!strNum[0].IsBinaryDigit()) + { + if (strNum[0] == '-') + { + multiplier = -multiplier; + } + + // Remove leading char (expected: - or +) + strNum = 
strNum.Slice(1); + } + + bigValue = Utils.ParseBinary(strNum, suffix.HasFlag(NumberSuffixFlags.Unsigned)); + + // If we have a binary literal denoting (u)int64, treat it as such + if (strNum.Length == 64 && (suffix == NumberSuffixFlags.None || suffix == NumberSuffixFlags.Unsigned)) + { + suffix |= NumberSuffixFlags.Long; + } - strNum = strNum.Substring(1); + break; + default: + style = NumberStyles.AllowLeadingSign; + if (!BigInteger.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out bigValue)) + { + result = null; + return false; + } + + break; } - style = hex ? NumberStyles.AllowHexSpecifier : NumberStyles.AllowLeadingSign; + // Apply multiplier before attempting casting to prevent overflow + bigValue *= multiplier; switch (suffix) { - case NumberSuffixFlags.None: + case NumberSuffixFlags.SignedByte: + if (Utils.TryCast(bigValue, out sbyte sb)) + { + result = sb; + return true; + } + break; - case NumberSuffixFlags.Long: - if (long.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out long l)) + case NumberSuffixFlags.UnsignedByte: + if (Utils.TryCast(bigValue, out byte b)) { - result = l * multiplier; + result = b; return true; } - result = null; - return false; + break; case NumberSuffixFlags.Short: - if (short.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out short s)) + if (Utils.TryCast(bigValue, out short s)) { - result = (short)(s * multiplier); + result = s; return true; } - result = null; - return false; - case NumberSuffixFlags.SignedByte: - // Multiplier for hex-parsed values can be negative to permit - prefix for hex values - if (Math.Abs(multiplier) == 1 && sbyte.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out sbyte sb)) + break; + case NumberSuffixFlags.Long: + if (Utils.TryCast(bigValue, out long l)) { - result = (sbyte)(sb * multiplier); + result = l; return true; } - result = null; - return false; - case NumberSuffixFlags.Unsigned: - if (ulong.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out ulong 
u)) + break; + case NumberSuffixFlags.UnsignedShort: + if (Utils.TryCast(bigValue, out ushort us)) { - u *= (ulong)multiplier; - - if (u <= uint.MaxValue) - { - result = (uint)u; - } - else - { - result = u; - } + result = us; + return true; + } + break; + case NumberSuffixFlags.Unsigned: + if (Utils.TryCast(bigValue, out uint u)) + { + result = u; + return true; + } + else if (Utils.TryCast(bigValue, out ulong ul)) + { + result = ul; return true; } - result = null; - return false; + break; case NumberSuffixFlags.UnsignedLong: - if (ulong.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out ulong ul)) + if (Utils.TryCast(bigValue, out ulong ulValue)) { - result = (ulong)(ul * (ulong)multiplier); + result = ulValue; return true; } - result = null; - return false; - case NumberSuffixFlags.UnsignedShort: - if (ushort.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out ushort us)) + break; + case NumberSuffixFlags.Decimal: + if (Utils.TryCast(bigValue, out decimal dm)) { - result = (ushort)(us * (ushort)multiplier); + result = dm; return true; } - result = null; - return false; - case NumberSuffixFlags.UnsignedByte: - // If multiplier is negative or greater than 1, we can assume it will fail since the - // minimum multiplier is 1024 (already exceeds byte.MaxValue), and byte is unsigned - if (multiplier == 1 && byte.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out byte b)) + break; + case NumberSuffixFlags.BigInteger: + result = bigValue; + return true; + case NumberSuffixFlags.None: + // Type not specified; fit value into narrowest signed type available, int32 minimum + if (Utils.TryCast(bigValue, out int i)) { - result = b; + result = i; return true; } - result = null; - return false; - default: - result = null; - return false; - } - - // From here on - the user hasn't specified the type, so we need to figure it out. 
- BigInteger bigValue; - TypeCode whichTryParseWorked; - if (int.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out int intValue)) - { - whichTryParseWorked = TypeCode.Int32; - bigValue = intValue; - } - else if (long.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out long longValue)) - { - whichTryParseWorked = TypeCode.Int64; - bigValue = longValue; - } - else if (decimal.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out decimal decimalValue)) - { - whichTryParseWorked = TypeCode.Decimal; - bigValue = (BigInteger)decimalValue; - } - else - { - // The result must be double if we get here. - if (!hex) - { - if (double.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out double dbl)) + if (Utils.TryCast(bigValue, out long lValue)) { - result = dbl * multiplier; + result = lValue; return true; } - } - result = null; - return false; - } + // Result is too big for anything else; fallback to decimal or double + if (format == NumberFormat.Decimal) + { + if (Utils.TryCast(bigValue, out decimal dmValue)) + { + result = dmValue; + return true; + } - bigValue *= multiplier; - if (bigValue >= int.MinValue && bigValue <= int.MaxValue && whichTryParseWorked <= TypeCode.Int32) - { - result = (int)bigValue; - } - else if (bigValue >= long.MinValue && bigValue <= long.MaxValue && whichTryParseWorked <= TypeCode.Int64) - { - result = (long)bigValue; - } - else if (bigValue >= (BigInteger)decimal.MinValue && bigValue <= (BigInteger)decimal.MaxValue && whichTryParseWorked <= TypeCode.Decimal) - { - result = (decimal)bigValue; - } - else - { - result = (double)bigValue; + if (Utils.TryCast(bigValue, out double d)) + { + result = d; + return true; + } + } + + // Hex or Binary value, too big for generic non-suffixed parsing + result = null; + return false; } - return true; + // Value cannot be contained in type specified by suffix, or invalid suffix flags. 
+ result = null; + return false; } catch (Exception) { @@ -3630,14 +3742,12 @@ private static bool TryGetNumberValue(string strNum, bool hex, bool real, Number private Token ScanNumber(char firstChar) { - Diagnostics.Assert(firstChar == '.' || (firstChar >= '0' && firstChar <= '9') + Diagnostics.Assert( + firstChar == '.' || (firstChar >= '0' && firstChar <= '9') || (AllowSignedNumbers && (firstChar == '+' || firstChar.IsDash())), "Number must start with '.', '-', or digit."); - bool hex, real; - NumberSuffixFlags suffix; - long multiplier; + ReadOnlySpan strNum = ScanNumberHelper(firstChar, out NumberFormat format, out NumberSuffixFlags suffix, out bool real, out long multiplier); - string strNum = ScanNumberHelper(firstChar, out hex, out real, out suffix, out multiplier); // the token is not a number. i.e. 77z.exe if (strNum == null) { @@ -3647,7 +3757,7 @@ private Token ScanNumber(char firstChar) } object value; - if (!TryGetNumberValue(strNum, hex, real, suffix, multiplier, out value)) + if (!TryGetNumberValue(strNum, format, suffix, real, multiplier, out value)) { if (!InExpressionMode()) { @@ -3665,21 +3775,24 @@ private Token ScanNumber(char firstChar) return NewNumberToken(value); } + /// + /// Scans a numeric string to determine its characteristics. + /// /// The first character. - /// Indicate if it's a hex number. - /// Indicate if it's a real number. - /// Indicate the type suffix. + /// Indicate if it's a hex, binary, or decimal number. + /// Indicate the format suffix. + /// Indicate if the number is real (non-integer). /// Indicate the specified multiplier. /// - /// return null if the token is not a number + /// Return null if the token is not a number /// OR - /// return the string format of the number + /// Return the string format of the number. 
/// - private string ScanNumberHelper(char firstChar, out bool hex, out bool real, out NumberSuffixFlags suffix, out long multiplier) + private ReadOnlySpan ScanNumberHelper(char firstChar, out NumberFormat format, out NumberSuffixFlags suffix, out bool real, out long multiplier) { - hex = false; - real = false; + format = NumberFormat.Decimal; suffix = NumberSuffixFlags.None; + real = false; multiplier = 1; bool notNumber = false; @@ -3702,16 +3815,36 @@ private string ScanNumberHelper(char firstChar, out bool hex, out bool real, out else { c = PeekChar(); - if (firstChar == '0' && (c == 'x' || c == 'X')) + bool isHexOrBinary = firstChar == '0' && (c == 'x' || c == 'X' || c == 'b' || c == 'B'); + + if (isHexOrBinary) { SkipChar(); - ScanHexDigits(sb); - if (sb.Length == 0) + + switch (c) { - notNumber = true; - } + case 'x': + case 'X': + sb.Append('0'); // Prepend a 0 to the number before any numeric digits are added + ScanHexDigits(sb); + if (sb.Length == 0) + { + notNumber = true; + } + + format = NumberFormat.Hex; + break; + case 'b': + case 'B': + ScanBinaryDigits(sb); + if (sb.Length == 0) + { + notNumber = true; + } - hex = true; + format = NumberFormat.Binary; + break; + } } else { @@ -3773,6 +3906,10 @@ private string ScanNumberHelper(char firstChar, out bool hex, out bool real, out case 'Y': suffix |= NumberSuffixFlags.SignedByte; break; + case 'n': + case 'N': + suffix |= NumberSuffixFlags.BigInteger; + break; default: notNumber = true; break; @@ -3783,30 +3920,32 @@ private string ScanNumberHelper(char firstChar, out bool hex, out bool real, out if (c.IsTypeSuffix()) { SkipChar(); - if (suffix == NumberSuffixFlags.Unsigned) - { - switch (c) - { - case 'l': - case 'L': - suffix |= NumberSuffixFlags.Long; - break; - case 's': - case 'S': - suffix |= NumberSuffixFlags.Short; - break; - case 'y': - case 'Y': - suffix |= NumberSuffixFlags.SignedByte; - break; - default: - notNumber = true; - break; - } - } - else + switch (suffix) { - notNumber = true; + 
case NumberSuffixFlags.Unsigned: + switch (c) + { + case 'l': + case 'L': + suffix |= NumberSuffixFlags.Long; + break; + case 's': + case 'S': + suffix |= NumberSuffixFlags.Short; + break; + case 'y': + case 'Y': + suffix |= NumberSuffixFlags.SignedByte; + break; + default: + notNumber = true; + break; + } + + break; + default: + notNumber = true; + break; } c = PeekChar(); @@ -3817,11 +3956,29 @@ private string ScanNumberHelper(char firstChar, out bool hex, out bool real, out { SkipChar(); - if (c == 'k' || c == 'K') { multiplier = 1024; } - else if (c == 'm' || c == 'M') { multiplier = 1024 * 1024; } - else if (c == 'g' || c == 'G') { multiplier = 1024 * 1024 * 1024; } - else if (c == 't' || c == 'T') { multiplier = 1024L * 1024 * 1024 * 1024; } - else if (c == 'p' || c == 'P') { multiplier = 1024L * 1024 * 1024 * 1024 * 1024; } + switch (c) + { + case 'k': + case 'K': + multiplier = 1024; + break; + case 'm': + case 'M': + multiplier = 1024 * 1024; + break; + case 'g': + case 'G': + multiplier = 1024 * 1024 * 1024; + break; + case 't': + case 'T': + multiplier = 1024L * 1024 * 1024 * 1024; + break; + case 'p': + case 'P': + multiplier = 1024L * 1024 * 1024 * 1024 * 1024; + break; + } char c1 = PeekChar(); if (c1 == 'b' || c1 == 'B') @@ -3860,7 +4017,7 @@ private string ScanNumberHelper(char firstChar, out bool hex, out bool real, out sb[0] = '-'; } - return GetStringAndRelease(sb); + return GetStringAndRelease(sb).AsSpan(); } #endregion Numbers @@ -4677,12 +4834,8 @@ internal Token NextToken() if (InExpressionMode() && (char.IsDigit(c1) || c1 == '.')) { - bool hex, real; - NumberSuffixFlags suffix; - long multiplier; - // check if the next token is actually a number - string strNum = ScanNumberHelper(c, out hex, out real, out suffix, out multiplier); + ReadOnlySpan strNum = ScanNumberHelper(c, out NumberFormat format, out NumberSuffixFlags suffix, out bool real, out long multiplier); // rescan characters after the check _currentIndex = _tokenStart; c = 
GetChar(); diff --git a/test/powershell/Language/Parser/Parser.Tests.ps1 b/test/powershell/Language/Parser/Parser.Tests.ps1 index 6387417648a..28c40682446 100644 --- a/test/powershell/Language/Parser/Parser.Tests.ps1 +++ b/test/powershell/Language/Parser/Parser.Tests.ps1 @@ -678,9 +678,24 @@ foo``u{2195}abc @{ Script = "0x12"; ExpectedValue = "18"; ExpectedType = [int] } @{ Script = "-0x12"; ExpectedValue = "-18"; ExpectedType = [int] } @{ Script = "0x80000000"; ExpectedValue = $([int32]::MinValue); ExpectedType = [int] } - @{ Script = "0xFFFFFFFF"; ExpectedValue = "-1"; ExpectedType = [int] } @{ Script = "0x7fffffff"; ExpectedValue = $([int32]::MaxValue); ExpectedType = [int] } @{ Script = "0x100000000"; ExpectedValue = [int64]0x100000000; ExpectedType = [long] } + @{ Script = "0xFF"; ExpectedValue = "255"; ExpectedType = [int] } + @{ Script = "0xFFFF"; ExpectedValue = "65535"; ExpectedType = [int] } + @{ Script = "0xFFFFFF"; ExpectedValue = "16777215"; ExpectedType = [int] } + @{ Script = "0xFFFFFFFF"; ExpectedValue = "-1"; ExpectedType = [int] } + @{ Script = "0xFFFFFFFFFF"; ExpectedValue = "1099511627775"; ExpectedType = [long] } + @{ Script = "0xFFFFFFFFFFFF"; ExpectedValue = "281474976710655"; ExpectedType = [long] } + @{ Script = "0xFFFFFFFFFFFFFF"; ExpectedValue = "72057594037927935"; ExpectedType = [long] } + @{ Script = "0xFFFFFFFFFFFFFFFF"; ExpectedValue = "-1"; ExpectedType = [long] } + #Binary + @{ Script = "0b0"; ExpectedValue = "0"; ExpectedType = [int] } + @{ Script = "0b10"; ExpectedValue = "2"; ExpectedType = [int] } + @{ Script = "-0b10"; ExpectedValue = "-2"; ExpectedType = [int] } + @{ Script = "0b11111111"; ExpectedValue = "-1"; ExpectedType = [int] } + @{ Script = "0b1111111111111111"; ExpectedValue = "-1"; ExpectedType = [int] } + @{ Script = "0b11111111111111111111111111111111"; ExpectedValue = "-1"; ExpectedType = [int] } + @{ Script = "0b1111111111111111111111111111111111111111111111111111111111111111"; ExpectedValue = "-1"; ExpectedType 
= [long] } #Multipliers @{ Script = "1kb"; ExpectedValue = "1024"; ExpectedType = [int] } @{ Script = "1mb"; ExpectedValue = "1048576"; ExpectedType = [int] } @@ -729,6 +744,11 @@ foo``u{2195}abc @{ Script = "0x0y"; ExpectedValue = "0"; ExpectedType = [sbyte] } @{ Script = "0x41y"; ExpectedValue = "65"; ExpectedType = [sbyte] } @{ Script = "-0x41y"; ExpectedValue = "-65"; ExpectedType = [sbyte] } + #Binary + @{ Script = "0b0y"; ExpectedValue = "0"; ExpectedType = [sbyte] } + @{ Script = "0b10y"; ExpectedValue = "2"; ExpectedType = [sbyte] } + @{ Script = "-0b10y"; ExpectedValue = "-2"; ExpectedType = [sbyte] } + @{ Script = "0b11111111y"; ExpectedValue = "-1"; ExpectedType = [sbyte] } #Short Integer notation #Standard @@ -750,6 +770,11 @@ foo``u{2195}abc @{ Script = "0x0s"; ExpectedValue = "0"; ExpectedType = [short] } @{ Script = "0x41s"; ExpectedValue = "65"; ExpectedType = [short] } @{ Script = "-0x41s"; ExpectedValue = "-65"; ExpectedType = [short] } + #Binary + @{ Script = "0b0s"; ExpectedValue = "0"; ExpectedType = [short] } + @{ Script = "0b10s"; ExpectedValue = "2"; ExpectedType = [short] } + @{ Script = "-0b10s"; ExpectedValue = "-2"; ExpectedType = [short] } + @{ Script = "0b11111111s"; ExpectedValue = "-1"; ExpectedType = [short] } #Multipliers @{ Script = "1skb"; ExpectedValue = "1024"; ExpectedType = [short] } @@ -765,12 +790,19 @@ foo``u{2195}abc @{ Script = "-2.5l"; ExpectedValue = "-2"; ExpectedType = [long] } #Exponential @{ Script = "0e0l"; ExpectedValue = "0"; ExpectedType = [long] } + @{ Script = "3e0l"; ExpectedValue = "3"; ExpectedType = [long] } + @{ Script = "-3e0l"; ExpectedValue = "-3"; ExpectedType = [long] } @{ Script = "3e2l"; ExpectedValue = "300"; ExpectedType = [long] } @{ Script = "-3e2l"; ExpectedValue = "-300"; ExpectedType = [long] } #Hexadecimal @{ Script = "0x0l"; ExpectedValue = "0"; ExpectedType = [long] } @{ Script = "0x41l"; ExpectedValue = "65"; ExpectedType = [long] } @{ Script = "-0x41l"; ExpectedValue = "-65"; 
ExpectedType = [long] } + #Binary + @{ Script = "0b0l"; ExpectedValue = "0"; ExpectedType = [long] } + @{ Script = "0b10l"; ExpectedValue = "2"; ExpectedType = [long] } + @{ Script = "-0b10l"; ExpectedValue = "-2"; ExpectedType = [long] } + @{ Script = "0b11111111l"; ExpectedValue = "-1"; ExpectedType = [long] } #Multipliers @{ Script = "1lkb"; ExpectedValue = "1024"; ExpectedType = [long] } @{ Script = "1lmb"; ExpectedValue = "1048576"; ExpectedType = [long] } @@ -778,6 +810,36 @@ foo``u{2195}abc @{ Script = "1ltb"; ExpectedValue = "1099511627776"; ExpectedType = [long] } @{ Script = "1lpb"; ExpectedValue = "1125899906842624"; ExpectedType = [long] } + #BigInteger Integer notation + #Standard + @{ Script = "0n"; ExpectedValue = "0"; ExpectedType = [bigint] } + @{ Script = "10n"; ExpectedValue = "10"; ExpectedType = [bigint] } + @{ Script = "-10n"; ExpectedValue = "-10"; ExpectedType = [bigint] } + @{ Script = "+10n"; ExpectedValue = "10"; ExpectedType = [bigint] } + #Conversion from + @{ Script = "0.0n"; ExpectedValue = "0"; ExpectedType = [bigint] } + @{ Script = "2.5n"; ExpectedValue = "2"; ExpectedType = [bigint] } + @{ Script = "-2.5n"; ExpectedValue = "-2"; ExpectedType = [bigint] } + #Exponential + @{ Script = "0e0n"; ExpectedValue = "0"; ExpectedType = [bigint] } + @{ Script = "3e2n"; ExpectedValue = "300"; ExpectedType = [bigint] } + @{ Script = "-3e2n"; ExpectedValue = "-300"; ExpectedType = [bigint] } + #Hexadecimal + @{ Script = "0x0n"; ExpectedValue = "0"; ExpectedType = [bigint] } + @{ Script = "0x41n"; ExpectedValue = "65"; ExpectedType = [bigint] } + @{ Script = "-0x41n"; ExpectedValue = "-65"; ExpectedType = [bigint] } + #Binary + @{ Script = "0b0n"; ExpectedValue = "0"; ExpectedType = [bigint] } + @{ Script = "0b10n"; ExpectedValue = "2"; ExpectedType = [bigint] } + @{ Script = "-0b10n"; ExpectedValue = "-2"; ExpectedType = [bigint] } + @{ Script = "0b11111111n"; ExpectedValue = "-1"; ExpectedType = [bigint] } + #Multipliers + @{ Script = "1Nkb"; 
ExpectedValue = "1024"; ExpectedType = [bigint] } + @{ Script = "1Nmb"; ExpectedValue = "1048576"; ExpectedType = [bigint] } + @{ Script = "1Ngb"; ExpectedValue = "1073741824"; ExpectedType = [bigint] } + @{ Script = "1Ntb"; ExpectedValue = "1099511627776"; ExpectedType = [bigint] } + @{ Script = "1Npb"; ExpectedValue = "1125899906842624"; ExpectedType = [bigint] } + #Unsigned Integer notation #Standard @{ Script = "0u"; ExpectedValue = "0"; ExpectedType = [uint] } @@ -792,6 +854,21 @@ foo``u{2195}abc #Hexadecimal @{ Script = "0x0u"; ExpectedValue = "0"; ExpectedType = [uint] } @{ Script = "0x41u"; ExpectedValue = "65"; ExpectedType = [uint] } + @{ Script = "0xFFu"; ExpectedValue = "255"; ExpectedType = [uint] } + @{ Script = "0xFFFFu"; ExpectedValue = "65535"; ExpectedType = [uint] } + @{ Script = "0xFFFFFFu"; ExpectedValue = "16777215"; ExpectedType = [uint] } + @{ Script = "0xFFFFFFFFu"; ExpectedValue = "$([uint]::MaxValue)"; ExpectedType = [uint] } + @{ Script = "0xFFFFFFFFFFu"; ExpectedValue = "1099511627775"; ExpectedType = [ulong] } + @{ Script = "0xFFFFFFFFFFFFu"; ExpectedValue = "281474976710655"; ExpectedType = [ulong] } + @{ Script = "0xFFFFFFFFFFFFFFu"; ExpectedValue = "72057594037927935"; ExpectedType = [ulong] } + @{ Script = "0xFFFFFFFFFFFFFFFFu"; ExpectedValue = "$([ulong]::MaxValue)"; ExpectedType = [ulong] } + #Binary + @{ Script = "0b0u"; ExpectedValue = "0"; ExpectedType = [uint] } + @{ Script = "0b10u"; ExpectedValue = "2"; ExpectedType = [uint] } + @{ Script = "0b11111111u"; ExpectedValue = "255"; ExpectedType = [uint] } + @{ Script = "0b1111111111111111u"; ExpectedValue = "65535"; ExpectedType = [uint] } + @{ Script = "0b11111111111111111111111111111111u"; ExpectedValue = "4294967295"; ExpectedType = [uint] } + @{ Script = "0b1111111111111111111111111111111111111111111111111111111111111111u"; ExpectedValue = "18446744073709551615"; ExpectedType = [ulong] } #Multipliers @{ Script = "1ukb"; ExpectedValue = "1024"; ExpectedType = [uint] } @{ 
Script = "1umb"; ExpectedValue = "1048576"; ExpectedType = [uint] } @@ -814,6 +891,10 @@ foo``u{2195}abc #Hexadecimal @{ Script = "0x0uy"; ExpectedValue = "0"; ExpectedType = [byte] } @{ Script = "0x41uy"; ExpectedValue = "65"; ExpectedType = [byte] } + #Binary + @{ Script = "0b0uy"; ExpectedValue = "0"; ExpectedType = [byte] } + @{ Script = "0b10uy"; ExpectedValue = "2"; ExpectedType = [byte] } + @{ Script = "0b11111111uy"; ExpectedValue = "255"; ExpectedType = [byte] } #Unsigned-Short Integer Notation #Standard @@ -830,6 +911,10 @@ foo``u{2195}abc #Hexadecimal @{ Script = "0x0us"; ExpectedValue = "0"; ExpectedType = [ushort] } @{ Script = "0x41us"; ExpectedValue = "65"; ExpectedType = [ushort] } + #Binary + @{ Script = "0b0us"; ExpectedValue = "0"; ExpectedType = [ushort] } + @{ Script = "0b10us"; ExpectedValue = "2"; ExpectedType = [ushort] } + @{ Script = "0b11111111us"; ExpectedValue = "255"; ExpectedType = [ushort] } #Multipliers @{ Script = "1uskb"; ExpectedValue = "1024"; ExpectedType = [ushort] } @@ -847,6 +932,10 @@ foo``u{2195}abc #Hexadecimal @{ Script = "0x0ul"; ExpectedValue = "0"; ExpectedType = [ulong] } @{ Script = "0x41ul"; ExpectedValue = "65"; ExpectedType = [ulong] } + #Binary + @{ Script = "0b0ul"; ExpectedValue = "0"; ExpectedType = [ulong] } + @{ Script = "0b10ul"; ExpectedValue = "2"; ExpectedType = [ulong] } + @{ Script = "0b11111111ul"; ExpectedValue = "255"; ExpectedType = [ulong] } #Multipliers @{ Script = "1ulkb"; ExpectedValue = "1024"; ExpectedType = [ulong] } @{ Script = "1ulmb"; ExpectedValue = "1048576"; ExpectedType = [ulong] } @@ -862,28 +951,41 @@ foo``u{2195}abc } $testInvalidNumerals = @( - @{ Script = "16p" } - @{ Script = "80x" } - @{ Script = "20ux" } - @{ Script = "18uu" } - @{ Script = "21ss" } - @{ Script = "100ll" } - @{ Script = "150su" } - @{ Script = "160ud" } - @{ Script = "160ld" } - @{ Script = "160sd" } - @{ Script = "160dd" } - @{ Script = "10ds" } - @{ Script = "10ud" } - @{ Script = "16sl" } - @{ Script = 
"188lu" } - @{ Script = "500sgb" } - @{ Script = "10000usgb" } - @{ Script = "10000.0usgb" } + @{ Script = "16p"; ErrorID = "CommandNotFoundException" } + @{ Script = "1_6"; ErrorID = "CommandNotFoundException" } + @{ Script = "80x"; ErrorID = "CommandNotFoundException" } + @{ Script = "20ux"; ErrorID = "CommandNotFoundException" } + @{ Script = "18uu"; ErrorID = "CommandNotFoundException" } + @{ Script = "21ss"; ErrorID = "CommandNotFoundException" } + @{ Script = "100ll"; ErrorID = "CommandNotFoundException" } + @{ Script = "100Il"; ErrorID = "CommandNotFoundException" } + @{ Script = "100Is"; ErrorID = "CommandNotFoundException" } + @{ Script = "100un"; ErrorID = "CommandNotFoundException" } + @{ Script = "100ln"; ErrorID = "CommandNotFoundException" } + @{ Script = "100sn"; ErrorID = "CommandNotFoundException" } + @{ Script = "100In"; ErrorID = "CommandNotFoundException" } + @{ Script = "100yu"; ErrorID = "CommandNotFoundException" } + @{ Script = "150su"; ErrorID = "CommandNotFoundException" } + @{ Script = "160ud"; ErrorID = "CommandNotFoundException" } + @{ Script = "160ld"; ErrorID = "CommandNotFoundException" } + @{ Script = "160yd"; ErrorID = "CommandNotFoundException" } + @{ Script = "160sd"; ErrorID = "CommandNotFoundException" } + @{ Script = "160dd"; ErrorID = "CommandNotFoundException" } + @{ Script = "10ds"; ErrorID = "CommandNotFoundException" } + @{ Script = "10ud"; ErrorID = "CommandNotFoundException" } + @{ Script = "16sl"; ErrorID = "CommandNotFoundException" } + @{ Script = "188lu"; ErrorID = "CommandNotFoundException" } + @{ Script = "0xFFFFy"; ErrorID = "ParseException" } + @{ Script = "500sgb"; ErrorID = "ParseException" } + @{ Script = "10000usgb"; ErrorID = "ParseException" } + @{ Script = "10000.0usgb"; ErrorID = "ParseException" } + @{ Script = "1uykb"; ErrorID = "ParseException" } + @{ Script = "10_000ul"; ErrorID = "CommandNotFoundException" } ) + It "