lib: implement all 1-byte encodings in js · nodejs/node@86bce15 · GitHub
[go: up one dir, main page]

Skip to content

Commit 86bce15

Browse files
ChALkeR authored and RafaelGSS committed
lib: implement all 1-byte encodings in js
PR-URL: #61093 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: René <contact.9a5d6388@renegade334.me.uk>
1 parent f12eb28 commit 86bce15

File tree

37 files changed

+4262
-172
lines changed
  • parallel
  • typings/internalBinding
  • 37 files changed

    +4262
    -172
    lines changed

    lib/internal/encoding.js

    Lines changed: 49 additions & 42 deletions
    Original file line numberDiff line numberDiff line change
    @@ -13,25 +13,28 @@ const {
    1313
    StringPrototypeSlice,
    1414
    Symbol,
    1515
    SymbolToStringTag,
    16-
    Uint8Array,
    1716
    } = primordials;
    1817

    18+
    const { FastBuffer } = require('internal/buffer');
    19+
    1920
    const {
    2021
    ERR_ENCODING_NOT_SUPPORTED,
    2122
    ERR_INVALID_ARG_TYPE,
    2223
    ERR_INVALID_THIS,
    2324
    ERR_NO_ICU,
    2425
    } = require('internal/errors').codes;
    26+
    const kMethod = Symbol('method');
    2527
    const kHandle = Symbol('handle');
    2628
    const kFlags = Symbol('flags');
    2729
    const kEncoding = Symbol('encoding');
    2830
    const kDecoder = Symbol('decoder');
    2931
    const kEncoder = Symbol('encoder');
    3032
    const kFatal = Symbol('kFatal');
    3133
    const kUTF8FastPath = Symbol('kUTF8FastPath');
    32-
    const kWindows1252FastPath = Symbol('kWindows1252FastPath');
    3334
    const kIgnoreBOM = Symbol('kIgnoreBOM');
    3435

    36+
    const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
    37+
    3538
    const {
    3639
    getConstructorOf,
    3740
    customInspectSymbol: inspect,
    @@ -56,11 +59,8 @@ const {
    5659
    encodeIntoResults,
    5760
    encodeUtf8String,
    5861
    decodeUTF8,
    59-
    decodeWindows1252,
    6062
    } = binding;
    6163

    62-
    const { Buffer } = require('buffer');
    63-
    6464
    function validateEncoder(obj) {
    6565
    if (obj == null || obj[kEncoder] !== true)
    6666
    throw new ERR_INVALID_THIS('TextEncoder');
    @@ -75,7 +75,7 @@ const CONVERTER_FLAGS_FLUSH = 0x1;
    7575
    const CONVERTER_FLAGS_FATAL = 0x2;
    7676
    const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
    7777

    78-
    const empty = new Uint8Array(0);
    78+
    const empty = new FastBuffer();
    7979

    8080
    const encodings = new SafeMap([
    8181
    ['unicode-1-1-utf-8', 'utf-8'],
    @@ -388,6 +388,24 @@ ObjectDefineProperties(
    388388
    [SymbolToStringTag]: { __proto__: null, configurable: true, value: 'TextEncoder' },
    389389
    });
    390390

    391+
    function parseInput(input) {
    392+
    if (isAnyArrayBuffer(input)) {
    393+
    try {
    394+
    return new FastBuffer(input);
    395+
    } catch {
    396+
    return empty;
    397+
    }
    398+
    } else if (isArrayBufferView(input)) {
    399+
    try {
    400+
    return new FastBuffer(input.buffer, input.byteOffset, input.byteLength);
    401+
    } catch {
    402+
    return empty;
    403+
    }
    404+
    } else {
    405+
    throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'ArrayBufferView'], input);
    406+
    }
    407+
    }
    408+
    391409
    const TextDecoder =
    392410
    internalBinding('config').hasIntl ?
    393411
    makeTextDecoderICU() :
    @@ -421,10 +439,12 @@ function makeTextDecoderICU() {
    421439
    this[kFatal] = Boolean(options?.fatal);
    422440
    // Only support fast path for UTF-8.
    423441
    this[kUTF8FastPath] = enc === 'utf-8';
    424-
    this[kWindows1252FastPath] = enc === 'windows-1252';
    425442
    this[kHandle] = undefined;
    443+
    this[kMethod] = undefined;
    426444

    427-
    if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) {
    445+
    if (isSinglebyteEncoding(this.encoding)) {
    446+
    this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
    447+
    } else if (!this[kUTF8FastPath]) {
    428448
    this.#prepareConverter();
    429449
    }
    430450
    }
    @@ -441,22 +461,18 @@ function makeTextDecoderICU() {
    441461

    442462
    decode(input = empty, options = kEmptyObject) {
    443463
    validateDecoder(this);
    464+
    validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
    465+
    466+
    if (this[kMethod]) return this[kMethod](parseInput(input));
    444467

    445468
    this[kUTF8FastPath] &&= !(options?.stream);
    446-
    this[kWindows1252FastPath] &&= !(options?.stream);
    447469

    448470
    if (this[kUTF8FastPath]) {
    449471
    return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
    450472
    }
    451473

    452-
    if (this[kWindows1252FastPath]) {
    453-
    return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]);
    454-
    }
    455-
    456474
    this.#prepareConverter();
    457475

    458-
    validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
    459-
    460476
    let flags = 0;
    461477
    if (options !== null)
    462478
    flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
    @@ -488,47 +504,40 @@ function makeTextDecoderJS() {
    488504
    validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
    489505

    490506
    const enc = getEncodingFromLabel(encoding);
    491-
    if (enc === undefined || !hasConverter(enc))
    507+
    if (enc === undefined)
    492508
    throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
    493509

    494510
    let flags = 0;
    495511
    if (options !== null) {
    496-
    if (options.fatal) {
    497-
    throw new ERR_NO_ICU('"fatal" option');
    498-
    }
    512+
    flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
    499513
    flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
    500514
    }
    501515

    502516
    this[kDecoder] = true;
    503-
    // StringDecoder will normalize WHATWG encoding to Node.js encoding.
    504-
    this[kHandle] = new (lazyStringDecoder())(enc);
    505517
    this[kFlags] = flags;
    506518
    this[kEncoding] = enc;
    519+
    this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
    520+
    this[kFatal] = Boolean(options?.fatal);
    507521
    this[kBOMSeen] = false;
    522+
    this[kMethod] = undefined;
    523+
    524+
    if (isSinglebyteEncoding(enc)) {
    525+
    this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
    526+
    } else {
    527+
    if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
    528+
    if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
    529+
    // StringDecoder will normalize WHATWG encoding to Node.js encoding.
    530+
    this[kHandle] = new (lazyStringDecoder())(enc);
    531+
    }
    508532
    }
    509533

    510534
    decode(input = empty, options = kEmptyObject) {
    511535
    validateDecoder(this);
    512-
    if (isAnyArrayBuffer(input)) {
    513-
    try {
    514-
    input = Buffer.from(input);
    515-
    } catch {
    516-
    input = empty;
    517-
    }
    518-
    } else if (isArrayBufferView(input)) {
    519-
    try {
    520-
    input = Buffer.from(input.buffer, input.byteOffset,
    521-
    input.byteLength);
    522-
    } catch {
    523-
    input = empty;
    524-
    }
    525-
    } else {
    526-
    throw new ERR_INVALID_ARG_TYPE('input',
    527-
    ['ArrayBuffer', 'ArrayBufferView'],
    528-
    input);
    529-
    }
    536+
    input = parseInput(input);
    530537
    validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
    531538

    539+
    if (this[kMethod]) return this[kMethod](input);
    540+
    532541
    if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
    533542
    this[kBOMSeen] = false;
    534543
    }
    @@ -543,9 +552,7 @@ function makeTextDecoderJS() {
    543552
    this[kHandle].end(input) :
    544553
    this[kHandle].write(input);
    545554

    546-
    if (result.length > 0 &&
    547-
    !this[kBOMSeen] &&
    548-
    !(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) {
    555+
    if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
    549556
    // If the very first result in the stream is a BOM, and we are not
    550557
    // explicitly told to ignore it, then we discard it.
    551558
    if (result[0] === '\ufeff') {
    Lines changed: 155 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -0,0 +1,155 @@
    1+
    // Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings
    2+
    // Copyright Exodus Movement. Licensed under MIT License.
    3+
    4+
    'use strict';
    5+
    6+
    const {
    7+
    Array,
    8+
    ArrayPrototypeFill,
    9+
    ObjectKeys,
    10+
    ObjectPrototypeHasOwnProperty,
    11+
    SafeArrayIterator,
    12+
    SafeMap,
    13+
    SafeSet,
    14+
    StringPrototypeIncludes,
    15+
    TypedArrayFrom,
    16+
    TypedArrayOf,
    17+
    TypedArrayPrototypeIncludes,
    18+
    TypedArrayPrototypeSet,
    19+
    Uint16Array,
    20+
    } = primordials;
    21+
    22+
    const { isAscii } = require('buffer');
    23+
    24+
    const { FastBuffer } = require('internal/buffer');
    25+
    26+
    const {
    27+
    ERR_ENCODING_NOT_SUPPORTED,
    28+
    ERR_ENCODING_INVALID_ENCODED_DATA,
    29+
    } = require('internal/errors').codes;
    30+
    31+
    const isBigEndian = new FastBuffer(TypedArrayOf(Uint16Array, 258).buffer)[1] === 2;
    32+
    33+
    const it = (x) => new SafeArrayIterator(x);
    34+
    35+
    /* fallback/single-byte.encodings.js */
    36+
    37+
    const r = 0xfffd;
    38+
    const e = (x) => it(ArrayPrototypeFill(new Array(x), 1));
    39+
    const h = (x) => it(ArrayPrototypeFill(new Array(x), r));
    40+
    41+
    /* eslint-disable @stylistic/js/max-len */
    42+
    43+
    // Index tables from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
    44+
    // Each table in the spec lists only mapping from byte 0x80 onwards, as below that they are all ASCII and mapped as identity
    45+
    // Here, 0xfffd (replacement charcode) designates a hole (unmapped offset), as not all encodings map all offsets
    46+
    // All other numbers are deltas from the last seen mapped value, starting with 0x7f (127, highest ASCII)
    47+
    // Thus, [0x80, 0x81, , 0x83] is stored as [1, 1, r, 2]
    48+
    // Truncation (length < 128) means that all remaining ones are mapped as identity (offset i => codepoint i), not unmapped
    49+
    const encodings = {
    50+
    '__proto__': null,
    51+
    'ibm866': [913, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, -6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472],
    52+
    'iso-8859-10': [...e(33), 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 80, 6, 23, 7831, -7850, -32, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58],
    53+
    'iso-8859-13': [...e(33), 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 7835],
    54+
    'iso-8859-14': [...e(33), 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122],
    55+
    'iso-8859-15': [...e(33), 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37],
    56+
    'iso-8859-16': [...e(33), 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258],
    57+
    'iso-8859-2': [...e(33), 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, -2, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
    58+
    'iso-8859-3': [...e(33), 134, 434, -565, 1, r, 128, -125, 1, 136, 46, -64, 22, -135, r, 206, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, r, 191, -188, 1, 1, r, 2, 70, -2, -65, ...e(8), r, 2, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, r, 2, 39, -2, -34, ...e(8), r, 2, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380],
    59+
    'iso-8859-4': [...e(33), 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366],
    60+
    'iso-8859-5': [...e(33), 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1],
    61+
    'iso-8859-6': [...e(33), r, r, r, 4, ...h(7), 1384, -1375, ...h(13), 1390, r, r, r, 4, r, 2, ...e(25), r, r, r, r, r, 6, ...e(18), ...h(13)],
    62+
    'iso-8859-7': [...e(33), 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, r, 8040, -8037, 1, 1, 1, 721, 1, 1, -719, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r],
    63+
    'iso-8859-8': [...e(33), r, 2, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, ...h(32), 8025, -6727, ...e(26), r, r, 6692, 1, r],
    64+
    'koi8-r': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
    65+
    'koi8-u': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
    66+
    'macintosh': [69, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20],
    67+
    'windows-1250': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -7888, 7897, -7903, 10, 25, -4, -233, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
    68+
    'windows-1251': [899, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)],
    69+
    'windows-1252': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 225, -6],
    70+
    'windows-1253': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, r, 2, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r],
    71+
    'windows-1254': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46],
    72+
    'windows-1255': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, ...h(7), -36, ...e(26), r, r, 6692, 1, r],
    73+
    'windows-1256': [8237, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461],
    74+
    'windows-1257': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 28, 543, -527, -40, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 19, 556, -572, 1, r, 2, 1, 1, r, 2, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 347],
    75+
    'windows-1258': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931],
    76+
    'windows-874': [8237, -8235, 1, 1, 1, 8098, -8096, ...e(10), 8072, 1, 3, 1, 5, -15, 1, -8060, ...e(8), 3425, ...e(57), r, r, r, r, 5, ...e(28), r, r, r, r],
    77+
    'x-mac-cyrillic': [913, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262],
    78+
    };
    79+
    80+
    /* eslint-enable @stylistic/js/max-len */
    81+
    82+
    /* fallback/single-byte.js + single-byte.node.js, simplified */
    83+
    84+
    const l256 = { __proto__: null, length: 256 };
    85+
    86+
    function getEncoding(encoding) {
    87+
    if (encoding === 'x-user-defined') {
    88+
    // https://encoding.spec.whatwg.org/#x-user-defined-decoder, 14.5.1. x-user-defined decoder
    89+
    return TypedArrayFrom(Uint16Array, l256, (_, i) => (i >= 0x80 ? 0xf700 + i : i));
    90+
    }
    91+
    92+
    if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) {
    93+
    throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
    94+
    }
    95+
    96+
    const map = TypedArrayFrom(Uint16Array, l256, (_, i) => i); // Unicode subset
    97+
    let prev = 127;
    98+
    map.set(TypedArrayFrom(Uint16Array, it(encodings[encoding]), (x) => (x === r ? x : (prev += x))), 128);
    99+
    return map;
    100+
    }
    101+
    102+
    const supported = new SafeSet(it(ObjectKeys(encodings))).add('iso-8859-8-i').add('x-user-defined');
    103+
    const isSinglebyteEncoding = (enc) => supported.has(enc);
    104+
    105+
    const decodersLoose = new SafeMap();
    106+
    const decodersFatal = new SafeMap();
    107+
    108+
    function createSinglebyteDecoder(encoding, fatal) {
    109+
    const id = encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding;
    110+
    const decoders = fatal ? decodersFatal : decodersLoose;
    111+
    const cached = decoders.get(id);
    112+
    if (cached) return cached;
    113+
    114+
    const map = getEncoding(id);
    115+
    const incomplete = TypedArrayPrototypeIncludes(map, r);
    116+
    117+
    // Expects type-checked Buffer input
    118+
    const decoder = (buf) => {
    119+
    if (buf.byteLength === 0) return '';
    120+
    if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice
    121+
    const o = new Uint16Array(buf.length);
    122+
    TypedArrayPrototypeSet(o, buf); // Copy to modify in-place, also those are 16-bit now
    123+
    124+
    let i = 0;
    125+
    for (const end7 = o.length - 7; i < end7; i += 8) {
    126+
    o[i] = map[o[i]];
    127+
    o[i + 1] = map[o[i + 1]];
    128+
    o[i + 2] = map[o[i + 2]];
    129+
    o[i + 3] = map[o[i + 3]];
    130+
    o[i + 4] = map[o[i + 4]];
    131+
    o[i + 5] = map[o[i + 5]];
    132+
    o[i + 6] = map[o[i + 6]];
    133+
    o[i + 7] = map[o[i + 7]];
    134+
    }
    135+
    136+
    for (const end = o.length; i < end; i++) o[i] = map[o[i]];
    137+
    138+
    const b = new FastBuffer(o.buffer, o.byteOffset, o.byteLength);
    139+
    if (isBigEndian) b.swap16();
    140+
    const string = b.ucs2Slice();
    141+
    if (fatal && incomplete && StringPrototypeIncludes(string, '\uFFFD')) {
    142+
    throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined);
    143+
    }
    144+
    return string;
    145+
    };
    146+
    147+
    decoders.set(id, decoder);
    148+
    return decoder;
    149+
    }
    150+
    151+
    module.exports = {
    152+
    isSinglebyteEncoding,
    153+
    createSinglebyteDecoder,
    154+
    getEncoding, // for tests
    155+
    };

    0 commit comments

    Comments
     (0)
    0