diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 05e4637..9f236af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,21 +8,22 @@ jobs: strategy: matrix: - version: [ 18, 19, 20, 21 ] + version: [ 18, 19, 20, 21, 22, 23 ] + vector-length: [ 256, 512 ] steps: - uses: actions/checkout@v4 - - uses: gradle/wrapper-validation-action@v1 + - uses: gradle/actions/wrapper-validation@v4 - name: Set up JDK ${{ matrix.version }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: ${{ matrix.version }} - name: Setup Gradle - uses: gradle/gradle-build-action@v2 + uses: gradle/actions/setup-gradle@v4 - name: Tests - run: ./gradlew check + run: ./gradlew test${{ matrix.vector-length }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index fa59c3d..1cdb32f 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -15,16 +15,16 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: gradle/wrapper-validation-action@v1 + - uses: gradle/actions/wrapper-validation@v3 - name: Set up JDK 18 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: 18 - name: Setup Gradle - uses: gradle/gradle-build-action@v2 + uses: gradle/actions/setup-gradle@v3 - name: Release if: github.ref == 'refs/heads/main' diff --git a/.gitignore b/.gitignore index 5241245..6b6051c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ build profilers testdata +hotspot_*.log \ No newline at end of file diff --git a/README.md b/README.md index 99e8143..1802aa2 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ by Geoff Langdale and Daniel Lemire. 
## Code Sample +### DOM Parser + ```java byte[] json = loadTwitterJson(); @@ -25,6 +27,30 @@ while (tweets.hasNext()) { } ``` +### Schema-Based Parser + +```java +byte[] json = loadTwitterJson(); + +SimdJsonParser parser = new SimdJsonParser(); +SimdJsonTwitter twitter = parser.parse(json, json.length, SimdJsonTwitter.class); +for (SimdJsonStatus status : twitter.statuses()) { + SimdJsonUser user = status.user(); + if (user.default_profile()) { + System.out.println(user.screen_name()); + } +} + +record SimdJsonUser(boolean default_profile, String screen_name) { +} + +record SimdJsonStatus(SimdJsonUser user) { +} + +record SimdJsonTwitter(List<SimdJsonStatus> statuses) { +} +``` + ## Installation The library is available in the [Maven Central Repository](https://mvnrepository.com/artifact/org.simdjson/simdjson-java). @@ -67,24 +93,60 @@ This section presents a performance comparison of different JSON parsers availab the [twitter.json](src/jmh/resources/twitter.json) dataset, and its goal was to measure the throughput (ops/s) of parsing and finding all unique users with a default profile. 
-**Note that simdjson-java is still missing several features (see [GitHub Issues](https://github.com/simdjson/simdjson-java/issues)), -so the following results may not reflect its real performance.** +### 256-bit Vectors Environment: -* CPU: Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz -* OS: Ubuntu 23.04, kernel 6.2.0-23-generic -* Java: OpenJDK 64-Bit Server VM Temurin-20.0.1+9 - - Library | Version | Throughput (ops/s) ----------------------------------------------------|---------|-------------------- - simdjson-java | - | 1450.951 - simdjson-java (padded) | - | 1505.227 - [jackson](https://github.com/FasterXML/jackson) | 2.15.2 | 504.562 - [fastjson2](https://github.com/alibaba/fastjson) | 2.0.35 | 590.743 - [jsoniter](https://github.com/json-iterator/java) | 0.9.23 | 384.664 +* CPU: Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz +* OS: Ubuntu 24.04 LTS, kernel 6.8.0-1008-aws +* Java: OpenJDK 64-Bit Server VM (build 21.0.3+9-Ubuntu-1ubuntu1, mixed mode, sharing) + +DOM parsers ([ParseAndSelectBenchmark](src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java)): + +| Library | Version | Throughput (ops/s) | +|--------------------------------------------------|---------|--------------------| +| simdjson-java (padded) | 0.3.0 | 783.878 | +| simdjson-java | 0.3.0 | 760.426 | +| [fastjson2](https://github.com/alibaba/fastjson) | 2.0.49 | 308.660 | +| [jackson](https://github.com/FasterXML/jackson) | 2.17.0 | 259.536 | + +Schema-based parsers ([SchemaBasedParseAndSelectBenchmark](src/jmh/java/org/simdjson/SchemaBasedParseAndSelectBenchmark.java)): + +| Library | Version | Throughput (ops/s) | +|-----------------------------------------------------------------|---------|--------------------| +| simdjson-java (padded) | 0.3.0 | 1237.432 | +| simdjson-java | 0.3.0 | 1216.891 | +| [jsoniter-scala](https://github.com/plokhotnyuk/jsoniter-scala) | 2.28.4 | 614.138 | +| [fastjson2](https://github.com/alibaba/fastjson) | 2.0.49 | 494.362 | +| 
[jackson](https://github.com/FasterXML/jackson) | 2.17.0 | 339.904 | + +### 512-bit Vectors + +Environment: +* CPU: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz +* OS: Ubuntu 24.04 LTS, kernel 6.8.0-1008-aws +* Java: OpenJDK 64-Bit Server VM (build 21.0.3+9-Ubuntu-1ubuntu1, mixed mode, sharing) + +DOM parsers ([ParseAndSelectBenchmark](src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java)): + +| Library | Version | Throughput (ops/s) | +|--------------------------------------------------|---------|--------------------| +| simdjson-java (padded) | 0.3.0 | 1842.146 | +| simdjson-java | 0.3.0 | 1765.592 | +| [fastjson2](https://github.com/alibaba/fastjson) | 2.0.49 | 718.133 | +| [jackson](https://github.com/FasterXML/jackson) | 2.17.0 | 616.617 | + +Schema-based parsers ([SchemaBasedParseAndSelectBenchmark](src/jmh/java/org/simdjson/SchemaBasedParseAndSelectBenchmark.java)): + +| Library | Version | Throughput (ops/s) | +|-----------------------------------------------------------------|---------|--------------------| +| simdjson-java (padded) | 0.3.0 | 3164.274 | +| simdjson-java | 0.3.0 | 2990.289 | +| [jsoniter-scala](https://github.com/plokhotnyuk/jsoniter-scala) | 2.28.4 | 1826.229 | +| [fastjson2](https://github.com/alibaba/fastjson) | 2.0.49 | 1259.622 | +| [jackson](https://github.com/FasterXML/jackson) | 2.17.0 | 789.030 | To reproduce the benchmark results, execute the following command: ```./gradlew jmh -Pjmh.includes='.*ParseAndSelectBenchmark.*'``` -The benchmark may take several minutes. Remember that you need Java 18 or better. \ No newline at end of file +The benchmark may take several minutes. Remember that you need Java 18 or better. 
diff --git a/build.gradle b/build.gradle index 2cae88f..4bcf450 100644 --- a/build.gradle +++ b/build.gradle @@ -1,6 +1,7 @@ import me.champeau.jmh.JmhBytecodeGeneratorTask -import org.gradle.internal.os.OperatingSystem import org.ajoberstar.grgit.Grgit +import org.gradle.internal.os.OperatingSystem + import java.time.Duration plugins { @@ -42,20 +43,20 @@ java { } ext { - junitVersion = '5.9.1' - jsoniterScalaVersion = '2.24.4' + junitVersion = '5.12.0' + jsoniterScalaVersion = '2.33.2' } dependencies { - jmhImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.16.0' - jmhImplementation group: 'com.alibaba.fastjson2', name: 'fastjson2', version: '2.0.42' - jmhImplementation group: 'com.jsoniter', name: 'jsoniter', version: '0.9.23' + jmhImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.18.2' + jmhImplementation group: 'com.alibaba.fastjson2', name: 'fastjson2', version: '2.0.56' jmhImplementation group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-core_2.13', version: jsoniterScalaVersion - jmhImplementation group: 'com.google.guava', name: 'guava', version: '32.1.2-jre' + jmhImplementation group: 'com.google.guava', name: 'guava', version: '33.4.0-jre' compileOnly group: 'com.github.plokhotnyuk.jsoniter-scala', name: 'jsoniter-scala-macros_2.13', version: jsoniterScalaVersion - testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.24.2' - testImplementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0' + testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.27.3' + testImplementation group: 'org.apache.commons', name: 'commons-text', version: '1.13.0' + testImplementation group: 'org.junit-pioneer', name: 'junit-pioneer', version: '2.3.0' testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion testImplementation group: 'org.junit.jupiter', name: 
'junit-jupiter-params', version: junitVersion testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion @@ -135,18 +136,20 @@ jmh { jvmArgsPrepend = [ '--add-modules=jdk.incubator.vector' ] - if (getBooleanProperty('jmh.profilersEnabled', false)) { - if (OperatingSystem.current().isLinux()) { - profilers = [ - 'perf', - 'perfasm:intelSyntax=true', - 'async:verbose=true;output=flamegraph;event=cpu;dir=./profilers/async;libPath=' + getAsyncProfilerLibPath('LD_LIBRARY_PATH') - ] - } else if (OperatingSystem.current().isMacOsX()) { - profilers = [ - 'async:verbose=true;output=flamegraph;event=cpu;dir=./profilers/async;libPath=' + getAsyncProfilerLibPath('DYLD_LIBRARY_PATH') - ] + if (OperatingSystem.current().isLinux()) { + def profilerList = [] + if (getBooleanProperty('jmh.asyncProfilerEnabled', false)) { + createDirIfDoesNotExist('./profilers/async') + profilerList += ['async:verbose=true;output=flamegraph;event=cpu;dir=./profilers/async;libPath=' + getLibPath('LD_LIBRARY_PATH')] + } + if (getBooleanProperty('jmh.perfAsmEnabled', false)) { + createDirIfDoesNotExist('./profilers/perfasm') + profilerList += ['perfasm:intelSyntax=true;saveLog=true;saveLogTo=./profilers/perfasm'] } + if (getBooleanProperty('jmh.perfEnabled', false)) { + profilerList += ['perf'] + } + profilers = profilerList } if (project.hasProperty('jmh.includes')) { includes = [project.findProperty('jmh.includes')] @@ -218,6 +221,11 @@ def getBooleanProperty(String name, boolean defaultValue) { Boolean.valueOf((project.findProperty(name) ?: defaultValue) as String) } -static def getAsyncProfilerLibPath(String envVarName) { +static def getLibPath(String envVarName) { System.getenv(envVarName) ?: System.getProperty('java.library.path') } + +static createDirIfDoesNotExist(String dir) { + File file = new File(dir) + file.mkdirs() +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 7f93135..9bbc975 100644 Binary files 
a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 3fa8f86..37f853b 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/gradlew b/gradlew index 1aa94a4..faf9300 100755 --- a/gradlew +++ b/gradlew @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # +# SPDX-License-Identifier: Apache-2.0 +# ############################################################################## # @@ -55,7 +57,7 @@ # Darwin, MinGW, and NonStop. # # (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt # within the Gradle project. # # You can find Gradle at https://github.com/gradle/gradle/. @@ -84,7 +86,7 @@ done # shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. 
MAX_FD=maximum @@ -203,7 +205,7 @@ fi DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' # Collect all arguments for the java command: -# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, # and any embedded shellness will be escaped. # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be # treated as '${Hostname}' itself on the command line. diff --git a/gradlew.bat b/gradlew.bat index 93e3f59..9b42019 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -1,92 +1,94 @@ -@rem -@rem Copyright 2015 the original author or authors. -@rem -@rem Licensed under the Apache License, Version 2.0 (the "License"); -@rem you may not use this file except in compliance with the License. -@rem You may obtain a copy of the License at -@rem -@rem https://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, software -@rem distributed under the License is distributed on an "AS IS" BASIS, -@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -@rem See the License for the specific language governing permissions and -@rem limitations under the License. -@rem - -@if "%DEBUG%"=="" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%"=="" set DIRNAME=. -@rem This is normally unused -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Resolve any "." and ".." in APP_HOME to make it shorter. -for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi - -@rem Add default JVM options here. 
You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if %ERRORLEVEL% equ 0 goto execute - -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto execute - -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* - -:end -@rem End local scope for the variables with windows NT shell -if %ERRORLEVEL% equ 0 goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -set EXIT_CODE=%ERRORLEVEL% -if %EXIT_CODE% equ 0 set EXIT_CODE=1 -if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% -exit /b %EXIT_CODE% - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 
1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/src/jmh/java/org/simdjson/NumberParserBenchmark.java b/src/jmh/java/org/simdjson/NumberParserBenchmark.java index 1b8c9dd..f73dd83 100644 --- a/src/jmh/java/org/simdjson/NumberParserBenchmark.java +++ b/src/jmh/java/org/simdjson/NumberParserBenchmark.java @@ -21,7 +21,7 @@ public class NumberParserBenchmark { private final Tape tape = new Tape(100); - private final NumberParser numberParser = new NumberParser(tape); + private final NumberParser numberParser = new NumberParser(); @Param({ "2.2250738585072013e-308", // fast path @@ -43,7 +43,7 @@ public double baseline() { @Benchmark public double simdjson() { tape.reset(); - numberParser.parseNumber(numberUtf8Bytes, 0); + numberParser.parseNumber(numberUtf8Bytes, 0, tape); return tape.getDouble(0); } } diff --git a/src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java b/src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java index a37135c..1cae0b1 100644 --- a/src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java +++ b/src/jmh/java/org/simdjson/ParseAndSelectBenchmark.java @@ -4,10 +4,6 @@ import com.alibaba.fastjson2.JSONObject; import 
com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import com.github.plokhotnyuk.jsoniter_scala.core.ReaderConfig$; -import com.github.plokhotnyuk.jsoniter_scala.core.package$; -import com.jsoniter.JsonIterator; -import com.jsoniter.any.Any; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Level; @@ -43,19 +39,7 @@ public void setup() throws IOException { buffer = is.readAllBytes(); bufferPadded = padded(buffer); } - } - - @Benchmark - public int countUniqueUsersWithDefaultProfile_jsoniter_scala() throws IOException { - Twitter twitter = package$.MODULE$.readFromArray(buffer, ReaderConfig$.MODULE$, Twitter$.MODULE$.codec()); - Set defaultUsers = new HashSet<>(); - for (Status tweet: twitter.statuses()) { - User user = tweet.user(); - if (user.default_profile()) { - defaultUsers.add(user.screen_name()); - } - } - return defaultUsers.size(); + System.out.println("VectorSpecies = " + VectorUtils.BYTE_SPECIES); } @Benchmark @@ -88,19 +72,6 @@ public int countUniqueUsersWithDefaultProfile_fastjson() { return defaultUsers.size(); } - @Benchmark - public int countUniqueUsersWithDefaultProfile_jsoniter() { - Any json = JsonIterator.deserialize(buffer); - Set defaultUsers = new HashSet<>(); - for (Any tweet : json.get("statuses")) { - Any user = tweet.get("user"); - if (user.get("default_profile").toBoolean()) { - defaultUsers.add(user.get("screen_name").toString()); - } - } - return defaultUsers.size(); - } - @Benchmark public int countUniqueUsersWithDefaultProfile_simdjson() { JsonValue simdJsonValue = simdJsonParser.parse(buffer, buffer.length); diff --git a/src/jmh/java/org/simdjson/SchemaBasedParseAndSelectBenchmark.java b/src/jmh/java/org/simdjson/SchemaBasedParseAndSelectBenchmark.java new file mode 100644 index 0000000..cdd98f9 --- /dev/null +++ b/src/jmh/java/org/simdjson/SchemaBasedParseAndSelectBenchmark.java @@ -0,0 +1,123 @@ +package 
org.simdjson; + +import com.alibaba.fastjson2.JSON; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.plokhotnyuk.jsoniter_scala.core.ReaderConfig$; +import com.github.plokhotnyuk.jsoniter_scala.core.package$; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +import java.io.IOException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import static org.simdjson.SimdJsonPaddingUtil.padded; + +@State(Scope.Benchmark) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +public class SchemaBasedParseAndSelectBenchmark { + + private final SimdJsonParser simdJsonParser = new SimdJsonParser(); + private final ObjectMapper objectMapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + + private byte[] buffer; + private byte[] bufferPadded; + + @Setup(Level.Trial) + public void setup() throws IOException { + try (InputStream is = ParseBenchmark.class.getResourceAsStream("/twitter.json")) { + buffer = is.readAllBytes(); + bufferPadded = padded(buffer); + } + System.out.println("VectorSpecies = " + VectorUtils.BYTE_SPECIES); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_simdjson() { + Set defaultUsers = new HashSet<>(); + SimdJsonTwitter twitter = simdJsonParser.parse(buffer, buffer.length, SimdJsonTwitter.class); + for (SimdJsonStatus status : twitter.statuses()) { + SimdJsonUser user = status.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return 
defaultUsers.size(); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_simdjsonPadded() { + Set defaultUsers = new HashSet<>(); + SimdJsonTwitter twitter = simdJsonParser.parse(bufferPadded, buffer.length, SimdJsonTwitter.class); + for (SimdJsonStatus status : twitter.statuses()) { + SimdJsonUser user = status.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return defaultUsers.size(); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_jackson() throws IOException { + Set defaultUsers = new HashSet<>(); + SimdJsonTwitter twitter = objectMapper.readValue(buffer, SimdJsonTwitter.class); + for (SimdJsonStatus status : twitter.statuses()) { + SimdJsonUser user = status.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return defaultUsers.size(); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_jsoniter_scala() { + Twitter twitter = package$.MODULE$.readFromArray(buffer, ReaderConfig$.MODULE$, Twitter$.MODULE$.codec()); + Set defaultUsers = new HashSet<>(); + for (Status tweet: twitter.statuses()) { + User user = tweet.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return defaultUsers.size(); + } + + @Benchmark + public int countUniqueUsersWithDefaultProfile_fastjson() { + Set defaultUsers = new HashSet<>(); + SimdJsonTwitter twitter = JSON.parseObject(buffer, SimdJsonTwitter.class); + for (SimdJsonStatus status : twitter.statuses()) { + SimdJsonUser user = status.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + return defaultUsers.size(); + } + + record SimdJsonUser(boolean default_profile, String screen_name) { + + } + + record SimdJsonStatus(SimdJsonUser user) { + + } + + record SimdJsonTwitter(List statuses) { + + } +} diff --git a/src/jmh/java/org/simdjson/Utf8ValidatorBenchmark.java b/src/jmh/java/org/simdjson/Utf8ValidatorBenchmark.java index 
51a6948..7661d38 100644 --- a/src/jmh/java/org/simdjson/Utf8ValidatorBenchmark.java +++ b/src/jmh/java/org/simdjson/Utf8ValidatorBenchmark.java @@ -1,7 +1,15 @@ package org.simdjson; import com.google.common.base.Utf8; -import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; import java.io.IOException; import java.io.InputStream; @@ -11,6 +19,7 @@ @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.SECONDS) public class Utf8ValidatorBenchmark { + @Param({"/twitter.json", "/gsoc-2018.json", "/github_events.json"}) String fileName; byte[] bytes; @@ -24,7 +33,7 @@ public void setup() throws IOException { @Benchmark public void utf8Validator() { - Utf8Validator.validate(bytes); + Utf8Validator.validate(bytes, bytes.length); } @Benchmark diff --git a/src/main/java/org/simdjson/BitIndexes.java b/src/main/java/org/simdjson/BitIndexes.java index 4ab1dde..59c0dc3 100644 --- a/src/main/java/org/simdjson/BitIndexes.java +++ b/src/main/java/org/simdjson/BitIndexes.java @@ -44,11 +44,26 @@ private long clearLowestBit(long bits) { return bits & (bits - 1); } - int advance() { + void advance() { + readIdx++; + } + + int getAndAdvance() { + assert readIdx <= writeIdx; return indexes[readIdx++]; } + int getLast() { + return indexes[writeIdx - 1]; + } + + int advanceAndGet() { + assert readIdx + 1 <= writeIdx; + return indexes[++readIdx]; + } + int peek() { + assert readIdx <= writeIdx; return indexes[readIdx]; } @@ -60,6 +75,26 @@ boolean isEnd() { return writeIdx == readIdx; } + boolean isPastEnd() { + return readIdx > writeIdx; + } + + void finish() { + // If we go past the end of the detected 
structural indexes, it means we are dealing with an invalid JSON. + // Thus, we need to stop processing immediately and throw an exception. To avoid checking after every increment + // of readIdx whether this has happened, we jump to the first structural element. This should produce the + // desired outcome, i.e., an iterator should detect invalid JSON. To understand how this works, let's first + // exclude primitive values (numbers, strings, booleans, nulls) from the scope of possible JSON documents. We + // can do this because, when these values are parsed, the length of the input buffer is verified, ensuring we + // never go past its end. Therefore, we can focus solely on objects and arrays. Since we always check that if + // the first character is '{', the last one must be '}', and if the first character is '[', the last one must + // be ']', we know that if we've reached beyond the buffer without crashing, the input is either '{...}' or '[...]'. + // Thus, if we jump to the first structural element, we will generate either '{...}{' or '[...]['. Both of these + // are invalid sequences and will be detected by the iterator, which will then stop processing and throw an + // exception informing about the invalid JSON. 
+ indexes[writeIdx] = 0; + } + void reset() { writeIdx = 0; readIdx = 0; diff --git a/src/main/java/org/simdjson/BlockReader.java b/src/main/java/org/simdjson/BlockReader.java deleted file mode 100644 index 4567386..0000000 --- a/src/main/java/org/simdjson/BlockReader.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.simdjson; - -import java.util.Arrays; - -class BlockReader { - - private static final byte SPACE = 0x20; - - private final int stepSize; - private final byte[] lastBlock; - private final byte[] spaces; - - private byte[] buffer; - private int len; - private int idx = 0; - private int lenMinusStep; - - BlockReader(int stepSize) { - this.stepSize = stepSize; - this.lastBlock = new byte[stepSize]; - this.spaces = new byte[stepSize]; - Arrays.fill(spaces, SPACE); - } - - void reset(byte[] buffer, int len) { - this.idx = 0; - this.len = len; - this.buffer = buffer; - this.lenMinusStep = len < stepSize ? 0 : len - stepSize; - } - - boolean hasFullBlock() { - return idx < lenMinusStep; - } - - byte[] remainder() { - System.arraycopy(spaces, 0, lastBlock, 0, lastBlock.length); - System.arraycopy(buffer, idx, lastBlock, 0, len - idx); - return lastBlock; - } - - void advance() { - idx += stepSize; - } - - int getBlockIndex() { - return idx; - } -} diff --git a/src/main/java/org/simdjson/CharactersClassifier.java b/src/main/java/org/simdjson/CharactersClassifier.java deleted file mode 100644 index 68b685c..0000000 --- a/src/main/java/org/simdjson/CharactersClassifier.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.simdjson; - -import jdk.incubator.vector.ByteVector; -import jdk.incubator.vector.VectorShuffle; - -class CharactersClassifier { - - private static final byte LOW_NIBBLE_MASK = 0x0f; - - private static final ByteVector WHITESPACE_TABLE = - ByteVector.fromArray( - StructuralIndexer.BYTE_SPECIES, - repeat(new byte[]{' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100}, StructuralIndexer.BYTE_SPECIES.vectorByteSize() / 4), - 0); 
- - private static final ByteVector OP_TABLE = - ByteVector.fromArray( - StructuralIndexer.BYTE_SPECIES, - repeat(new byte[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{', ',', '}', 0, 0}, StructuralIndexer.BYTE_SPECIES.vectorByteSize() / 4), - 0); - - private static byte[] repeat(byte[] array, int n) { - byte[] result = new byte[n * array.length]; - for (int dst = 0; dst < result.length; dst += array.length) { - System.arraycopy(array, 0, result, dst, array.length); - } - return result; - } - - JsonCharacterBlock classify(ByteVector chunk0) { - VectorShuffle chunk0Low = extractLowNibble(chunk0).toShuffle(); - long whitespace = eq(chunk0, WHITESPACE_TABLE.rearrange(chunk0Low)); - ByteVector curlified0 = curlify(chunk0); - long op = eq(curlified0, OP_TABLE.rearrange(chunk0Low)); - return new JsonCharacterBlock(whitespace, op); - } - - JsonCharacterBlock classify(ByteVector chunk0, ByteVector chunk1) { - VectorShuffle chunk0Low = extractLowNibble(chunk0).toShuffle(); - VectorShuffle chunk1Low = extractLowNibble(chunk1).toShuffle(); - long whitespace = eq(chunk0, WHITESPACE_TABLE.rearrange(chunk0Low), chunk1, WHITESPACE_TABLE.rearrange(chunk1Low)); - ByteVector curlified0 = curlify(chunk0); - ByteVector curlified1 = curlify(chunk1); - long op = eq(curlified0, OP_TABLE.rearrange(chunk0Low), curlified1, OP_TABLE.rearrange(chunk1Low)); - return new JsonCharacterBlock(whitespace, op); - } - - private ByteVector extractLowNibble(ByteVector vector) { - return vector.and(LOW_NIBBLE_MASK); - } - - private ByteVector curlify(ByteVector vector) { - // turns [ into { and ] into } - return vector.or((byte) 0x20); - } - - private long eq(ByteVector chunk0, ByteVector mask0) { - return chunk0.eq(mask0).toLong(); - } - - private long eq(ByteVector chunk0, ByteVector mask0, ByteVector chunk1, ByteVector mask1) { - long r0 = chunk0.eq(mask0).toLong(); - long r1 = chunk1.eq(mask1).toLong(); - return r0 | (r1 << 32); - } -} diff --git a/src/main/java/org/simdjson/ClassResolver.java 
b/src/main/java/org/simdjson/ClassResolver.java new file mode 100644 index 0000000..613aa2f --- /dev/null +++ b/src/main/java/org/simdjson/ClassResolver.java @@ -0,0 +1,24 @@ +package org.simdjson; + +import java.lang.reflect.Type; +import java.util.HashMap; +import java.util.Map; + +class ClassResolver { + + private final Map classCache = new HashMap<>(); + + ResolvedClass resolveClass(Type type) { + ResolvedClass resolvedClass = classCache.get(type); + if (resolvedClass != null) { + return resolvedClass; + } + resolvedClass = new ResolvedClass(type, this); + classCache.put(type, resolvedClass); + return resolvedClass; + } + + void reset() { + classCache.clear(); + } +} diff --git a/src/main/java/org/simdjson/ConstructorArgument.java b/src/main/java/org/simdjson/ConstructorArgument.java new file mode 100644 index 0000000..05a68a9 --- /dev/null +++ b/src/main/java/org/simdjson/ConstructorArgument.java @@ -0,0 +1,4 @@ +package org.simdjson; + +record ConstructorArgument(int idx, ResolvedClass resolvedClass) { +} diff --git a/src/main/java/org/simdjson/ConstructorArgumentsMap.java b/src/main/java/org/simdjson/ConstructorArgumentsMap.java new file mode 100644 index 0000000..d53a544 --- /dev/null +++ b/src/main/java/org/simdjson/ConstructorArgumentsMap.java @@ -0,0 +1,94 @@ +package org.simdjson; + +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; +import java.util.Arrays; + +import static java.lang.invoke.MethodHandles.byteArrayViewVarHandle; + +class ConstructorArgumentsMap { + + private static final VarHandle VAR_HANDLE_LONG = byteArrayViewVarHandle(Long.TYPE.arrayType(), ByteOrder.nativeOrder()); + private static final VarHandle VAR_HANDLE_INT = byteArrayViewVarHandle(Integer.TYPE.arrayType(), ByteOrder.nativeOrder()); + // Large prime number. 
This one is taken from https://vanilla-java.github.io/2018/08/15/Looking-at-randomness-and-performance-for-hash-codes.html + private static final long M2 = 0x7a646e4d; + + private final int argumentCount; + private final int capacity; + private final int moduloMask; + private final byte[][] keys; + private final ConstructorArgument[] arguments; + + ConstructorArgumentsMap(int argumentCount) { + this.argumentCount = argumentCount; + this.capacity = ceilingPowerOfTwo(argumentCount); + this.moduloMask = capacity - 1; + this.arguments = new ConstructorArgument[capacity]; + this.keys = new byte[capacity][]; + } + + private static int ceilingPowerOfTwo(int argumentCount) { + // We don't need to check if argumentCount is greater than 2^30 because, in Java, the limit for method arguments + // is equal to 255 (https://docs.oracle.com/javase/specs/jvms/se21/html/jvms-4.html#jvms-4.3.3). + return 1 << -Integer.numberOfLeadingZeros(argumentCount - 1); + } + + int getArgumentCount() { + return argumentCount; + } + + void put(byte[] fieldName, ConstructorArgument argument) { + int place = findPlace(fieldName, fieldName.length); + + while (keys[place] != null) { + place = (place + 1) & moduloMask; + } + arguments[place] = argument; + keys[place] = fieldName; + } + + ConstructorArgument get(byte[] buffer, int len) { + int place = findPlace(buffer, len); + for (int i = 0; i < capacity; i++) { + byte[] key = keys[place]; + if (key == null) { + return null; + } + if (Arrays.equals(key, 0, key.length, buffer, 0, len)) { + return arguments[place]; + } + place = (place + 1) & moduloMask; + } + return null; + } + + private int findPlace(byte[] buffer, int len) { + int hash = hash(buffer, len); + return hash & moduloMask; + } + + private static int hash(byte[] data, int len) { + long h = 0; + int i = 0; + for (; i + 7 < len; i += 8) { + h = h * M2 + getLongFromArray(data, i); + } + if (i + 3 < len) { + h = h * M2 + getIntFromArray(data, i); + i += 4; + } + for (; i < len; i++) { + h = h * 
M2 + data[i]; + } + h *= M2; + return (int) (h ^ h >>> 32); + } + + private static int getIntFromArray(byte[] value, int i) { + return (int) VAR_HANDLE_INT.get(value, i); + } + + private static long getLongFromArray(byte[] value, int i) { + return (long) VAR_HANDLE_LONG.get(value, i); + } +} diff --git a/src/main/java/org/simdjson/DoubleParser.java b/src/main/java/org/simdjson/DoubleParser.java new file mode 100644 index 0000000..c5927f9 --- /dev/null +++ b/src/main/java/org/simdjson/DoubleParser.java @@ -0,0 +1,505 @@ +package org.simdjson; + +import static java.lang.Double.NEGATIVE_INFINITY; +import static java.lang.Double.POSITIVE_INFINITY; +import static java.lang.Double.longBitsToDouble; +import static java.lang.Long.compareUnsigned; +import static java.lang.Long.divideUnsigned; +import static java.lang.Long.numberOfLeadingZeros; +import static java.lang.Long.remainderUnsigned; +import static java.lang.Math.abs; +import static java.lang.Math.unsignedMultiplyHigh; +import static org.simdjson.ExponentParser.isExponentIndicator; +import static org.simdjson.NumberParserTables.MIN_POWER_OF_FIVE; +import static org.simdjson.NumberParserTables.NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT; +import static org.simdjson.NumberParserTables.POWERS_OF_FIVE; +import static org.simdjson.NumberParserTables.POWER_OF_FIVE_DIGITS; + +class DoubleParser { + + // When parsing doubles, we assume that a long used to store digits is unsigned. Thus, it can safely accommodate + // up to 19 digits (9999999999999999999 < 2^64). + private static final int FAST_PATH_MAX_DIGIT_COUNT = 19; + // The smallest non-zero number representable in binary64 is 2^-1074, which is about 4.941 * 10^-324. + // If we consider a number in the form of w * 10^q where 1 <= w <= 9999999999999999999, then + // 1 * 10^q <= w * 10^q <= 9.999999999999999999 * 10^18 * 10^q. To ensure w * 10^q < 2^-1074, q must satisfy the + // following inequality: 9.999999999999999999 * 10^(18 + q) < 2^-1074. 
This condition holds true whenever + // 18 + q < -324. Thus, for q < -342, we can reliably conclude that the number w * 10^q is smaller than 2^-1074, + // and this, in turn means the number is equal to zero. + private static final int FAST_PATH_MIN_POWER_OF_TEN = -342; + // We know that (1 - 2^-53) * 2^1024, which is about 1.798 * 10^308, is the largest number representable in binary64. + // When the parsed number is expressed as w * 10^q, where w >= 1, we are sure that for any q > 308, the number is + // infinite. + private static final int FAST_PATH_MAX_POWER_OF_TEN = 308; + private static final double[] POWERS_OF_TEN = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 + }; + private static final long MAX_LONG_REPRESENTED_AS_DOUBLE_EXACTLY = (1L << 53) - 1; + private static final int IEEE64_EXPONENT_BIAS = 1023; + private static final int IEEE64_SIGN_BIT_INDEX = 63; + private static final int IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT = 52; + private static final int IEEE64_SIGNIFICAND_SIZE_IN_BITS = IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT + 1; + private static final int IEEE64_MAX_FINITE_NUMBER_EXPONENT = 1023; + private static final int IEEE64_MIN_FINITE_NUMBER_EXPONENT = -1022; + private static final int IEEE64_SUBNORMAL_EXPONENT = -1023; + // This is the upper limit for the count of decimal digits taken into account in the slow path. All digits exceeding + // this threshold are excluded. 
+ private static final int SLOW_PATH_MAX_DIGIT_COUNT = 800; + private static final int SLOW_PATH_MAX_SHIFT = 60; + private static final byte[] SLOW_PATH_SHIFTS = { + 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, + 33, 36, 39, 43, 46, 49, 53, 56, 59, + }; + private static final long MULTIPLICATION_MASK = 0xFFFFFFFFFFFFFFFFL >>> IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT + 3; + + private final SlowPathDecimal slowPathDecimal = new SlowPathDecimal(); + private final ExponentParser exponentParser = new ExponentParser(); + + double parse(byte[] buffer, int offset, boolean negative, int digitsStartIdx, int digitCount, long digits, long exponent) { + if (shouldBeHandledBySlowPath(buffer, digitsStartIdx, digitCount)) { + return slowlyParseDouble(buffer, offset); + } else { + return computeDouble(negative, digits, exponent); + } + } + + private static boolean shouldBeHandledBySlowPath(byte[] buffer, int startDigitsIdx, int digitCount) { + if (digitCount <= FAST_PATH_MAX_DIGIT_COUNT) { + return false; + } + int start = startDigitsIdx; + while (buffer[start] == '0' || buffer[start] == '.') { + start++; + } + int significantDigitCount = digitCount - (start - startDigitsIdx); + return significantDigitCount > FAST_PATH_MAX_DIGIT_COUNT; + } + + private static double computeDouble(boolean negative, long significand10, long exp10) { + if (abs(exp10) < POWERS_OF_TEN.length && compareUnsigned(significand10, MAX_LONG_REPRESENTED_AS_DOUBLE_EXACTLY) <= 0) { + // This path has been described in https://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/. + double result = significand10; + if (exp10 < 0) { + result = result / POWERS_OF_TEN[(int) -exp10]; + } else { + result = result * POWERS_OF_TEN[(int) exp10]; + } + return negative ? -result : result; + } + + // The following path is an implementation of the Eisel-Lemire algorithm described by Daniel Lemire in + // "Number Parsing at a Gigabyte per Second" (https://arxiv.org/abs/2101.11408). 
+ + if (exp10 < FAST_PATH_MIN_POWER_OF_TEN || significand10 == 0) { + return zero(negative); + } else if (exp10 > FAST_PATH_MAX_POWER_OF_TEN) { + return infinity(negative); + } + + // We start by normalizing the decimal significand so that it is within the range of [2^63, 2^64). + int lz = numberOfLeadingZeros(significand10); + significand10 <<= lz; + + // Initially, the number we are parsing is in the form of w * 10^q = w * 5^q * 2^q, and our objective is to + // convert it to m * 2^p. We can represent w * 10^q as w * 5^q * 2^r * 2^p, where w * 5^q * 2^r = m. + // Therefore, in the next step we compute w * 5^q. The implementation of this multiplication is optimized + // to minimize necessary operations while ensuring precise results. For more information, refer to the + // aforementioned paper. + int powersOfFiveTableIndex = 2 * (int) (exp10 - MIN_POWER_OF_FIVE); + long upper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex]); + long lower = significand10 * POWERS_OF_FIVE[powersOfFiveTableIndex]; + if ((upper & MULTIPLICATION_MASK) == MULTIPLICATION_MASK) { + long secondUpper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex + 1]); + lower += secondUpper; + if (compareUnsigned(secondUpper, lower) > 0) { + upper++; + } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without Fallback" + // (https://arxiv.org/abs/2212.06644), at this point we are sure that the product is sufficiently accurate, + // and more computation is not needed. + } + + // Here, we extract the binary significand from the product. Although in binary64 the significand has 53 bits, + // we extract 54 bits to use the least significant bit for rounding. Since both the decimal significand and the + // values stored in POWERS_OF_FIVE are normalized, ensuring that their most significant bits are set, the product + // has either 0 or 1 leading zeros. 
As a result, we need to perform a right shift of either 9 or 10 bits. + long upperBit = upper >>> 63; + long upperShift = upperBit + 9; + long significand2 = upper >>> upperShift; + + // Now, we have to determine the value of the binary exponent. Let's begin by calculating the contribution of + // 10^q. Our goal is to compute f0 and f1 such that: + // - when q >= 0: 10^q = (5^q / 2^(f0 - q)) * 2^f0 + // - when q < 0: 10^q = (2^(f1 - q) / 5^-q) * 2^f1 + // Both (5^q / 2^(f0 - q)) and (2^(f1 - q) / 5^-q) must fall within the range of [1, 2). + // It turns out that these conditions are met when: + // - 0 <= q <= FAST_PATH_MAX_POWER_OF_TEN, and f0 = floor(log2(5^q)) + q = floor(q * log(5) / log(2)) + q = (217706 * q) / 2^16. + // - FAST_PATH_MIN_POWER_OF_TEN <= q < 0, and f1 = -ceil(log2(5^-q)) + q = -ceil(-q * log(5) / log(2)) + q = (217706 * q) / 2^16. + // Thus, we can express the contribution of 10^q to the exponent as (217706 * exp10) >> 16. + // + // Furthermore, we need to factor in the following normalizations we've performed: + // - shifting the decimal significand left bitwise + // - shifting the binary significand right bitwise if the most significant bit of the product was 1 + // Therefore, we add (63 - lz + upperBit) to the exponent. + long exp2 = ((217706 * exp10) >> 16) + 63 - lz + upperBit; + if (exp2 < IEEE64_MIN_FINITE_NUMBER_EXPONENT) { + // In the next step, we right-shift the binary significand by the difference between the minimum exponent + // and the binary exponent. In Java, the shift distance is limited to the range of 0 to 63, inclusive. + // Thus, we need to handle the case when the distance is >= 64 separately and always return zero. + if (exp2 <= IEEE64_MIN_FINITE_NUMBER_EXPONENT - 64) { + return zero(negative); + } + + // In this branch, it is likely that we are handling a subnormal number. 
Therefore, we adjust the significand + // to conform to the formula representing subnormal numbers: (significand2 * 2^(1 - IEEE64_EXPONENT_BIAS)) / 2^52. + significand2 >>= 1 - IEEE64_EXPONENT_BIAS - exp2; + // Round up if the significand is odd and remove the least significant bit that we've left for rounding. + significand2 += significand2 & 1; + significand2 >>= 1; + + // Here, we are addressing a scenario in which the original number was subnormal, but it became normal after + // rounding up. For example, when we are parsing 2.2250738585072013e-308 before rounding and removing the + // least significant bit significand2 = 0x3fffffffffffff and exp2 = -1023. After rounding, we get + // significand2 = 0x10000000000000, which is the significand of the smallest normal number. + exp2 = (significand2 < (1L << 52)) ? IEEE64_SUBNORMAL_EXPONENT : IEEE64_MIN_FINITE_NUMBER_EXPONENT; + return toDouble(negative, significand2, exp2); + } + + // Here, we are addressing a scenario of rounding the binary significand when it falls precisely halfway + // between two integers. To understand the rationale behind the conditions used to identify this case, refer to + // sections 6, 8.1, and 9.1 of "Number Parsing at a Gigabyte per Second". + if (exp10 >= -4 && exp10 <= 23) { + if ((significand2 << upperShift == upper) && (compareUnsigned(lower, 1) <= 0)) { + if ((significand2 & 3) == 1) { + significand2 &= ~1; + } + } + } + + // Round up if the significand is odd and remove the least significant bit that we've left for rounding. + significand2 += significand2 & 1; + significand2 >>= 1; + + if (significand2 == (1L << IEEE64_SIGNIFICAND_SIZE_IN_BITS)) { + // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand + // by 2 and update the exponent accordingly. 
+ significand2 >>= 1; + exp2++; + } + + if (exp2 > IEEE64_MAX_FINITE_NUMBER_EXPONENT) { + return infinity(negative); + } + return toDouble(negative, significand2, exp2); + } + + private static double toDouble(boolean negative, long significand2, long exp2) { + long bits = significand2; + bits &= ~(1L << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT); // clear the implicit bit + bits |= (exp2 + IEEE64_EXPONENT_BIAS) << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT; + bits = negative ? (bits | (1L << IEEE64_SIGN_BIT_INDEX)) : bits; + return longBitsToDouble(bits); + } + + private static double infinity(boolean negative) { + return negative ? NEGATIVE_INFINITY : POSITIVE_INFINITY; + } + + private static double zero(boolean negative) { + return negative ? -0.0 : 0.0; + } + + // The following parser is based on the idea described in + // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html and implemented in + // https://github.com/simdjson/simdjson/blob/caff09cafceb0f5f6fc9109236d6dd09ac4bc0d8/src/from_chars.cpp + private double slowlyParseDouble(byte[] buffer, int offset) { + final SlowPathDecimal decimal = slowPathDecimal; + decimal.reset(); + + decimal.negative = buffer[offset] == '-'; + int currentIdx = decimal.negative ? 
offset + 1 : offset; + long exp10 = 0; + + currentIdx = skipZeros(buffer, currentIdx); + currentIdx = parseDigits(buffer, decimal, currentIdx); + if (buffer[currentIdx] == '.') { + currentIdx++; + int firstIdxAfterPeriod = currentIdx; + if (decimal.digitCount == 0) { + currentIdx = skipZeros(buffer, currentIdx); + } + currentIdx = parseDigits(buffer, decimal, currentIdx); + exp10 = firstIdxAfterPeriod - currentIdx; + } + + int currentIdxMovingBackwards = currentIdx - 1; + int trailingZeros = 0; + // Here, we also skip the period to handle cases like 100000000000000000000.000000 + while (buffer[currentIdxMovingBackwards] == '0' || buffer[currentIdxMovingBackwards] == '.') { + if (buffer[currentIdxMovingBackwards] == '0') { + trailingZeros++; + } + currentIdxMovingBackwards--; + } + exp10 += decimal.digitCount; + decimal.digitCount -= trailingZeros; + + if (decimal.digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { + decimal.digitCount = SLOW_PATH_MAX_DIGIT_COUNT; + decimal.truncated = true; + } + + if (isExponentIndicator(buffer[currentIdx])) { + currentIdx++; + exp10 = exponentParser.parse(buffer, currentIdx, exp10).exponent(); + } + + // At this point, the number we are parsing is represented in the following way: w * 10^exp10, where -1 < w < 1. + if (exp10 <= -324) { + // We know that -1e-324 < w * 10^exp10 < 1e-324. In binary64 -1e-324 = -0.0 and 1e-324 = +0.0, so we can + // safely return +/-0.0. + return zero(decimal.negative); + } else if (exp10 >= 310) { + // We know that either w * 10^exp10 <= -0.1e310 or w * 10^exp10 >= 0.1e310. + // In binary64 -0.1e310 = -inf and 0.1e310 = +inf, so we can safely return +/-inf. + return infinity(decimal.negative); + } + + decimal.exp10 = (int) exp10; + int exp2 = 0; + + // We start the following loop with the decimal in the form of w * 10^exp10. After a series of + // right-shifts (dividing by a power of 2), we transform the decimal into w' * 2^exp2 * 10^exp10', + // where exp10' is <= 0. 
Resultantly, w' * 10^exp10' is in the range of [0, 1). + while (decimal.exp10 > 0) { + int shift = resolveShiftDistanceBasedOnExponent10(decimal.exp10); + decimal.shiftRight(shift); + exp2 += shift; + } + + // Now, we are left-shifting to get to the point where w'' * 10^exp10'' is within the range of [1/2, 1). + while (decimal.exp10 <= 0) { + int shift; + if (decimal.exp10 == 0) { + if (decimal.digits[0] >= 5) { + break; + } + shift = (decimal.digits[0] < 2) ? 2 : 1; + } else { + shift = resolveShiftDistanceBasedOnExponent10(-decimal.exp10); + } + decimal.shiftLeft(shift); + exp2 -= shift; + } + + // Here, w'' * 10^exp10'' falls within the range of [1/2, 1). In binary64, the significand must be within the + // range of [1, 2). We can get to the target range by decreasing the binary exponent. Resultantly, the decimal + // is represented as w'' * 10^exp10'' * 2^exp2, where w'' * 10^exp10'' is in the range of [1, 2). + exp2--; + + while (IEEE64_MIN_FINITE_NUMBER_EXPONENT > exp2) { + int n = IEEE64_MIN_FINITE_NUMBER_EXPONENT - exp2; + if (n > SLOW_PATH_MAX_SHIFT) { + n = SLOW_PATH_MAX_SHIFT; + } + decimal.shiftRight(n); + exp2 += n; + } + + // To conform to the IEEE 754 standard, the binary significand must fall within the range of [2^52, 2^53). Hence, + // we perform the following multiplication. If, after this step, the significand is less than 2^52, we have a + // subnormal number, which we will address later. + decimal.shiftLeft(IEEE64_SIGNIFICAND_SIZE_IN_BITS); + + long significand2 = decimal.computeSignificand(); + if (significand2 >= (1L << IEEE64_SIGNIFICAND_SIZE_IN_BITS)) { + // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand + // by 2 and update the exponent accordingly. 
+ significand2 >>= 1; + exp2++; + } + + if (significand2 < (1L << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT)) { + exp2 = IEEE64_SUBNORMAL_EXPONENT; + } + if (exp2 > IEEE64_MAX_FINITE_NUMBER_EXPONENT) { + return infinity(decimal.negative); + } + return toDouble(decimal.negative, significand2, exp2); + } + + private static int resolveShiftDistanceBasedOnExponent10(int exp10) { + return (exp10 < SLOW_PATH_SHIFTS.length) ? SLOW_PATH_SHIFTS[exp10] : SLOW_PATH_MAX_SHIFT; + } + + private int skipZeros(byte[] buffer, int currentIdx) { + while (buffer[currentIdx] == '0') { + currentIdx++; + } + return currentIdx; + } + + private int parseDigits(byte[] buffer, SlowPathDecimal decimal, int currentIdx) { + while (isDigit(buffer[currentIdx])) { + if (decimal.digitCount < SLOW_PATH_MAX_DIGIT_COUNT) { + decimal.digits[decimal.digitCount] = convertCharacterToDigit(buffer[currentIdx]); + } + decimal.digitCount++; + currentIdx++; + } + return currentIdx; + } + + private static byte convertCharacterToDigit(byte b) { + return (byte) (b - '0'); + } + + private static boolean isDigit(byte b) { + return b >= '0' && b <= '9'; + } + + private static class SlowPathDecimal { + + final byte[] digits = new byte[SLOW_PATH_MAX_DIGIT_COUNT]; + int digitCount; + int exp10; + boolean truncated; + boolean negative; + + // Before calling this method we have to make sure that the significand is within the range of [0, 2^53 - 1]. + long computeSignificand() { + if (digitCount == 0 || exp10 < 0) { + return 0; + } + long significand = 0; + for (int i = 0; i < exp10; i++) { + significand = (10 * significand) + ((i < digitCount) ? digits[i] : 0); + } + boolean roundUp = false; + if (exp10 < digitCount) { + roundUp = digits[exp10] >= 5; + if ((digits[exp10] == 5) && (exp10 + 1 == digitCount)) { + // If the digits haven't been truncated, then we are exactly halfway between two integers. In such + // cases, we round to even, otherwise we round up. 
+ roundUp = truncated || (significand & 1) == 1; + } + } + return roundUp ? ++significand : significand; + } + + void shiftLeft(int shift) { + if (digitCount == 0) { + return; + } + + int numberOfAdditionalDigits = calculateNumberOfAdditionalDigitsAfterLeftShift(shift); + int readIndex = digitCount - 1; + int writeIndex = digitCount - 1 + numberOfAdditionalDigits; + long n = 0; + + while (readIndex >= 0) { + n += (long) digits[readIndex] << shift; + long quotient = divideUnsigned(n, 10); + long remainder = remainderUnsigned(n, 10); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex] = (byte) remainder; + } else if (remainder > 0) { + truncated = true; + } + n = quotient; + writeIndex--; + readIndex--; + } + + while (compareUnsigned(n, 0) > 0) { + long quotient = divideUnsigned(n, 10); + long remainder = remainderUnsigned(n, 10); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex] = (byte) remainder; + } else if (remainder > 0) { + truncated = true; + } + n = quotient; + writeIndex--; + } + digitCount += numberOfAdditionalDigits; + if (digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { + digitCount = SLOW_PATH_MAX_DIGIT_COUNT; + } + exp10 += numberOfAdditionalDigits; + trimTrailingZeros(); + } + + // See https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html#hpd-shifts + private int calculateNumberOfAdditionalDigitsAfterLeftShift(int shift) { + int a = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift]; + int b = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift + 1]; + int newDigitCount = a >> 11; + int pow5OffsetA = 0x7FF & a; + int pow5OffsetB = 0x7FF & b; + + int n = pow5OffsetB - pow5OffsetA; + for (int i = 0; i < n; i++) { + if (i >= digitCount) { + return newDigitCount - 1; + } else if (digits[i] < POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { + return newDigitCount - 1; + } else if (digits[i] > POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { + return newDigitCount; + } + } + return newDigitCount; + } + + void shiftRight(int shift) { 
+ int readIndex = 0; + int writeIndex = 0; + long n = 0; + + while ((n >>> shift) == 0) { + if (readIndex < digitCount) { + n = (10 * n) + digits[readIndex++]; + } else if (n == 0) { + return; + } else { + while ((n >>> shift) == 0) { + n = 10 * n; + readIndex++; + } + break; + } + } + exp10 -= (readIndex - 1); + long mask = (1L << shift) - 1; + while (readIndex < digitCount) { + byte newDigit = (byte) (n >>> shift); + n = (10 * (n & mask)) + digits[readIndex++]; + digits[writeIndex++] = newDigit; + } + while (compareUnsigned(n, 0) > 0) { + byte newDigit = (byte) (n >>> shift); + n = 10 * (n & mask); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex++] = newDigit; + } else if (newDigit > 0) { + truncated = true; + } + } + digitCount = writeIndex; + trimTrailingZeros(); + } + + private void trimTrailingZeros() { + while ((digitCount > 0) && (digits[digitCount - 1] == 0)) { + digitCount--; + } + } + + private void reset() { + digitCount = 0; + exp10 = 0; + truncated = false; + } + } +} diff --git a/src/main/java/org/simdjson/ExponentParser.java b/src/main/java/org/simdjson/ExponentParser.java new file mode 100644 index 0000000..be07a37 --- /dev/null +++ b/src/main/java/org/simdjson/ExponentParser.java @@ -0,0 +1,94 @@ +package org.simdjson; + +class ExponentParser { + + private final ExponentParsingResult result = new ExponentParsingResult(); + + static boolean isExponentIndicator(byte b) { + return 'e' == b || 'E' == b; + } + + ExponentParsingResult parse(byte[] buffer, int currentIdx, long exponent) { + boolean negative = '-' == buffer[currentIdx]; + if (negative || '+' == buffer[currentIdx]) { + currentIdx++; + } + int exponentStartIdx = currentIdx; + + long parsedExponent = 0; + byte digit = convertCharacterToDigit(buffer[currentIdx]); + while (digit >= 0 && digit <= 9) { + parsedExponent = 10 * parsedExponent + digit; + currentIdx++; + digit = convertCharacterToDigit(buffer[currentIdx]); + } + + if (exponentStartIdx == currentIdx) { + throw new 
JsonParsingException("Invalid number. Exponent indicator has to be followed by a digit."); + } + // Long.MAX_VALUE = 9223372036854775807 (19 digits). Therefore, any number with <= 18 digits can be safely + // stored in a long without causing an overflow. + int maxDigitCountLongCanAccommodate = 18; + if (currentIdx > exponentStartIdx + maxDigitCountLongCanAccommodate) { + // Potentially, we have an overflow here. We try to skip leading zeros. + while (buffer[exponentStartIdx] == '0') { + exponentStartIdx++; + } + if (currentIdx > exponentStartIdx + maxDigitCountLongCanAccommodate) { + // We still have more digits than a long can safely accommodate. + // + // The largest finite number that can be represented in binary64 is (1-2^-53) * 2^1024, which is about + // 1.798e308, and the smallest non-zero number is 2^-1074, roughly 4.941e-324. So, we might, potentially, + // care only about numbers with explicit exponents falling within the range of [-324, 308], and return + // either zero or infinity for everything outside of this range. However, we have to take into account + // the fractional part of the parsed number. This part can potentially cancel out the value of the + // explicit exponent. For example, 1000e-325 (1 * 10^3 * 10^-325 = 1 * 10^-322) is not equal to zero + // despite the explicit exponent being less than -324. + // + // Let's consider a scenario where the explicit exponent is greater than 999999999999999999. As long as + // the fractional part has <= 999999999999999690 digits, it doesn't matter whether we take + // 999999999999999999 or its actual value as the explicit exponent. This is due to the fact that the + // parsed number is infinite anyway (w * 10^-q * 10^999999999999999999 > (1-2^-53) * 2^1024, 0 < w < 10, + // 0 <= q <= 999999999999999690). 
Similarly, in a scenario where the explicit exponent is less than + // -999999999999999999, as long as the fractional part has <= 999999999999999674 digits, we can safely + // take -999999999999999999 as the explicit exponent, given that the parsed number is zero anyway + // (w * 10^q * 10^-999999999999999999 < 2^-1074, 0 < w < 10, 0 <= q <= 999999999999999674). + // + // Note that if the fractional part had 999999999999999674 digits, the JSON size would need to be + // 999999999999999674 bytes, which is approximately 888 PiB. Consequently, it's reasonable to assume + // that the fractional part contains no more than 999999999999999674 digits. + parsedExponent = 999999999999999999L; + } + } + // Note that we don't check if 'exponent' has overflowed after the following addition. This is because we + // know that the parsed exponent falls within the range of [-999999999999999999, 999999999999999999]. We also + // assume that 'exponent' before the addition is within the range of [-9223372036854775808, 9223372036854775807]. + // This assumption should always be valid as the value of 'exponent' is constrained by the size of the JSON input. + exponent += negative ? 
-parsedExponent : parsedExponent; + return result.of(exponent, currentIdx); + } + + private static byte convertCharacterToDigit(byte b) { + return (byte) (b - '0'); + } + + static class ExponentParsingResult { + + private long exponent; + private int currentIdx; + + ExponentParsingResult of(long exponent, int currentIdx) { + this.exponent = exponent; + this.currentIdx = currentIdx; + return this; + } + + long exponent() { + return exponent; + } + + int currentIdx() { + return currentIdx; + } + } +} diff --git a/src/main/java/org/simdjson/FloatParser.java b/src/main/java/org/simdjson/FloatParser.java new file mode 100644 index 0000000..0d3ebfa --- /dev/null +++ b/src/main/java/org/simdjson/FloatParser.java @@ -0,0 +1,504 @@ +package org.simdjson; + +import static java.lang.Float.NEGATIVE_INFINITY; +import static java.lang.Float.POSITIVE_INFINITY; +import static java.lang.Long.compareUnsigned; +import static java.lang.Long.divideUnsigned; +import static java.lang.Long.numberOfLeadingZeros; +import static java.lang.Long.remainderUnsigned; +import static java.lang.Math.abs; +import static java.lang.Math.unsignedMultiplyHigh; +import static org.simdjson.ExponentParser.isExponentIndicator; +import static org.simdjson.NumberParserTables.MIN_POWER_OF_FIVE; +import static org.simdjson.NumberParserTables.NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT; +import static org.simdjson.NumberParserTables.POWERS_OF_FIVE; +import static org.simdjson.NumberParserTables.POWER_OF_FIVE_DIGITS; + +class FloatParser { + + // When parsing floats, we assume that a long used to store digits is unsigned. Thus, it can safely accommodate + // up to 19 digits (9999999999999999999 < 2^64). + private static final int FAST_PATH_MAX_DIGIT_COUNT = 19; + // The smallest non-zero number representable in binary32 is 2^-149, which is about 1.4 * 10^-45. 
+ // If we consider a number in the form of w * 10^q where 1 <= w <= 9999999999999999999, then + // 1 * 10^q <= w * 10^q <= 9.999999999999999999 * 10^18 * 10^q. To ensure w * 10^q < 2^-149, q must satisfy the + // following inequality: 9.999999999999999999 * 10^(18 + q) < 2^-149. This condition holds true whenever + // 18 + q < -45. Thus, for q < -63, we can reliably conclude that the number w * 10^q is smaller than 2^-149, + // and this, in turn means the number is equal to zero. + private static final int FAST_PATH_MIN_POWER_OF_TEN = -63; // todo: https://github.com/fastfloat/fast_float/pull/167 + // We know that (1 - 2^-24) * 2^128, which is about 3.4 * 10^38, is the largest number representable in binary32. + // When the parsed number is expressed as w * 10^q, where w >= 1, we are sure that for any q > 38, the number is + // infinite. + private static final int FAST_PATH_MAX_POWER_OF_TEN = 38; + private static final float[] POWERS_OF_TEN = { + 1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f, 1e8f, 1e9f, 1e10f + }; + private static final long MAX_LONG_REPRESENTED_AS_FLOAT_EXACTLY = (1L << 24) - 1; + private static final int IEEE32_EXPONENT_BIAS = 127; + private static final int IEEE32_SIGN_BIT_INDEX = 31; + private static final int IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT = 23; + private static final int IEEE32_SIGNIFICAND_SIZE_IN_BITS = IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT + 1; + private static final int IEEE32_MAX_FINITE_NUMBER_EXPONENT = 127; + private static final int IEEE32_MIN_FINITE_NUMBER_EXPONENT = -126; + private static final int IEEE32_SUBNORMAL_EXPONENT = -127; + // This is the upper limit for the count of decimal digits taken into account in the slow path. All digits exceeding + // this threshold are excluded. 
+ private static final int SLOW_PATH_MAX_DIGIT_COUNT = 800; + private static final int SLOW_PATH_MAX_SHIFT = 60; + private static final byte[] SLOW_PATH_SHIFTS = { + 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, + 33, 36, 39, 43, 46, 49, 53, 56, 59, + }; + private static final long MULTIPLICATION_MASK = 0xFFFFFFFFFFFFFFFFL >>> IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT + 3; + + private final SlowPathDecimal slowPathDecimal = new SlowPathDecimal(); + private final ExponentParser exponentParser = new ExponentParser(); + + float parse(byte[] buffer, int offset, boolean negative, int digitsStartIdx, int digitCount, long digits, long exponent) { + if (shouldBeHandledBySlowPath(buffer, digitsStartIdx, digitCount)) { + return slowlyParseFloat(buffer, offset); + } else { + return computeFloat(negative, digits, exponent); + } + } + + private static boolean shouldBeHandledBySlowPath(byte[] buffer, int startDigitsIdx, int digitCount) { + if (digitCount <= FAST_PATH_MAX_DIGIT_COUNT) { + return false; + } + int start = startDigitsIdx; + while (buffer[start] == '0' || buffer[start] == '.') { + start++; + } + int significantDigitCount = digitCount - (start - startDigitsIdx); + return significantDigitCount > FAST_PATH_MAX_DIGIT_COUNT; + } + + private static float computeFloat(boolean negative, long significand10, long exp10) { + if (abs(exp10) < POWERS_OF_TEN.length && compareUnsigned(significand10, MAX_LONG_REPRESENTED_AS_FLOAT_EXACTLY) <= 0) { + // This path has been described in https://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/. + float result = significand10; + if (exp10 < 0) { + result = result / POWERS_OF_TEN[(int) -exp10]; + } else { + result = result * POWERS_OF_TEN[(int) exp10]; + } + return negative ? -result : result; + } + + // The following path is an implementation of the Eisel-Lemire algorithm described by Daniel Lemire in + // "Number Parsing at a Gigabyte per Second" (https://arxiv.org/abs/2101.11408). 
+ + if (exp10 < FAST_PATH_MIN_POWER_OF_TEN || significand10 == 0) { + return zero(negative); + } else if (exp10 > FAST_PATH_MAX_POWER_OF_TEN) { + return infinity(negative); + } + + // We start by normalizing the decimal significand so that it is within the range of [2^63, 2^64). + int lz = numberOfLeadingZeros(significand10); + significand10 <<= lz; + + // Initially, the number we are parsing is in the form of w * 10^q = w * 5^q * 2^q, and our objective is to + // convert it to m * 2^p. We can represent w * 10^q as w * 5^q * 2^r * 2^p, where w * 5^q * 2^r = m. + // Therefore, in the next step we compute w * 5^q. The implementation of this multiplication is optimized + // to minimize necessary operations while ensuring precise results. For more information, refer to the + // aforementioned paper. + int powersOfFiveTableIndex = 2 * (int) (exp10 - MIN_POWER_OF_FIVE); + long upper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex]); + long lower = significand10 * POWERS_OF_FIVE[powersOfFiveTableIndex]; + if ((upper & MULTIPLICATION_MASK) == MULTIPLICATION_MASK) { + long secondUpper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex + 1]); + lower += secondUpper; + if (compareUnsigned(secondUpper, lower) > 0) { + upper++; + } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without Fallback" + // (https://arxiv.org/abs/2212.06644), at this point we are sure that the product is sufficiently accurate, + // and more computation is not needed. + } + + // Here, we extract the binary significand from the product. Although in binary32 the significand has 24 bits, + // we extract 25 bits to use the least significant bit for rounding. Since both the decimal significand and the + // values stored in POWERS_OF_FIVE are normalized, ensuring that their most significant bits are set, the product + // has either 0 or 1 leading zeros. 
As a result, we need to perform a right shift of either 38 or 39 bits. + long upperBit = upper >>> 63; + long upperShift = upperBit + 38; + long significand2 = upper >>> upperShift; + + // Now, we have to determine the value of the binary exponent. Let's begin by calculating the contribution of + // 10^q. Our goal is to compute f0 and f1 such that: + // - when q >= 0: 10^q = (5^q / 2^(f0 - q)) * 2^f0 + // - when q < 0: 10^q = (2^(q - f1) / 5^-q) * 2^f1 + // Both (5^q / 2^(f0 - q)) and (2^(q - f1) / 5^-q) must fall within the range of [1, 2). + // It turns out that these conditions are met when: + // - 0 <= q <= FAST_PATH_MAX_POWER_OF_TEN, and f0 = floor(log2(5^q)) + q = floor(q * log(5) / log(2)) + q = (217706 * q) / 2^16. + // - FAST_PATH_MIN_POWER_OF_TEN <= q < 0, and f1 = -ceil(log2(5^-q)) + q = -ceil(-q * log(5) / log(2)) + q = (217706 * q) / 2^16. + // Thus, we can express the contribution of 10^q to the exponent as (217706 * exp10) >> 16. + // + // Furthermore, we need to factor in the following normalizations we've performed: + // - shifting the decimal significand left bitwise + // - shifting the binary significand right bitwise if the most significant bit of the product was 1 + // Therefore, we add (63 - lz + upperBit) to the exponent. + long exp2 = ((217706 * exp10) >> 16) + 63 - lz + upperBit; + if (exp2 < IEEE32_MIN_FINITE_NUMBER_EXPONENT) { + // In the next step, we right-shift the binary significand by the difference between the minimum exponent + // and the binary exponent. In Java, the shift distance is limited to the range of 0 to 63, inclusive. + // Thus, we need to handle the case when the distance is >= 64 separately and always return zero. + if (exp2 <= IEEE32_MIN_FINITE_NUMBER_EXPONENT - 64) { + return zero(negative); + } + + // In this branch, it is likely that we are handling a subnormal number.
Therefore, we adjust the significand + // to conform to the formula representing subnormal numbers: (significand2 * 2^(1 - IEEE32_EXPONENT_BIAS)) / 2^23. + significand2 >>= 1 - IEEE32_EXPONENT_BIAS - exp2; + // Round up if the significand is odd and remove the least significant bit that we've left for rounding. + + significand2 += significand2 & 1; + significand2 >>= 1; + + // Here, we are addressing a scenario in which the original number was subnormal, but it became normal after + // rounding up. For example, when we are parsing 1.17549433e-38 before rounding and removing the least + // significant bit significand2 = 0x1FFFFFF and exp2 = -127. After rounding, we get significand2 = 0x800000, + // which is the significand of the smallest normal number. + exp2 = (significand2 < (1L << 23)) ? IEEE32_SUBNORMAL_EXPONENT : IEEE32_MIN_FINITE_NUMBER_EXPONENT; + return toFloat(negative, (int) significand2, (int) exp2); + } + + // Here, we are addressing a scenario of rounding the binary significand when it falls precisely halfway + // between two integers. To understand the rationale behind the conditions used to identify this case, refer to + // sections 6, 8.1, and 9.1 of "Number Parsing at a Gigabyte per Second". + if (exp10 >= -17 && exp10 <= 10) { + if ((significand2 << upperShift == upper) && (compareUnsigned(lower, 1) <= 0)) { + if ((significand2 & 3) == 1) { + significand2 &= ~1; + } + } + } + + // Round up if the significand is odd and remove the least significant bit that we've left for rounding. + significand2 += significand2 & 1; + significand2 >>= 1; + + if (significand2 == (1L << IEEE32_SIGNIFICAND_SIZE_IN_BITS)) { + // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand + // by 2 and update the exponent accordingly. 
+ significand2 >>= 1; + exp2++; + } + + if (exp2 > IEEE32_MAX_FINITE_NUMBER_EXPONENT) { + return infinity(negative); + } + return toFloat(negative, (int) significand2, (int) exp2); + } + + private static float toFloat(boolean negative, int significand2, int exp2) { + int bits = significand2; + bits &= ~(1 << IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT); // clear the implicit bit + bits |= (exp2 + IEEE32_EXPONENT_BIAS) << IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT; + bits = negative ? (bits | (1 << IEEE32_SIGN_BIT_INDEX)) : bits; + return Float.intBitsToFloat(bits); + } + + private static float infinity(boolean negative) { + return negative ? NEGATIVE_INFINITY : POSITIVE_INFINITY; + } + + private static float zero(boolean negative) { + return negative ? -0.0f : 0.0f; + } + + // The following parser is based on the idea described in + // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html and implemented in + // https://github.com/simdjson/simdjson/blob/caff09cafceb0f5f6fc9109236d6dd09ac4bc0d8/src/from_chars.cpp + private float slowlyParseFloat(byte[] buffer, int offset) { + final SlowPathDecimal decimal = slowPathDecimal; + decimal.reset(); + + decimal.negative = buffer[offset] == '-'; + int currentIdx = decimal.negative ? 
offset + 1 : offset; + long exp10 = 0; + + currentIdx = skipZeros(buffer, currentIdx); + currentIdx = parseDigits(buffer, decimal, currentIdx); + if (buffer[currentIdx] == '.') { + currentIdx++; + int firstIdxAfterPeriod = currentIdx; + if (decimal.digitCount == 0) { + currentIdx = skipZeros(buffer, currentIdx); + } + currentIdx = parseDigits(buffer, decimal, currentIdx); + exp10 = firstIdxAfterPeriod - currentIdx; + } + + int currentIdxMovingBackwards = currentIdx - 1; + int trailingZeros = 0; + // Here, we also skip the period to handle cases like 100000000000000000000.000000 + while (buffer[currentIdxMovingBackwards] == '0' || buffer[currentIdxMovingBackwards] == '.') { + if (buffer[currentIdxMovingBackwards] == '0') { + trailingZeros++; + } + currentIdxMovingBackwards--; + } + exp10 += decimal.digitCount; + decimal.digitCount -= trailingZeros; + + if (decimal.digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { + decimal.digitCount = SLOW_PATH_MAX_DIGIT_COUNT; + decimal.truncated = true; + } + + if (isExponentIndicator(buffer[currentIdx])) { + currentIdx++; + exp10 = exponentParser.parse(buffer, currentIdx, exp10).exponent(); + } + + // At this point, the number we are parsing is represented in the following way: w * 10^exp10, where -1 < w < 1. + if (exp10 <= -46) { + // We know that -1e-46 < w * 10^exp10 < 1e-46. In binary32 -1e-46 = -0.0 and 1e-46 = +0.0, so we can + // safely return +/-0.0. + return zero(decimal.negative); + } else if (exp10 >= 40) { + // We know that either w * 10^exp10 <= -0.1e40 or w * 10^exp10 >= 0.1e40. + // In binary32 -0.1e40 = -inf and 0.1e40 = +inf, so we can safely return +/-inf. + return infinity(decimal.negative); + } + + decimal.exp10 = (int) exp10; + int exp2 = 0; + + // We start the following loop with the decimal in the form of w * 10^exp10. After a series of + // right-shifts (dividing by a power of 2), we transform the decimal into w' * 2^exp2 * 10^exp10', + // where exp10' is <= 0. 
Resultantly, w' * 10^exp10' is in the range of [0, 1). + while (decimal.exp10 > 0) { + int shift = resolveShiftDistanceBasedOnExponent10(decimal.exp10); + decimal.shiftRight(shift); + exp2 += shift; + } + + // Now, we are left-shifting to get to the point where w'' * 10^exp10'' is within the range of [1/2, 1). + while (decimal.exp10 <= 0) { + int shift; + if (decimal.exp10 == 0) { + if (decimal.digits[0] >= 5) { + break; + } + shift = (decimal.digits[0] < 2) ? 2 : 1; + } else { + shift = resolveShiftDistanceBasedOnExponent10(-decimal.exp10); + } + decimal.shiftLeft(shift); + exp2 -= shift; + } + + // Here, w'' * 10^exp10'' falls within the range of [1/2, 1). In binary32, the significand must be within the + // range of [1, 2). We can get to the target range by decreasing the binary exponent. Resultantly, the decimal + // is represented as w'' * 10^exp10'' * 2^exp2, where w'' * 10^exp10'' is in the range of [1, 2). + exp2--; + + while (IEEE32_MIN_FINITE_NUMBER_EXPONENT > exp2) { + int n = IEEE32_MIN_FINITE_NUMBER_EXPONENT - exp2; + if (n > SLOW_PATH_MAX_SHIFT) { + n = SLOW_PATH_MAX_SHIFT; + } + decimal.shiftRight(n); + exp2 += n; + } + + // To conform to the IEEE 754 standard, the binary significand must fall within the range of [2^23, 2^24). Hence, + // we perform the following multiplication. If, after this step, the significand is less than 2^23, we have a + // subnormal number, which we will address later. + decimal.shiftLeft(IEEE32_SIGNIFICAND_SIZE_IN_BITS); + + long significand2 = decimal.computeSignificand(); + if (significand2 >= (1L << IEEE32_SIGNIFICAND_SIZE_IN_BITS)) { + // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand + // by 2 and update the exponent accordingly. 
+ significand2 >>= 1; + exp2++; + } + + if (significand2 < (1L << IEEE32_SIGNIFICAND_EXPLICIT_BIT_COUNT)) { + exp2 = IEEE32_SUBNORMAL_EXPONENT; + } + if (exp2 > IEEE32_MAX_FINITE_NUMBER_EXPONENT) { + return infinity(decimal.negative); + } + return toFloat(decimal.negative, (int) significand2, exp2); + } + + private static int resolveShiftDistanceBasedOnExponent10(int exp10) { + return (exp10 < SLOW_PATH_SHIFTS.length) ? SLOW_PATH_SHIFTS[exp10] : SLOW_PATH_MAX_SHIFT; + } + + private int skipZeros(byte[] buffer, int currentIdx) { + while (buffer[currentIdx] == '0') { + currentIdx++; + } + return currentIdx; + } + + private int parseDigits(byte[] buffer, SlowPathDecimal decimal, int currentIdx) { + while (isDigit(buffer[currentIdx])) { + if (decimal.digitCount < SLOW_PATH_MAX_DIGIT_COUNT) { + decimal.digits[decimal.digitCount] = convertCharacterToDigit(buffer[currentIdx]); + } + decimal.digitCount++; + currentIdx++; + } + return currentIdx; + } + + private static byte convertCharacterToDigit(byte b) { + return (byte) (b - '0'); + } + + private static boolean isDigit(byte b) { + return b >= '0' && b <= '9'; + } + + private static class SlowPathDecimal { + + final byte[] digits = new byte[SLOW_PATH_MAX_DIGIT_COUNT]; + int digitCount; + int exp10; + boolean truncated; + boolean negative; + + // Before calling this method we have to make sure that the significand is within the range of [0, 2^24 - 1]. + long computeSignificand() { + if (digitCount == 0 || exp10 < 0) { + return 0; + } + long significand = 0; + for (int i = 0; i < exp10; i++) { + significand = (10 * significand) + ((i < digitCount) ? digits[i] : 0); + } + boolean roundUp = false; + if (exp10 < digitCount) { + roundUp = digits[exp10] >= 5; + if ((digits[exp10] == 5) && (exp10 + 1 == digitCount)) { + // If the digits haven't been truncated, then we are exactly halfway between two integers. In such + // cases, we round to even, otherwise we round up. 
+ roundUp = truncated || (significand & 1) == 1; + } + } + return roundUp ? ++significand : significand; + } + + void shiftLeft(int shift) { + if (digitCount == 0) { + return; + } + + int numberOfAdditionalDigits = calculateNumberOfAdditionalDigitsAfterLeftShift(shift); + int readIndex = digitCount - 1; + int writeIndex = digitCount - 1 + numberOfAdditionalDigits; + long n = 0; + + while (readIndex >= 0) { + n += (long) digits[readIndex] << shift; + long quotient = divideUnsigned(n, 10); + long remainder = remainderUnsigned(n, 10); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex] = (byte) remainder; + } else if (remainder > 0) { + truncated = true; + } + n = quotient; + writeIndex--; + readIndex--; + } + + while (compareUnsigned(n, 0) > 0) { + long quotient = divideUnsigned(n, 10); + long remainder = remainderUnsigned(n, 10); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex] = (byte) remainder; + } else if (remainder > 0) { + truncated = true; + } + n = quotient; + writeIndex--; + } + digitCount += numberOfAdditionalDigits; + if (digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { + digitCount = SLOW_PATH_MAX_DIGIT_COUNT; + } + exp10 += numberOfAdditionalDigits; + trimTrailingZeros(); + } + + // See https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html#hpd-shifts + private int calculateNumberOfAdditionalDigitsAfterLeftShift(int shift) { + int a = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift]; + int b = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift + 1]; + int newDigitCount = a >> 11; + int pow5OffsetA = 0x7FF & a; + int pow5OffsetB = 0x7FF & b; + + int n = pow5OffsetB - pow5OffsetA; + for (int i = 0; i < n; i++) { + if (i >= digitCount) { + return newDigitCount - 1; + } else if (digits[i] < POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { + return newDigitCount - 1; + } else if (digits[i] > POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { + return newDigitCount; + } + } + return newDigitCount; + } + + void shiftRight(int shift) { 
+ int readIndex = 0; + int writeIndex = 0; + long n = 0; + + while ((n >>> shift) == 0) { + if (readIndex < digitCount) { + n = (10 * n) + digits[readIndex++]; + } else if (n == 0) { + return; + } else { + while ((n >>> shift) == 0) { + n = 10 * n; + readIndex++; + } + break; + } + } + exp10 -= (readIndex - 1); + long mask = (1L << shift) - 1; + while (readIndex < digitCount) { + byte newDigit = (byte) (n >>> shift); + n = (10 * (n & mask)) + digits[readIndex++]; + digits[writeIndex++] = newDigit; + } + while (compareUnsigned(n, 0) > 0) { + byte newDigit = (byte) (n >>> shift); + n = 10 * (n & mask); + if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { + digits[writeIndex++] = newDigit; + } else if (newDigit > 0) { + truncated = true; + } + } + digitCount = writeIndex; + trimTrailingZeros(); + } + + private void trimTrailingZeros() { + while ((digitCount > 0) && (digits[digitCount - 1] == 0)) { + digitCount--; + } + } + + private void reset() { + digitCount = 0; + exp10 = 0; + truncated = false; + } + } +} diff --git a/src/main/java/org/simdjson/JsonCharacterBlock.java b/src/main/java/org/simdjson/JsonCharacterBlock.java deleted file mode 100644 index b99db20..0000000 --- a/src/main/java/org/simdjson/JsonCharacterBlock.java +++ /dev/null @@ -1,8 +0,0 @@ -package org.simdjson; - -record JsonCharacterBlock(long whitespace, long op) { - - long scalar() { - return ~(op | whitespace); - } -} diff --git a/src/main/java/org/simdjson/JsonIterator.java b/src/main/java/org/simdjson/JsonIterator.java index 3e96274..2ca2f26 100644 --- a/src/main/java/org/simdjson/JsonIterator.java +++ b/src/main/java/org/simdjson/JsonIterator.java @@ -17,21 +17,28 @@ class JsonIterator { private final BitIndexes indexer; private final boolean[] isArray; - JsonIterator(BitIndexes indexer, int capacity, int maxDepth, int padding) { + JsonIterator(BitIndexes indexer, byte[] stringBuffer, int capacity, int maxDepth, int padding) { this.indexer = indexer; this.isArray = new boolean[maxDepth]; - 
this.tapeBuilder = new TapeBuilder(capacity, maxDepth, padding); + this.tapeBuilder = new TapeBuilder(capacity, maxDepth, padding, stringBuffer); } JsonValue walkDocument(byte[] buffer, int len) { + if (indexer.isEnd()) { + throw new JsonParsingException("No structural element found."); + } + tapeBuilder.visitDocumentStart(); int depth = 0; int state; - int idx = indexer.advance(); + int idx = indexer.getAndAdvance(); switch (buffer[idx]) { case '{' -> { + if (buffer[indexer.getLast()] != '}') { + throw new JsonParsingException("Unclosed object. Missing '}' for starting '{'."); + } if (buffer[indexer.peek()] == '}') { indexer.advance(); tapeBuilder.visitEmptyObject(); @@ -41,6 +48,9 @@ JsonValue walkDocument(byte[] buffer, int len) { } } case '[' -> { + if (buffer[indexer.getLast()] != ']') { + throw new JsonParsingException("Unclosed array. Missing ']' for starting '['."); + } if (buffer[indexer.peek()] == ']') { indexer.advance(); tapeBuilder.visitEmptyArray(); @@ -55,13 +65,13 @@ JsonValue walkDocument(byte[] buffer, int len) { } } - while (indexer.hasNext()) { + while (state != DOCUMENT_END) { if (state == OBJECT_BEGIN) { depth++; isArray[depth] = false; tapeBuilder.visitObjectStart(depth); - int keyIdx = indexer.advance(); + int keyIdx = indexer.getAndAdvance(); if (buffer[keyIdx] != '"') { throw new JsonParsingException("Object does not start with a key"); } @@ -71,10 +81,10 @@ JsonValue walkDocument(byte[] buffer, int len) { } if (state == OBJECT_FIELD) { - if (buffer[indexer.advance()] != ':') { + if (buffer[indexer.getAndAdvance()] != ':') { throw new JsonParsingException("Missing colon after key in object"); } - idx = indexer.advance(); + idx = indexer.getAndAdvance(); switch (buffer[idx]) { case '{' -> { if (buffer[indexer.peek()] == '}') { @@ -102,10 +112,10 @@ JsonValue walkDocument(byte[] buffer, int len) { } if (state == OBJECT_CONTINUE) { - switch (buffer[indexer.advance()]) { + switch (buffer[indexer.getAndAdvance()]) { case ',' -> { 
tapeBuilder.incrementCount(depth); - int keyIdx = indexer.advance(); + int keyIdx = indexer.getAndAdvance(); if (buffer[keyIdx] != '"') { throw new JsonParsingException("Key string missing at beginning of field in object"); } @@ -140,7 +150,7 @@ JsonValue walkDocument(byte[] buffer, int len) { } if (state == ARRAY_VALUE) { - idx = indexer.advance(); + idx = indexer.getAndAdvance(); switch (buffer[idx]) { case '{' -> { if (buffer[indexer.peek()] == '}') { @@ -168,7 +178,7 @@ JsonValue walkDocument(byte[] buffer, int len) { } if (state == ARRAY_CONTINUE) { - switch (buffer[indexer.advance()]) { + switch (buffer[indexer.getAndAdvance()]) { case ',' -> { tapeBuilder.incrementCount(depth); state = ARRAY_VALUE; @@ -180,14 +190,11 @@ JsonValue walkDocument(byte[] buffer, int len) { default -> throw new JsonParsingException("Missing comma between array values"); } } + } + tapeBuilder.visitDocumentEnd(); - if (state == DOCUMENT_END) { - tapeBuilder.visitDocumentEnd(); - - if (!indexer.isEnd()) { - throw new JsonParsingException("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - } - } + if (!indexer.isEnd()) { + throw new JsonParsingException("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); } return tapeBuilder.createJsonValue(buffer); } diff --git a/src/main/java/org/simdjson/JsonParsingException.java b/src/main/java/org/simdjson/JsonParsingException.java index 5091eb2..2c924d8 100644 --- a/src/main/java/org/simdjson/JsonParsingException.java +++ b/src/main/java/org/simdjson/JsonParsingException.java @@ -5,4 +5,8 @@ public class JsonParsingException extends RuntimeException { JsonParsingException(String message) { super(message); } + + JsonParsingException(String message, Throwable throwable) { + super(message, throwable); + } } diff --git a/src/main/java/org/simdjson/JsonStringBlock.java b/src/main/java/org/simdjson/JsonStringBlock.java deleted file mode 100644 index 
d806681..0000000 --- a/src/main/java/org/simdjson/JsonStringBlock.java +++ /dev/null @@ -1,12 +0,0 @@ -package org.simdjson; - -record JsonStringBlock(long quote, long inString) { - - long stringTail() { - return inString ^ quote; - } - - long nonQuoteInsideString(long mask) { - return mask & inString; - } -} diff --git a/src/main/java/org/simdjson/JsonStringScanner.java b/src/main/java/org/simdjson/JsonStringScanner.java deleted file mode 100644 index 6d856ac..0000000 --- a/src/main/java/org/simdjson/JsonStringScanner.java +++ /dev/null @@ -1,90 +0,0 @@ -package org.simdjson; - -import jdk.incubator.vector.ByteVector; - -class JsonStringScanner { - - private static final long EVEN_BITS_MASK = 0x5555555555555555L; - private static final long ODD_BITS_MASK = ~EVEN_BITS_MASK; - - private final ByteVector backslashMask; - private final ByteVector quoteMask; - - private long prevInString = 0; - private long prevEscaped = 0; - - JsonStringScanner() { - this.backslashMask = ByteVector.broadcast(StructuralIndexer.BYTE_SPECIES, (byte) '\\'); - this.quoteMask = ByteVector.broadcast(StructuralIndexer.BYTE_SPECIES, (byte) '"'); - } - - JsonStringBlock next(ByteVector chunk0) { - long backslash = eq(chunk0, backslashMask); - long escaped = findEscaped(backslash); - long quote = eq(chunk0, quoteMask) & ~escaped; - long inString = prefixXor(quote) ^ prevInString; - prevInString = inString >> 63; - return new JsonStringBlock(quote, inString); - } - - JsonStringBlock next(ByteVector chunk0, ByteVector chunk1) { - long backslash = eq(chunk0, chunk1, backslashMask); - long escaped = findEscaped(backslash); - long quote = eq(chunk0, chunk1, quoteMask) & ~escaped; - long inString = prefixXor(quote) ^ prevInString; - prevInString = inString >> 63; - return new JsonStringBlock(quote, inString); - } - - private long eq(ByteVector chunk0, ByteVector mask) { - long r = chunk0.eq(mask).toLong(); - return r; - } - - private long eq(ByteVector chunk0, ByteVector chunk1, ByteVector mask) { - 
long r0 = chunk0.eq(mask).toLong(); - long r1 = chunk1.eq(mask).toLong(); - return r0 | (r1 << 32); - } - - private long findEscaped(long backslash) { - if (backslash == 0) { - long escaped = prevEscaped; - prevEscaped = 0; - return escaped; - } - backslash &= ~prevEscaped; - long followsEscape = backslash << 1 | prevEscaped; - long oddSequenceStarts = backslash & ODD_BITS_MASK & ~followsEscape; - - long sequencesStartingOnEvenBits = oddSequenceStarts + backslash; - // Here, we check if the unsigned addition above caused an overflow. If that's the case, we store 1 in prevEscaped. - // The formula used to detect overflow was taken from 'Hacker's Delight, Second Edition' by Henry S. Warren, Jr., - // Chapter 2-13. - prevEscaped = ((oddSequenceStarts >>> 1) + (backslash >>> 1) + ((oddSequenceStarts & backslash) & 1)) >>> 63; - - long invertMask = sequencesStartingOnEvenBits << 1; - return (EVEN_BITS_MASK ^ invertMask) & followsEscape; - } - - private long prefixXor(long bitmask) { - bitmask ^= bitmask << 1; - bitmask ^= bitmask << 2; - bitmask ^= bitmask << 4; - bitmask ^= bitmask << 8; - bitmask ^= bitmask << 16; - bitmask ^= bitmask << 32; - return bitmask; - } - - void reset() { - prevInString = 0; - prevEscaped = 0; - } - - void finish() { - if (prevInString != 0) { - throw new JsonParsingException("Unclosed string. 
A string is opened, but never closed."); - } - } -} diff --git a/src/main/java/org/simdjson/JsonValue.java b/src/main/java/org/simdjson/JsonValue.java index 279f55a..6877519 100644 --- a/src/main/java/org/simdjson/JsonValue.java +++ b/src/main/java/org/simdjson/JsonValue.java @@ -3,6 +3,7 @@ import java.util.Arrays; import java.util.Iterator; import java.util.Map; +import java.util.NoSuchElementException; import static org.simdjson.Tape.DOUBLE; import static org.simdjson.Tape.FALSE_VALUE; @@ -157,9 +158,12 @@ public boolean hasNext() { @Override public JsonValue next() { - JsonValue value = new JsonValue(tape, idx, stringBuffer, buffer); - idx = tape.computeNextIndex(idx); - return value; + if (hasNext()) { + JsonValue value = new JsonValue(tape, idx, stringBuffer, buffer); + idx = tape.computeNextIndex(idx); + return value; + } + throw new NoSuchElementException("No more elements"); } } diff --git a/src/main/java/org/simdjson/NumberParser.java b/src/main/java/org/simdjson/NumberParser.java index 7c9c101..44fab42 100644 --- a/src/main/java/org/simdjson/NumberParser.java +++ b/src/main/java/org/simdjson/NumberParser.java @@ -1,73 +1,34 @@ package org.simdjson; -import static java.lang.Double.NEGATIVE_INFINITY; -import static java.lang.Double.POSITIVE_INFINITY; -import static java.lang.Double.longBitsToDouble; -import static java.lang.Long.compareUnsigned; -import static java.lang.Long.divideUnsigned; -import static java.lang.Long.numberOfLeadingZeros; -import static java.lang.Long.remainderUnsigned; -import static java.lang.Math.abs; -import static java.lang.Math.unsignedMultiplyHigh; +import org.simdjson.ExponentParser.ExponentParsingResult; + import static org.simdjson.CharacterUtils.isStructuralOrWhitespace; -import static org.simdjson.NumberParserTables.NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT; -import static org.simdjson.NumberParserTables.POWERS_OF_FIVE; -import static org.simdjson.NumberParserTables.POWER_OF_FIVE_DIGITS; +import static 
org.simdjson.ExponentParser.isExponentIndicator; class NumberParser { - // When parsing doubles, we assume that a long used to store digits is unsigned. Thus, it can safely accommodate - // up to 19 digits (9999999999999999999 < 2^64). - private static final int FAST_PATH_MAX_DIGIT_COUNT = 19; - // The smallest non-zero number representable in binary64 is 2^-1074, which is about 4.941 * 10^-324. - // If we consider a number in the form of w * 10^q where 1 <= w <= 9999999999999999999, then - // 1 * 10^q <= w * 10^q <= 9.999999999999999999 * 10^18 * 10^q. To ensure w * 10^q < 2^-1074, q must satisfy the - // following inequality: 9.999999999999999999 * 10^(18 + q) < 2^-1074. This condition holds true whenever - // 18 + q < -324. Thus, for q < -342, we can reliably conclude that the number w * 10^q is smaller than 2^-1074, - // and this, in turn means the number is equal to zero. - private static final int FAST_PATH_MIN_POWER_OF_TEN = -342; - // We know that (1 - 2^-53) * 2^1024, which is about 1.798 * 10^308, is the largest number representable in binary64. - // When the parsed number is expressed as w * 10^q, where w >= 1, we are sure that for any q > 308, the number is - // infinite. 
- private static final int FAST_PATH_MAX_POWER_OF_TEN = 308; - private static final double[] POWERS_OF_TEN = { - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, - 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 - }; - private static final long MAX_LONG_REPRESENTED_AS_DOUBLE_EXACTLY = 9007199254740991L; // 2^53 - 1 - private static final int IEEE64_EXPONENT_BIAS = 1023; - private static final int IEEE64_SIGN_BIT_INDEX = 63; - private static final int IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT = 52; - private static final int IEEE64_SIGNIFICAND_SIZE_IN_BITS = IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT + 1; - private static final int IEEE64_MAX_FINITE_NUMBER_EXPONENT = 1023; - private static final int IEEE64_MIN_FINITE_NUMBER_EXPONENT = -1022; - private static final int IEEE64_SUBNORMAL_EXPONENT = -1023; + private static final int BYTE_MAX_DIGIT_COUNT = 3; + private static final int BYTE_MAX_ABS_VALUE = 128; + private static final int SHORT_MAX_DIGIT_COUNT = 5; + private static final int SHORT_MAX_ABS_VALUE = 32768; + private static final int INT_MAX_DIGIT_COUNT = 10; + private static final long INT_MAX_ABS_VALUE = 2147483648L; private static final int LONG_MAX_DIGIT_COUNT = 19; - // This is the upper limit for the count of decimal digits taken into account in the slow path. All digits exceeding - // this threshold are excluded. 
- private static final int SLOW_PATH_MAX_DIGIT_COUNT = 800; - private static final int SLOW_PATH_MAX_SHIFT = 60; - private static final byte[] SLOW_PATH_SHIFTS = { - 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, - 33, 36, 39, 43, 46, 49, 53, 56, 59, - }; - - private final Tape tape; - private final SlowPathDecimal slowPathDecimal = new SlowPathDecimal(); - - private int currentIdx; - - NumberParser(Tape tape) { - this.tape = tape; - } - void parseNumber(byte[] buffer, int offset) { + private final DigitsParsingResult digitsParsingResult = new DigitsParsingResult(); + private final ExponentParser exponentParser = new ExponentParser(); + private final DoubleParser doubleParser = new DoubleParser(); + private final FloatParser floatParser = new FloatParser(); + + void parseNumber(byte[] buffer, int offset, Tape tape) { boolean negative = buffer[offset] == '-'; - currentIdx = negative ? offset + 1 : offset; + int currentIdx = negative ? offset + 1 : offset; int digitsStartIdx = currentIdx; - long digits = parseDigits(buffer, 0); + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); int digitCount = currentIdx - digitsStartIdx; if (digitCount == 0) { throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); @@ -77,12 +38,14 @@ void parseNumber(byte[] buffer, int offset) { } long exponent = 0; - boolean isDouble = false; + boolean floatingPointNumber = false; if ('.' == buffer[currentIdx]) { - isDouble = true; + floatingPointNumber = true; currentIdx++; int firstIdxAfterPeriod = currentIdx; - digits = parseDigits(buffer, digits); + digitsParsingResult = parseDigits(buffer, currentIdx, digits); + digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); exponent = firstIdxAfterPeriod - currentIdx; if (exponent == 0) { throw new JsonParsingException("Invalid number. 
Decimal point has to be followed by a digit."); @@ -90,21 +53,18 @@ void parseNumber(byte[] buffer, int offset) { digitCount = currentIdx - digitsStartIdx; } if (isExponentIndicator(buffer[currentIdx])) { - isDouble = true; + floatingPointNumber = true; currentIdx++; - exponent = parseExponent(buffer, exponent); + ExponentParsingResult exponentParsingResult = exponentParser.parse(buffer, currentIdx, exponent); + exponent = exponentParsingResult.exponent(); + currentIdx = exponentParsingResult.currentIdx(); } if (!isStructuralOrWhitespace(buffer[currentIdx])) { throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); } - if (isDouble) { - double d; - if (shouldBeHandledBySlowPath(buffer, digitsStartIdx, digitCount)) { - d = slowlyParseDouble(buffer, offset); - } else { - d = computeDouble(negative, digits, exponent); - } - tape.appendDouble(d); + if (floatingPointNumber) { + double value = doubleParser.parse(buffer, offset, negative, digitsStartIdx, digitCount, digits, exponent); + tape.appendDouble(value); } else { if (isOutOfLongRange(negative, digits, digitCount)) { throw new JsonParsingException("Number value is out of long range ([" + Long.MIN_VALUE + ", " + Long.MAX_VALUE + "])."); @@ -113,521 +73,291 @@ void parseNumber(byte[] buffer, int offset) { } } - private static boolean isOutOfLongRange(boolean negative, long digits, int digitCount) { - if (digitCount < LONG_MAX_DIGIT_COUNT) { + byte parseByte(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; + + int currentIdx = negative ? offset + 1 : offset; + + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. 
Minus has to be followed by a digit."); + } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); + } + + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); + } + if (isOutOfByteRange(negative, digits, digitCount)) { + throw new JsonParsingException("Number value is out of byte range ([" + Byte.MIN_VALUE + ", " + Byte.MAX_VALUE + "])."); + } + return (byte) (negative ? (~digits + 1) : digits); + } + + private static boolean isOutOfByteRange(boolean negative, long digits, int digitCount) { + if (digitCount < BYTE_MAX_DIGIT_COUNT) { return false; } - if (digitCount > LONG_MAX_DIGIT_COUNT) { + if (digitCount > BYTE_MAX_DIGIT_COUNT) { return true; } - if (negative && digits == Long.MIN_VALUE) { - // The maximum value we can store in a long is 9223372036854775807. When we try to store 9223372036854775808, - // a long wraps around, resulting in -9223372036854775808 (Long.MIN_VALUE). If the number we are parsing is - // negative, and we've attempted to store 9223372036854775808 in "digits", we can be sure that we are - // dealing with Long.MIN_VALUE, which obviously does not fall outside the acceptable range. - return false; + if (negative) { + return digits > BYTE_MAX_ABS_VALUE; } - return digits < 0; + return digits > Byte.MAX_VALUE; } - private static double computeDouble(boolean negative, long significand10, long exp10) { - if (abs(exp10) < POWERS_OF_TEN.length && compareUnsigned(significand10, MAX_LONG_REPRESENTED_AS_DOUBLE_EXACTLY) <= 0) { - // This path has been described in https://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/. - double d = significand10; - if (exp10 < 0) { - d = d / POWERS_OF_TEN[(int) -exp10]; - } else { - d = d * POWERS_OF_TEN[(int) exp10]; - } - return negative ? 
-d : d; - } - - // The following path is an implementation of the Eisel-Lemire algorithm described by Daniel Lemire in - // "Number Parsing at a Gigabyte per Second" (https://arxiv.org/abs/2101.11408). - - if (exp10 < FAST_PATH_MIN_POWER_OF_TEN || significand10 == 0) { - return zero(negative); - } else if (exp10 > FAST_PATH_MAX_POWER_OF_TEN) { - return infinity(negative); - } - - // We start by normalizing the decimal significand so that it is within the range of [2^63, 2^64). - int lz = numberOfLeadingZeros(significand10); - significand10 <<= lz; - - // Initially, the number we are parsing is in the form of w * 10^q = w * 5^q * 2^q, and our objective is to - // convert it to m * 2^p. We can represent w * 10^q as w * 5^q * 2^r * 2^p, where w * 5^q * 2^r = m. - // Therefore, in the next step we compute w * 5^q. The implementation of this multiplication is optimized - // to minimize necessary operations while ensuring precise results. For further insight, refer to the - // aforementioned paper. - int powersOfFiveTableIndex = 2 * (int) (exp10 - FAST_PATH_MIN_POWER_OF_TEN); - long upper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex]); - long lower = significand10 * POWERS_OF_FIVE[powersOfFiveTableIndex]; - if ((upper & 0x1FF) == 0x1FF) { - long secondUpper = unsignedMultiplyHigh(significand10, POWERS_OF_FIVE[powersOfFiveTableIndex + 1]); - lower += secondUpper; - if (compareUnsigned(secondUpper, lower) > 0) { - upper++; - } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without Fallback" - // (https://arxiv.org/abs/2212.06644), at this point we are sure that the product is sufficiently accurate, - // and more computation is not needed. - } - - // Here, we extract the binary significand from the product. Although in binary64 the significand has 53 bits, - // we extract 54 bits to use the least significant bit for rounding. 
Since both the decimal significand and the - // values stored in POWERS_OF_FIVE are normalized, ensuring that their most significant bits are set, the product - // has either 0 or 1 leading zeros. As a result, we need to perform a right shift of either 9 or 10 bits. - long upperBit = upper >>> 63; - long upperShift = upperBit + 9; - long significand2 = upper >>> upperShift; - - // Now, we have to determine the value of the binary exponent. Let's begin by calculating the contribution of - // 10^q. Our goal is to compute f0 and f1 such that: - // - when q >= 0: 10^q = (5^q / 2^(f0 - q)) * 2^f0 - // - when q < 0: 10^q = (2^(f1 - q) / 5^-q) * 2^f1 - // Both (5^q / 2^(f0 - q)) and (2^(f1 - q) / 5^-q) must fall within the range of [1, 2). - // It turns out that these conditions are met when: - // - 0 <= q <= FAST_PATH_MAX_POWER_OF_TEN, and f0 = floor(log2(5^q)) + q = floor(q * log(5) / log(2)) + q = (217706 * q) / 2^16. - // - FAST_PATH_MIN_POWER_OF_TEN <= q < 0, and f1 = -ceil(log2(5^-q)) + q = -ceil(-q * log(5) / log(2)) + q = (217706 * q) / 2^16. - // Thus, we can express the contribution of 10^q to the exponent as (217706 * exp10) >> 16. - // - // Furthermore, we need to factor in the following normalizations we've performed: - // - shifting the decimal significand left bitwise - // - shifting the binary significand right bitwise if the most significant bit of the product was 1 - // Therefore, we add (63 - lz + upperBit) to the exponent. - long exp2 = ((217706 * exp10) >> 16) + 63 - lz + upperBit; - if (exp2 < IEEE64_MIN_FINITE_NUMBER_EXPONENT) { - // In the next step, we right-shift the binary significand by the difference between the minimum exponent - // and the binary exponent. In Java, the shift distance is limited to the range of 0 to 63, inclusive. - // Thus, we need to handle the case when the distance is >= 64 separately and always return zero. 
- if (exp2 <= IEEE64_MIN_FINITE_NUMBER_EXPONENT - 64) { - return zero(negative); - } + short parseShort(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; - // In this branch, it is likely that we are handling a subnormal number. Therefore, we adjust the significand - // to conform to the formula representing subnormal numbers: (significand2 * 2^(1 - IEEE64_EXPONENT_BIAS)) / 2^52. - significand2 >>= 1 - IEEE64_EXPONENT_BIAS - exp2; - // Round up if the significand is odd and remove the least significant bit that we've left for rounding. - significand2 += significand2 & 1; - significand2 >>= 1; - - // Here, we are addressing a scenario in which the original number was subnormal, but it became normal after - // rounding up. For example, when we are parsing 2.2250738585072013e-308 before rounding and removing the - // least significant bit significand2 = 0x3fffffffffffff and exp2 = -1023. After rounding, we get - // significand2 = 0x10000000000000, which is the significand of the smallest normal number. - exp2 = (significand2 < (1L << 52)) ? IEEE64_SUBNORMAL_EXPONENT : IEEE64_MIN_FINITE_NUMBER_EXPONENT; - return toDouble(negative, significand2, exp2); - } - - // Here, we are addressing a scenario of rounding the binary significand when it falls precisely halfway - // between two integers. To understand the rationale behind the condition used to identify this case, refer to - // sections 6, 8.1, and 9.1 of "Number Parsing at a Gigabyte per Second". - if ((compareUnsigned(lower, 1) <= 0) && (exp10 >= -4) && (exp10 <= 23) && ((significand2 & 3) == 1)) { - if (significand2 << upperShift == upper) { - significand2 &= ~1; - } - } + int currentIdx = negative ? offset + 1 : offset; - // Round up if the significand is odd and remove the least significant bit that we've left for rounding. 
- significand2 += significand2 & 1; - significand2 >>= 1; + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); + } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); + } - if (significand2 == (1L << IEEE64_SIGNIFICAND_SIZE_IN_BITS)) { - // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand - // by 2 and update the exponent accordingly. - significand2 >>= 1; - exp2++; + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); + } + if (isOutOfShortRange(negative, digits, digitCount)) { + throw new JsonParsingException("Number value is out of short range ([" + Short.MIN_VALUE + ", " + Short.MAX_VALUE + "])."); } + return (short) (negative ? 
(~digits + 1) : digits); + } - if (exp2 > IEEE64_MAX_FINITE_NUMBER_EXPONENT) { - return infinity(negative); + private static boolean isOutOfShortRange(boolean negative, long digits, int digitCount) { + if (digitCount < SHORT_MAX_DIGIT_COUNT) { + return false; + } + if (digitCount > SHORT_MAX_DIGIT_COUNT) { + return true; + } + if (negative) { + return digits > SHORT_MAX_ABS_VALUE; } - return toDouble(negative, significand2, exp2); + return digits > Short.MAX_VALUE; } - // The following parser is based on the idea described in - // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html and implemented in - // https://github.com/simdjson/simdjson/blob/caff09cafceb0f5f6fc9109236d6dd09ac4bc0d8/src/from_chars.cpp - private double slowlyParseDouble(byte[] buffer, int offset) { - SlowPathDecimal decimal = slowPathDecimal; - decimal.reset(); + int parseInt(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; - decimal.negative = buffer[offset] == '-'; - currentIdx = decimal.negative ? offset + 1 : offset; - long exp10 = 0; + int currentIdx = negative ? offset + 1 : offset; - skipZeros(buffer); - parseDigits(buffer, decimal); - if (buffer[currentIdx] == '.') { - currentIdx++; - int firstIdxAfterPeriod = currentIdx; - if (decimal.digitCount == 0) { - skipZeros(buffer); - } - parseDigits(buffer, decimal); - exp10 = firstIdxAfterPeriod - currentIdx; + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. 
Minus has to be followed by a digit."); } - - int currentIdxMovingBackwards = currentIdx - 1; - int trailingZeros = 0; - // Here, we also skip the period to handle cases like 100000000000000000000.000000 - while (buffer[currentIdxMovingBackwards] == '0' || buffer[currentIdxMovingBackwards] == '.') { - if (buffer[currentIdxMovingBackwards] == '0') { - trailingZeros++; - } - currentIdxMovingBackwards--; + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); } - exp10 += decimal.digitCount; - decimal.digitCount -= trailingZeros; - if (decimal.digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { - decimal.digitCount = SLOW_PATH_MAX_DIGIT_COUNT; - decimal.truncated = true; + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); } - - if (isExponentIndicator(buffer[currentIdx])) { - currentIdx++; - exp10 = parseExponent(buffer, exp10); - } - - // At this point, the number we are parsing is represented in the following way: w * 10^exp10, where -1 < w < 1. - if (exp10 <= -324) { - // We know that -1e-324 < w * 10^exp10 < 1e-324. In binary64 -1e-324 = -0.0 and 1e-324 = +0.0, so we can - // safely return +/-0.0. - return zero(decimal.negative); - } else if (exp10 >= 310) { - // We know that either w * 10^exp10 <= -0.1e310 or w * 10^exp10 >= 0.1e310. - // In binary64 -0.1e310 = -inf and 0.1e310 = +inf, so we can safely return +/-inf. - return infinity(decimal.negative); - } - - decimal.exp10 = (int) exp10; - int exp2 = 0; - - // We start the following loop with the decimal in the form of w * 10^exp10. After a series of - // right-shifts (dividing by a power of 2), we transform the decimal into w' * 2^exp2 * 10^exp10, - // where exp10 is <= 0. Resultantly, w' * 10^exp10 is in the range of [0, 1). 
- while (decimal.exp10 > 0) { - int shift = resolveShiftDistanceBasedOnExponent10(decimal.exp10); - decimal.shiftRight(shift); - exp2 += shift; - } - - // Now, we are left-shifting to get to the point where w'' * 10^exp10 is within the range of [1/2, 1). - while (decimal.exp10 <= 0) { - int shift; - if (decimal.exp10 == 0) { - if (decimal.digits[0] >= 5) { - break; - } - shift = (decimal.digits[0] < 2) ? 2 : 1; - } else { - shift = resolveShiftDistanceBasedOnExponent10(-decimal.exp10); - } - decimal.shiftLeft(shift); - exp2 -= shift; + if (isOutOfIntRange(negative, digits, digitCount)) { + throw new JsonParsingException("Number value is out of int range ([" + Integer.MIN_VALUE + ", " + Integer.MAX_VALUE + "])."); } + return (int) (negative ? (~digits + 1) : digits); + } - // Here, w'' * 10^exp10 falls within the range of [1/2, 1). In binary64, the significand must be within the - // range of [1, 2). We can get to the target range by decreasing the binary exponent. Resultantly, the decimal - // is represented as w'' * 10^exp10 * 2^exp2, where w'' * 10^exp10 is in the range of [1, 2). - exp2--; - - while (IEEE64_MIN_FINITE_NUMBER_EXPONENT > exp2) { - int n = IEEE64_MIN_FINITE_NUMBER_EXPONENT - exp2; - if (n > SLOW_PATH_MAX_SHIFT) { - n = SLOW_PATH_MAX_SHIFT; - } - decimal.shiftRight(n); - exp2 += n; + private static boolean isOutOfIntRange(boolean negative, long digits, int digitCount) { + if (digitCount < INT_MAX_DIGIT_COUNT) { + return false; + } + if (digitCount > INT_MAX_DIGIT_COUNT) { + return true; + } + if (negative) { + return digits > INT_MAX_ABS_VALUE; } + return digits > Integer.MAX_VALUE; + } - // To conform to the IEEE 754 standard, the binary significand must fall within the range of [2^52, 2^53). Hence, - // we perform the following multiplication. If, after this step, the significand is less than 2^52, we have a - // subnormal number, which we will address later. 
- decimal.shiftLeft(IEEE64_SIGNIFICAND_SIZE_IN_BITS); + long parseLong(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; + + int currentIdx = negative ? offset + 1 : offset; - long significand2 = decimal.computeSignificand(); - if (significand2 >= (1L << IEEE64_SIGNIFICAND_SIZE_IN_BITS)) { - // If we've reached here, it means that rounding has caused an overflow. We need to divide the significand - // by 2 and update the exponent accordingly. - significand2 >>= 1; - exp2++; + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + long digits = digitsParsingResult.digits(); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); + } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); } - if (significand2 < (1L << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT)) { - exp2 = IEEE64_SUBNORMAL_EXPONENT; + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); } - if (exp2 > IEEE64_MAX_FINITE_NUMBER_EXPONENT) { - return infinity(decimal.negative); + if (isOutOfLongRange(negative, digits, digitCount)) { + throw new JsonParsingException("Number value is out of long range ([" + Long.MIN_VALUE + ", " + Long.MAX_VALUE + "])."); } - return toDouble(decimal.negative, significand2, exp2); + return negative ? (~digits + 1) : digits; } - private static int resolveShiftDistanceBasedOnExponent10(int exp10) { - return (exp10 < SLOW_PATH_SHIFTS.length) ? 
SLOW_PATH_SHIFTS[exp10] : SLOW_PATH_MAX_SHIFT; - } + float parseFloat(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; - private long parseExponent(byte[] buffer, long exponent) { - boolean negative = '-' == buffer[currentIdx]; - if (negative || '+' == buffer[currentIdx]) { - currentIdx++; + int currentIdx = negative ? offset + 1 : offset; + + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); } - int exponentStartIdx = currentIdx; - long parsedExponent = parseDigits(buffer, 0); - if (exponentStartIdx == currentIdx) { - throw new JsonParsingException("Invalid number. Exponent indicator has to be followed by a digit."); - } - // Long.MAX_VALUE = 9223372036854775807 (19 digits). Therefore, any number with <= 18 digits can be safely - // stored in a long without causing an overflow. - int maxDigitCountLongCanAccommodate = 18; - if (currentIdx > exponentStartIdx + maxDigitCountLongCanAccommodate) { - // Potentially, we have an overflow here. We try to skip leading zeros. - while (buffer[exponentStartIdx] == '0') { - exponentStartIdx++; - } - if (currentIdx > exponentStartIdx + maxDigitCountLongCanAccommodate) { - // We still have more digits than a long can safely accommodate. - // - // The largest finite number that can be represented in binary64 is (1-2^-53) * 2^1024, which is about - // 1.798e308, and the smallest non-zero number is 2^-1074, roughly 4.941e-324. So, we might, potentially, - // care only about numbers with explicit exponents falling within the range of [-324, 308], and return - // either zero or infinity for everything outside of this range.However, we have to take into account - // the fractional part of the parsed number. 
This part can potentially cancel out the value of the - // explicit exponent. For example, 1000e-325 (1 * 10^3 * 10^-325 = 1 * 10^-322) is not equal to zero - // despite the explicit exponent being less than -324. - // - // Let's consider a scenario where the explicit exponent is greater than 999999999999999999. As long as - // the fractional part has <= 999999999999999690 digits, it doesn't matter whether we take - // 999999999999999999 or its actual value as the explicit exponent. This is due to the fact that the - // parsed number is infinite anyway (w * 10^-q * 10^999999999999999999 > (1-2^-53) * 2^1024, 0 < w < 10, - // 0 <= q <= 999999999999999690). Similarly, in a scenario where the explicit exponent is less than - // -999999999999999999, as long as the fractional part has <= 999999999999999674 digits, we can safely - // take 999999999999999999 as the explicit exponent, given that the parsed number is zero anyway - // (w * 10^q * 10^-999999999999999999 < 2^-1074, 0 < w < 10, 0 <= q <= 999999999999999674) - // - // Note that if the fractional part had 999999999999999674 digits, the JSON size would need to be - // 999999999999999674 bytes, which is approximately ~888 PiB. Consequently, it's reasonable to assume - // that the fractional part contains no more than 999999999999999674 digits. - parsedExponent = 999999999999999999L; - } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); } - // Note that we don't check if 'exponent' has overflowed after the following addition. This is because we - // know that the parsed exponent falls within the range of [-999999999999999999, 999999999999999999]. We also - // assume that 'exponent' before the addition is within the range of [-9223372036854775808, 9223372036854775807]. - // This assumption should always be valid as the value of 'exponent' is constrained by the size of the JSON input. - exponent += negative ? 
-parsedExponent : parsedExponent; - return exponent; - } - private long parseDigits(byte[] buffer, long digits) { - byte digit = convertCharacterToDigit(buffer[currentIdx]); - while (digit >= 0 && digit <= 9) { - digits = 10 * digits + digit; + long exponent = 0; + boolean floatingPointNumber = false; + if ('.' == buffer[currentIdx]) { + floatingPointNumber = true; currentIdx++; - digit = convertCharacterToDigit(buffer[currentIdx]); + int firstIdxAfterPeriod = currentIdx; + digitsParsingResult = parseDigits(buffer, currentIdx, digitsParsingResult.digits()); + currentIdx = digitsParsingResult.currentIdx(); + exponent = firstIdxAfterPeriod - currentIdx; + if (exponent == 0) { + throw new JsonParsingException("Invalid number. Decimal point has to be followed by a digit."); + } + digitCount = currentIdx - digitsStartIdx; } - return digits; - } - - private static boolean shouldBeHandledBySlowPath(byte[] buffer, int startDigitsIdx, int digitCount) { - if (digitCount <= FAST_PATH_MAX_DIGIT_COUNT) { - return false; + if (isExponentIndicator(buffer[currentIdx])) { + floatingPointNumber = true; + currentIdx++; + ExponentParsingResult exponentParsingResult = exponentParser.parse(buffer, currentIdx, exponent); + exponent = exponentParsingResult.exponent(); + currentIdx = exponentParsingResult.currentIdx(); } - int start = startDigitsIdx; - while (buffer[start] == '0' || buffer[start] == '.') { - start++; + if (!floatingPointNumber) { + throw new JsonParsingException("Invalid floating-point number. 
Fraction or exponent part is missing."); } - int significantDigitCount = digitCount - (start - startDigitsIdx); - return significantDigitCount > FAST_PATH_MAX_DIGIT_COUNT; + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); + } + + return floatParser.parse(buffer, offset, negative, digitsStartIdx, digitCount, digitsParsingResult.digits(), exponent); } - private void skipZeros(byte[] buffer) { - while (buffer[currentIdx] == '0') { - currentIdx++; + double parseDouble(byte[] buffer, int len, int offset) { + boolean negative = buffer[offset] == '-'; + + int currentIdx = negative ? offset + 1 : offset; + + int digitsStartIdx = currentIdx; + DigitsParsingResult digitsParsingResult = parseDigits(buffer, currentIdx, 0); + currentIdx = digitsParsingResult.currentIdx(); + int digitCount = currentIdx - digitsStartIdx; + if (digitCount == 0) { + throw new JsonParsingException("Invalid number. Minus has to be followed by a digit."); + } + if ('0' == buffer[digitsStartIdx] && digitCount > 1) { + throw new JsonParsingException("Invalid number. Leading zeroes are not allowed."); } - } - private void parseDigits(byte[] buffer, SlowPathDecimal decimal) { - while (isDigit(buffer[currentIdx])) { - if (decimal.digitCount < SLOW_PATH_MAX_DIGIT_COUNT) { - decimal.digits[decimal.digitCount] = convertCharacterToDigit(buffer[currentIdx]); + long exponent = 0; + boolean floatingPointNumber = false; + if ('.' == buffer[currentIdx]) { + floatingPointNumber = true; + currentIdx++; + int firstIdxAfterPeriod = currentIdx; + digitsParsingResult = parseDigits(buffer, currentIdx, digitsParsingResult.digits()); + currentIdx = digitsParsingResult.currentIdx(); + exponent = firstIdxAfterPeriod - currentIdx; + if (exponent == 0) { + throw new JsonParsingException("Invalid number. 
Decimal point has to be followed by a digit."); } - decimal.digitCount++; + digitCount = currentIdx - digitsStartIdx; + } + if (isExponentIndicator(buffer[currentIdx])) { + floatingPointNumber = true; currentIdx++; + ExponentParsingResult exponentParsingResult = exponentParser.parse(buffer, currentIdx, exponent); + exponent = exponentParsingResult.exponent(); + currentIdx = exponentParsingResult.currentIdx(); + } + if (!floatingPointNumber) { + throw new JsonParsingException("Invalid floating-point number. Fraction or exponent part is missing."); + } + if (currentIdx < len && !isStructuralOrWhitespace(buffer[currentIdx])) { + throw new JsonParsingException("Number has to be followed by a structural character or whitespace."); } - } - - private static boolean isDigit(byte b) { - return b >= '0' && b <= '9'; - } - - private static boolean isExponentIndicator(byte b) { - return 'e' == b || 'E' == b; - } - private static double toDouble(boolean negative, long significand2, long exp2) { - long bits = significand2; - bits &= ~(1L << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT); // clear the implicit bit - bits |= (exp2 + IEEE64_EXPONENT_BIAS) << IEEE64_SIGNIFICAND_EXPLICIT_BIT_COUNT; - bits = negative ? (bits | (1L << IEEE64_SIGN_BIT_INDEX)) : bits; - return longBitsToDouble(bits); + return doubleParser.parse(buffer, offset, negative, digitsStartIdx, digitCount, digitsParsingResult.digits(), exponent); } - private static double infinity(boolean negative) { - return negative ? NEGATIVE_INFINITY : POSITIVE_INFINITY; + private static boolean isOutOfLongRange(boolean negative, long digits, int digitCount) { + if (digitCount < LONG_MAX_DIGIT_COUNT) { + return false; + } + if (digitCount > LONG_MAX_DIGIT_COUNT) { + return true; + } + if (negative && digits == Long.MIN_VALUE) { + // The maximum value we can store in a long is 9223372036854775807. When we try to store 9223372036854775808, + // a long wraps around, resulting in -9223372036854775808 (Long.MIN_VALUE). 
If the number we are parsing is + // negative, and we've attempted to store 9223372036854775808 in "digits", we can be sure that we are + // dealing with Long.MIN_VALUE, which obviously does not fall outside the acceptable range. + return false; + } + return digits < 0; } - private static double zero(boolean negative) { - return negative ? -0.0 : 0.0; + private DigitsParsingResult parseDigits(byte[] buffer, int currentIdx, long digits) { + byte digit = convertCharacterToDigit(buffer[currentIdx]); + while (digit >= 0 && digit <= 9) { + digits = 10 * digits + digit; + currentIdx++; + digit = convertCharacterToDigit(buffer[currentIdx]); + } + return digitsParsingResult.of(digits, currentIdx); } private static byte convertCharacterToDigit(byte b) { return (byte) (b - '0'); } - private static class SlowPathDecimal { + private static class DigitsParsingResult { - final byte[] digits = new byte[SLOW_PATH_MAX_DIGIT_COUNT]; - int digitCount; - int exp10; - boolean truncated; - boolean negative; + private long digits; + private int currentIdx; - // Before calling this method we have to make sure that the significand is within the range of [0, 2^53 - 1]. - long computeSignificand() { - if (digitCount == 0 || exp10 < 0) { - return 0; - } - long significand = 0; - for (int i = 0; i < exp10; i++) { - significand = (10 * significand) + ((i < digitCount) ? digits[i] : 0); - } - boolean roundUp = false; - if (exp10 < digitCount) { - roundUp = digits[exp10] >= 5; - if ((digits[exp10] == 5) && (exp10 + 1 == digitCount)) { - // If the digits haven't been truncated, then we are exactly halfway between two integers. In such - // cases, we round to even, otherwise we round up. - roundUp = truncated || (significand & 1) == 1; - } - } - return roundUp ? 
++significand : significand; + DigitsParsingResult of(long digits, int currentIdx) { + this.digits = digits; + this.currentIdx = currentIdx; + return this; } - void shiftLeft(int shift) { - if (digitCount == 0) { - return; - } - - int numberOfAdditionalDigits = calculateNumberOfAdditionalDigitsAfterLeftShift(shift); - int readIndex = digitCount - 1; - int writeIndex = digitCount - 1 + numberOfAdditionalDigits; - long n = 0; - - while (readIndex >= 0) { - n += (long) digits[readIndex] << shift; - long quotient = divideUnsigned(n, 10); - long remainder = remainderUnsigned(n, 10); - if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { - digits[writeIndex] = (byte) remainder; - } else if (remainder > 0) { - truncated = true; - } - n = quotient; - writeIndex--; - readIndex--; - } - - while (compareUnsigned(n, 0) > 0) { - long quotient = divideUnsigned(n, 10); - long remainder = remainderUnsigned(n, 10); - if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { - digits[writeIndex] = (byte) remainder; - } else if (remainder > 0) { - truncated = true; - } - n = quotient; - writeIndex--; - } - digitCount += numberOfAdditionalDigits; - if (digitCount > SLOW_PATH_MAX_DIGIT_COUNT) { - digitCount = SLOW_PATH_MAX_DIGIT_COUNT; - } - exp10 += numberOfAdditionalDigits; - trimTrailingZeros(); - } - - // See https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html#hpd-shifts - private int calculateNumberOfAdditionalDigitsAfterLeftShift(int shift) { - int a = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift]; - int b = NUMBER_OF_ADDITIONAL_DIGITS_AFTER_LEFT_SHIFT[shift + 1]; - int newDigitCount = a >> 11; - int pow5OffsetA = 0x7FF & a; - int pow5OffsetB = 0x7FF & b; - - int n = pow5OffsetB - pow5OffsetA; - for (int i = 0; i < n; i++) { - if (i >= digitCount) { - return newDigitCount - 1; - } else if (digits[i] < POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { - return newDigitCount - 1; - } else if (digits[i] > POWER_OF_FIVE_DIGITS[pow5OffsetA + i]) { - return newDigitCount; - } - } - return 
newDigitCount; - } - - void shiftRight(int shift) { - int readIndex = 0; - int writeIndex = 0; - long n = 0; - - while ((n >>> shift) == 0) { - if (readIndex < digitCount) { - n = (10 * n) + digits[readIndex++]; - } else if (n == 0) { - return; - } else { - while ((n >>> shift) == 0) { - n = 10 * n; - readIndex++; - } - break; - } - } - exp10 -= (readIndex - 1); - long mask = (1L << shift) - 1; - while (readIndex < digitCount) { - byte newDigit = (byte) (n >>> shift); - n = (10 * (n & mask)) + digits[readIndex++]; - digits[writeIndex++] = newDigit; - } - while (compareUnsigned(n, 0) > 0) { - byte newDigit = (byte) (n >>> shift); - n = 10 * (n & mask); - if (writeIndex < SLOW_PATH_MAX_DIGIT_COUNT) { - digits[writeIndex++] = newDigit; - } else if (newDigit > 0) { - truncated = true; - } - } - digitCount = writeIndex; - trimTrailingZeros(); - } - - private void trimTrailingZeros() { - while ((digitCount > 0) && (digits[digitCount - 1] == 0)) { - digitCount--; - } + long digits() { + return digits; } - private void reset() { - digitCount = 0; - exp10 = 0; - truncated = false; + int currentIdx() { + return currentIdx; } } } diff --git a/src/main/java/org/simdjson/NumberParserTables.java b/src/main/java/org/simdjson/NumberParserTables.java index 58f1fa0..3cd8751 100644 --- a/src/main/java/org/simdjson/NumberParserTables.java +++ b/src/main/java/org/simdjson/NumberParserTables.java @@ -73,6 +73,8 @@ class NumberParserTables { 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5 }; + static final int MIN_POWER_OF_FIVE = -342; + static final long[] POWERS_OF_FIVE = { 0xeef453d6923bd65aL, 0x113faa2906a13b3fL, 0x9558b4661b6565f8L, 0x4ac7ca59a424c507L, diff --git a/src/main/java/org/simdjson/OnDemandJsonIterator.java b/src/main/java/org/simdjson/OnDemandJsonIterator.java new file mode 100644 index 0000000..5376504 --- /dev/null +++ b/src/main/java/org/simdjson/OnDemandJsonIterator.java @@ -0,0 +1,675 @@ +package org.simdjson; + +import java.util.Arrays; + +import static 
org.simdjson.CharacterUtils.isStructuralOrWhitespace;
+
+/**
+ * Forward-only cursor over the structural indexes of a single JSON document.
+ *
+ * <p>The iterator reads positions from a {@link BitIndexes} instance and interprets the bytes of the
+ * document found at those positions. Callers drive it with the {@code get*}, {@code startIterating*},
+ * {@code next*} and {@code skipChild} methods.
+ *
+ * <p>{@code depth} is a nesting counter: it starts at 1 in {@link #init}, is incremented when the iterator
+ * moves to a value that is about to be read (array element, field value) and decremented when a value or
+ * an enclosing container has been consumed. {@link #skipChild(int)} relies on it to know when to stop.
+ *
+ * <p>Methods with {@code Root} in the name read a value that is the whole document; they additionally
+ * bounds-check against {@code len} and verify that nothing follows the value.
+ */
+class OnDemandJsonIterator {
+
+    private static final byte SPACE = 0x20;
+    // Depth change caused by each byte value: +1 for '[' and '{', -1 for ']' and '}', 0 otherwise.
+    // The table has 256 entries and is indexed with (b & 0xFF) so that every byte, including 0x7F and
+    // the bytes of multi-byte UTF-8 sequences (negative as Java bytes), is a valid index.
+    private static final int[] SKIP_DEPTH_PER_CHARACTER = new int[256];
+
+    static {
+        // A freshly allocated int[] is already zero-filled; only the brackets need an entry.
+        SKIP_DEPTH_PER_CHARACTER['['] = 1;
+        SKIP_DEPTH_PER_CHARACTER['{'] = 1;
+        SKIP_DEPTH_PER_CHARACTER[']'] = -1;
+        SKIP_DEPTH_PER_CHARACTER['}'] = -1;
+    }
+
+    private final BitIndexes indexer;
+    private final int padding;
+    private final StringParser stringParser = new StringParser();
+    private final NumberParser numberParser = new NumberParser();
+
+    private byte[] buffer;
+    private int len;
+    private int depth;
+
+    OnDemandJsonIterator(BitIndexes indexer, int padding) {
+        this.indexer = indexer;
+        this.padding = padding;
+    }
+
+    /**
+     * Binds the iterator to a document and resets the depth counter to 1.
+     *
+     * @param buffer bytes of the document
+     * @param len    number of document bytes in {@code buffer}
+     * @throws JsonParsingException if the indexer reports that it is already at its end
+     */
+    void init(byte[] buffer, int len) {
+        if (indexer.isEnd()) {
+            throw new JsonParsingException("No structural element found.");
+        }
+        this.buffer = buffer;
+        this.len = len;
+        this.depth = 1;
+    }
+
+    /**
+     * Skips the value the iterator is currently positioned at, i.e. advances until the depth drops to
+     * {@code depth - 1}.
+     */
+    void skipChild() {
+        skipChild(depth - 1);
+    }
+
+    /**
+     * Advances the iterator until {@code depth <= parentDepth}.
+     *
+     * @param parentDepth depth at which skipping stops
+     * @throws JsonParsingException if the structural indexes are exhausted before that depth is reached
+     */
+    void skipChild(int parentDepth) {
+        if (depth <= parentDepth) {
+            return;
+        }
+        int idx = indexer.getAndAdvance();
+        byte character = buffer[idx];
+
+        switch (character) {
+            case '[', '{', ':', ',':
+                break;
+            case '"':
+                if (buffer[indexer.peek()] == ':') {
+                    indexer.advance(); // skip ':'
+                    break;
+                }
+                // Intentional fall-through: a string that is not followed by ':' is handled like
+                // any other element that ends the current value.
+            default:
+                depth--;
+                if (depth <= parentDepth) {
+                    return;
+                }
+        }
+
+        while (indexer.hasNext()) {
+            idx = indexer.getAndAdvance();
+            character = buffer[idx];
+
+            // Mask to an unsigned value: Java bytes are signed, and a raw negative byte
+            // would be an invalid array index.
+            int delta = SKIP_DEPTH_PER_CHARACTER[character & 0xFF];
+            depth += delta;
+            if (delta < 0 && depth <= parentDepth) {
+                return;
+            }
+        }
+
+        throw new JsonParsingException("Not enough close braces.");
+    }
+
+    /** Reads a root-level {@code true}/{@code false}; {@code null} is rejected. */
+    Boolean getRootNonNullBoolean() {
+        int idx = indexer.getAndAdvance();
+        Boolean result = switch (buffer[idx]) {
+            case 't' -> visitRootTrueAtom(idx);
+            case 'f' -> visitRootFalseAtom(idx);
+            default -> throw new JsonParsingException("Unrecognized boolean value. Expected: 'true' or 'false'.");
+        };
+        assertNoMoreJsonValues();
+        depth--;
+        return result;
+    }
+
+    /** Reads a root-level {@code true}/{@code false}/{@code null}; JSON {@code null} yields Java {@code null}. */
+    Boolean getRootBoolean() {
+        int idx = indexer.getAndAdvance();
+        Boolean result = switch (buffer[idx]) {
+            case 't' -> visitRootTrueAtom(idx);
+            case 'f' -> visitRootFalseAtom(idx);
+            case 'n' -> {
+                visitRootNullAtom(idx);
+                yield null;
+            }
+            default -> throw new JsonParsingException("Unrecognized boolean value. Expected: 'true', 'false' or 'null'.");
+        };
+        assertNoMoreJsonValues();
+        depth--;
+        return result;
+    }
+
+    // Root atoms may end exactly at 'len', so the terminator is only inspected when one exists.
+    private Boolean visitRootTrueAtom(int idx) {
+        boolean valid = idx + 4 <= len && isTrue(idx) && (idx + 4 == len || isStructuralOrWhitespace(buffer[idx + 4]));
+        if (!valid) {
+            throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'true'.");
+        }
+        return Boolean.TRUE;
+    }
+
+    private Boolean visitRootFalseAtom(int idx) {
+        boolean valid = idx + 5 <= len && isFalse(idx) && (idx + 5 == len || isStructuralOrWhitespace(buffer[idx + 5]));
+        if (!valid) {
+            throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'false'.");
+        }
+        return Boolean.FALSE;
+    }
+
+    private void visitRootNullAtom(int idx) {
+        boolean valid = idx + 4 <= len && isNull(idx) && (idx + 4 == len || isStructuralOrWhitespace(buffer[idx + 4]));
+        if (!valid) {
+            throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'null'.");
+        }
+    }
+
+    /**
+     * Validates a nested {@code null} atom. Like {@link #visitTrueAtom} and {@link #visitFalseAtom}, it
+     * requires the atom to be followed by a structural character or whitespace, so that e.g.
+     * {@code nullx} is rejected.
+     */
+    private void visitNullAtom(int idx) {
+        boolean valid = isNull(idx) && isStructuralOrWhitespace(buffer[idx + 4]);
+        if (!valid) {
+            throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'null'.");
+        }
+    }
+
+    private boolean isNull(int idx) {
+        return buffer[idx] == 'n'
+                && buffer[idx + 1] == 'u'
+                && buffer[idx + 2] == 'l'
+                && buffer[idx + 3] == 'l';
+    }
+
+    /** Reads a nested {@code true}/{@code false}; {@code null} is rejected. */
+    Boolean getNonNullBoolean() {
+        int idx = indexer.getAndAdvance();
+        Boolean result = switch (buffer[idx]) {
+            case 't' -> visitTrueAtom(idx);
+            case 'f' -> visitFalseAtom(idx);
+            default -> throw new JsonParsingException("Unrecognized boolean value. Expected: 'true' or 'false'.");
+        };
+        depth--;
+        return result;
+    }
+
+    /** Reads a nested {@code true}/{@code false}/{@code null}; JSON {@code null} yields Java {@code null}. */
+    Boolean getBoolean() {
+        int idx = indexer.getAndAdvance();
+        Boolean result = switch (buffer[idx]) {
+            case 't' -> visitTrueAtom(idx);
+            case 'f' -> visitFalseAtom(idx);
+            case 'n' -> {
+                visitNullAtom(idx);
+                yield null;
+            }
+            default -> throw new JsonParsingException("Unrecognized boolean value. Expected: 'true', 'false' or 'null'.");
+        };
+        depth--;
+        return result;
+    }
+
+    private Boolean visitTrueAtom(int idx) {
+        boolean valid = isTrue(idx) && isStructuralOrWhitespace(buffer[idx + 4]);
+        if (!valid) {
+            throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'true'.");
+        }
+        return Boolean.TRUE;
+    }
+
+    private boolean isTrue(int idx) {
+        return buffer[idx] == 't'
+                && buffer[idx + 1] == 'r'
+                && buffer[idx + 2] == 'u'
+                && buffer[idx + 3] == 'e';
+    }
+
+    private Boolean visitFalseAtom(int idx) {
+        boolean valid = isFalse(idx) && isStructuralOrWhitespace(buffer[idx + 5]);
+        if (!valid) {
+            throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'false'.");
+        }
+        return Boolean.FALSE;
+    }
+
+    private boolean isFalse(int idx) {
+        return buffer[idx] == 'f'
+                && buffer[idx + 1] == 'a'
+                && buffer[idx + 2] == 'l'
+                && buffer[idx + 3] == 's'
+                && buffer[idx + 4] == 'e';
+    }
+
+    // ---- Numbers -------------------------------------------------------------------------------
+    // Root variants parse from a space-padded copy produced by padRootNumber and then assert that
+    // nothing follows the value. Nested variants parse in place from 'buffer'. Boxed variants
+    // additionally accept JSON null and return Java null for it.
+
+    byte getRootNonNullByte() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        byte[] copy = padRootNumber(idx);
+        byte value = numberParser.parseByte(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    Byte getRootByte() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitRootNullAtom(idx);
+            assertNoMoreJsonValues();
+            return null;
+        }
+        byte[] copy = padRootNumber(idx);
+        byte value = numberParser.parseByte(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    byte getNonNullByte() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        return numberParser.parseByte(buffer, len, idx);
+    }
+
+    Byte getByte() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitNullAtom(idx);
+            return null;
+        }
+        return numberParser.parseByte(buffer, len, idx);
+    }
+
+    short getRootNonNullShort() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        byte[] copy = padRootNumber(idx);
+        short value = numberParser.parseShort(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    Short getRootShort() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitRootNullAtom(idx);
+            assertNoMoreJsonValues();
+            return null;
+        }
+        byte[] copy = padRootNumber(idx);
+        short value = numberParser.parseShort(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    short getNonNullShort() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        return numberParser.parseShort(buffer, len, idx);
+    }
+
+    Short getShort() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitNullAtom(idx);
+            return null;
+        }
+        return numberParser.parseShort(buffer, len, idx);
+    }
+
+    int getRootNonNullInt() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        byte[] copy = padRootNumber(idx);
+        int value = numberParser.parseInt(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    Integer getRootInt() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitRootNullAtom(idx);
+            assertNoMoreJsonValues();
+            return null;
+        }
+        byte[] copy = padRootNumber(idx);
+        int value = numberParser.parseInt(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    Integer getInt() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitNullAtom(idx);
+            return null;
+        }
+        return numberParser.parseInt(buffer, len, idx);
+    }
+
+    int getNonNullInt() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        return numberParser.parseInt(buffer, len, idx);
+    }
+
+    long getRootNonNullLong() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        byte[] copy = padRootNumber(idx);
+        long value = numberParser.parseLong(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    Long getRootLong() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitRootNullAtom(idx);
+            assertNoMoreJsonValues();
+            return null;
+        }
+        byte[] copy = padRootNumber(idx);
+        long value = numberParser.parseLong(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    long getNonNullLong() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        return numberParser.parseLong(buffer, len, idx);
+    }
+
+    Long getLong() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitNullAtom(idx);
+            return null;
+        }
+        return numberParser.parseLong(buffer, len, idx);
+    }
+
+    float getRootNonNullFloat() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        byte[] copy = padRootNumber(idx);
+        float value = numberParser.parseFloat(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    Float getRootFloat() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitRootNullAtom(idx);
+            assertNoMoreJsonValues();
+            return null;
+        }
+        byte[] copy = padRootNumber(idx);
+        float value = numberParser.parseFloat(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    double getRootNonNullDouble() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        byte[] copy = padRootNumber(idx);
+        double value = numberParser.parseDouble(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    Double getRootDouble() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitRootNullAtom(idx);
+            assertNoMoreJsonValues();
+            return null;
+        }
+        byte[] copy = padRootNumber(idx);
+        double value = numberParser.parseDouble(copy, len, 0);
+        assertNoMoreJsonValues();
+        return value;
+    }
+
+    /**
+     * Returns a new array holding the document bytes from {@code idx} up to {@code len}, followed by
+     * {@code padding} space characters.
+     */
+    // NOTE(review): callers pass the original 'len', not the length of this copy, to NumberParser;
+    // confirm that NumberParser never uses it to index past the end of the copy.
+    private byte[] padRootNumber(int idx) {
+        int remainingLen = len - idx;
+        byte[] copy = new byte[remainingLen + padding];
+        System.arraycopy(buffer, idx, copy, 0, remainingLen);
+        Arrays.fill(copy, remainingLen, remainingLen + padding, SPACE);
+        return copy;
+    }
+
+    double getNonNullDouble() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        return numberParser.parseDouble(buffer, len, idx);
+    }
+
+    Double getDouble() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitNullAtom(idx);
+            return null;
+        }
+        return numberParser.parseDouble(buffer, len, idx);
+    }
+
+    float getNonNullFloat() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        return numberParser.parseFloat(buffer, len, idx);
+    }
+
+    Float getFloat() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == 'n') {
+            visitNullAtom(idx);
+            return null;
+        }
+        return numberParser.parseFloat(buffer, len, idx);
+    }
+
+    // ---- Strings and chars ---------------------------------------------------------------------
+
+    /**
+     * Reads a root-level string into {@code stringBuffer}.
+     *
+     * @return the value returned by the string parser, or -1 if the value is JSON {@code null}
+     */
+    int getRootString(byte[] stringBuffer) {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        int len = switch (buffer[idx]) {
+            case '"' -> stringParser.parseString(buffer, idx, stringBuffer);
+            case 'n' -> {
+                visitRootNullAtom(idx);
+                yield -1;
+            }
+            default -> throw new JsonParsingException("Invalid value starting at " + idx + ". Expected either string or 'null'.");
+        };
+        assertNoMoreJsonValues();
+        return len;
+    }
+
+    /**
+     * Reads a nested string into {@code stringBuffer}.
+     *
+     * @return the value returned by the string parser, or -1 if the value is JSON {@code null}
+     */
+    int getString(byte[] stringBuffer) {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        return switch (buffer[idx]) {
+            case '"' -> stringParser.parseString(buffer, idx, stringBuffer);
+            case 'n' -> {
+                visitNullAtom(idx);
+                yield -1;
+            }
+            default -> throw new JsonParsingException("Invalid value starting at " + idx + ". Expected either string or 'null'.");
+        };
+    }
+
+    char getNonNullChar() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == '"') {
+            return stringParser.parseChar(buffer, idx);
+        }
+        throw new JsonParsingException("Invalid value starting at " + idx + ". Expected string.");
+    }
+
+    Character getChar() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        return switch (buffer[idx]) {
+            case '"' -> stringParser.parseChar(buffer, idx);
+            case 'n' -> {
+                visitNullAtom(idx);
+                yield null;
+            }
+            default -> throw new JsonParsingException("Invalid value starting at " + idx + ". Expected either string or 'null'.");
+        };
+    }
+
+    char getRootNonNullChar() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == '"') {
+            char character = stringParser.parseChar(buffer, idx);
+            assertNoMoreJsonValues();
+            return character;
+        }
+        throw new JsonParsingException("Invalid value starting at " + idx + ". Expected string.");
+    }
+
+    Character getRootChar() {
+        depth--;
+        int idx = indexer.getAndAdvance();
+        Character character = switch (buffer[idx]) {
+            case '"' -> stringParser.parseChar(buffer, idx);
+            case 'n' -> {
+                visitRootNullAtom(idx);
+                yield null;
+            }
+            default -> throw new JsonParsingException("Invalid value starting at " + idx + ". Expected either string or 'null'.");
+        };
+        assertNoMoreJsonValues();
+        return character;
+    }
+
+    // ---- Arrays --------------------------------------------------------------------------------
+
+    /**
+     * Starts reading a nested array.
+     *
+     * @return {@code NULL} if the value is JSON {@code null}, {@code EMPTY} for {@code []} (both are
+     *         fully consumed), or {@code NOT_EMPTY} with the iterator positioned at the first element
+     * @throws JsonParsingException if the value is neither {@code null} nor an array
+     */
+    IteratorResult startIteratingArray() {
+        int idx = indexer.peek();
+        if (buffer[idx] == 'n') {
+            visitNullAtom(idx);
+            indexer.advance();
+            depth--;
+            return IteratorResult.NULL;
+        }
+        if (buffer[idx] != '[') {
+            throw unexpectedCharException(idx, '[');
+        }
+        idx = indexer.advanceAndGet();
+        if (buffer[idx] == ']') {
+            indexer.advance();
+            depth--;
+            return IteratorResult.EMPTY;
+        }
+        depth++;
+        return IteratorResult.NOT_EMPTY;
+    }
+
+    /**
+     * Root-level counterpart of {@link #startIteratingArray()}; additionally requires the last
+     * structural element of the document to be {@code ']'}.
+     */
+    IteratorResult startIteratingRootArray() {
+        int idx = indexer.peek();
+        if (buffer[idx] == 'n') {
+            visitRootNullAtom(idx);
+            indexer.advance();
+            depth--;
+            return IteratorResult.NULL;
+        }
+        if (buffer[idx] != '[') {
+            throw unexpectedCharException(idx, '[');
+        }
+        if (buffer[indexer.getLast()] != ']') {
+            throw new JsonParsingException("Unclosed array. Missing ']' for starting '['.");
+        }
+        idx = indexer.advanceAndGet();
+        if (buffer[idx] == ']') {
+            indexer.advance();
+            depth--;
+            assertNoMoreJsonValues();
+            return IteratorResult.EMPTY;
+        }
+        depth++;
+        return IteratorResult.NOT_EMPTY;
+    }
+
+    /**
+     * Consumes the separator after an array element.
+     *
+     * @return {@code true} if a {@code ','} was read (another element follows), {@code false} if the
+     *         closing {@code ']'} was read
+     * @throws JsonParsingException if neither is found
+     */
+    boolean nextArrayElement() {
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] == ']') {
+            depth--;
+            return false;
+        } else if (buffer[idx] == ',') {
+            depth++;
+            return true;
+        } else {
+            throw new JsonParsingException("Missing comma between array values");
+        }
+    }
+
+    // ---- Objects -------------------------------------------------------------------------------
+
+    /**
+     * Starts reading a nested object.
+     *
+     * @return {@code NULL} if the value is JSON {@code null}, {@code EMPTY} for {@code {}} (both are
+     *         fully consumed), or {@code NOT_EMPTY} with the iterator positioned at the first field name
+     * @throws JsonParsingException if the value is neither {@code null} nor an object
+     */
+    IteratorResult startIteratingObject() {
+        int idx = indexer.peek();
+        if (buffer[idx] == 'n') {
+            visitNullAtom(idx);
+            indexer.advance();
+            depth--;
+            return IteratorResult.NULL;
+        }
+        if (buffer[idx] != '{') {
+            throw unexpectedCharException(idx, '{');
+        }
+        idx = indexer.advanceAndGet();
+        if (buffer[idx] == '}') {
+            indexer.advance();
+            depth--;
+            return IteratorResult.EMPTY;
+        }
+        return IteratorResult.NOT_EMPTY;
+    }
+
+    /**
+     * Root-level counterpart of {@link #startIteratingObject()}; additionally requires the last
+     * structural element of the document to be {@code '}'}.
+     */
+    IteratorResult startIteratingRootObject() {
+        int idx = indexer.peek();
+        if (buffer[idx] == 'n') {
+            visitRootNullAtom(idx);
+            indexer.advance();
+            depth--;
+            return IteratorResult.NULL;
+        }
+        if (buffer[idx] != '{') {
+            throw unexpectedCharException(idx, '{');
+        }
+        if (buffer[indexer.getLast()] != '}') {
+            throw new JsonParsingException("Unclosed object. Missing '}' for starting '{'.");
+        }
+        idx = indexer.advanceAndGet();
+        if (buffer[idx] == '}') {
+            indexer.advance();
+            depth--;
+            assertNoMoreJsonValues();
+            return IteratorResult.EMPTY;
+        }
+        return IteratorResult.NOT_EMPTY;
+    }
+
+    /**
+     * Consumes the separator after an object field.
+     *
+     * @return {@code true} if a {@code ','} was read (another field follows), {@code false} if the
+     *         closing {@code '}'} was read
+     * @throws JsonParsingException if neither is found
+     */
+    boolean nextObjectField() {
+        int idx = indexer.getAndAdvance();
+        byte character = buffer[idx];
+        if (character == '}') {
+            depth--;
+            return false;
+        } else if (character == ',') {
+            return true;
+        } else {
+            throw unexpectedCharException(idx, ',');
+        }
+    }
+
+    /** Consumes the {@code ':'} between a field name and its value and increments the depth. */
+    void moveToFieldValue() {
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] != ':') {
+            throw unexpectedCharException(idx, ':');
+        }
+        depth++;
+    }
+
+    /**
+     * Reads a field name into {@code stringBuffer}.
+     *
+     * @return the value returned by the string parser
+     * @throws JsonParsingException if the current element does not start with {@code '"'}
+     */
+    int getFieldName(byte[] stringBuffer) {
+        int idx = indexer.getAndAdvance();
+        if (buffer[idx] != '"') {
+            throw unexpectedCharException(idx, '"');
+        }
+        return stringParser.parseString(buffer, idx, stringBuffer);
+    }
+
+    int getDepth() {
+        return depth;
+    }
+
+    private JsonParsingException unexpectedCharException(int idx, char expected) {
+        if (indexer.isPastEnd()) {
+            return new JsonParsingException("Expected '" + expected + "' but reached end of buffer.");
+        } else {
+            return new JsonParsingException("Expected '" + expected + "' but got: '" + (char) buffer[idx] + "'.");
+        }
+    }
+
+    /** Throws if the indexer still has structural elements left after the root value. */
+    void assertNoMoreJsonValues() {
+        if (indexer.hasNext()) {
+            throw new JsonParsingException("More than one JSON value at the root of the document, or extra characters at the end of the JSON!");
+        }
+    }
+
+    /** Outcome of {@code startIterating*}: an empty container, JSON {@code null}, or a container with content. */
+    enum IteratorResult {
+        EMPTY, NULL, NOT_EMPTY
+    }
+}
diff --git a/src/main/java/org/simdjson/ResolvedClass.java b/src/main/java/org/simdjson/ResolvedClass.java
new file mode 100644
index 0000000..67c6887
--- /dev/null
+++ b/src/main/java/org/simdjson/ResolvedClass.java
@@ -0,0 +1,165 @@
+package org.simdjson;
+
+import
org.simdjson.annotations.JsonFieldName;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Modifier;
+import java.lang.reflect.Parameter;
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+/**
+ * Reflection metadata computed once for a target type: its category, raw class, element type
+ * (for arrays and lists) and, for custom classes, the single constructor together with a map from
+ * JSON field names to constructor arguments.
+ */
+class ResolvedClass {
+
+    /**
+     * Kind of a resolved type. Each constant optionally carries the Java class it corresponds to and
+     * a shared empty array of that type; both are {@code null} where not applicable.
+     */
+    enum ResolvedClassCategory {
+        BOOLEAN_PRIMITIVE(boolean.class, new boolean[0]),
+        BOOLEAN(Boolean.class, new Boolean[0]),
+        BYTE_PRIMITIVE(byte.class, new byte[0]),
+        BYTE(Byte.class, new Byte[0]),
+        CHAR_PRIMITIVE(char.class, new char[0]),
+        CHAR(Character.class, new Character[0]),
+        SHORT_PRIMITIVE(short.class, new short[0]),
+        SHORT(Short.class, new Short[0]),
+        INT_PRIMITIVE(int.class, new int[0]),
+        INT(Integer.class, new Integer[0]),
+        LONG_PRIMITIVE(long.class, new long[0]),
+        LONG(Long.class, new Long[0]),
+        DOUBLE_PRIMITIVE(double.class, new double[0]),
+        DOUBLE(Double.class, new Double[0]),
+        FLOAT_PRIMITIVE(float.class, new float[0]),
+        FLOAT(Float.class, new Float[0]),
+        STRING(String.class, new String[0]),
+        CUSTOM(null, null),
+        ARRAY(null, null),
+        LIST(List.class, null);
+
+        private final Class<?> cclass;
+        private final Object emptyArray;
+
+        ResolvedClassCategory(Class<?> cclass, Object emptyArray) {
+            this.cclass = cclass;
+            this.emptyArray = emptyArray;
+        }
+
+        /** Returns the shared empty array for this category, or {@code null} if it has none. */
+        Object getEmptyArray() {
+            return emptyArray;
+        }
+    }
+
+    private final ResolvedClassCategory classCategory;
+    private final Class<?> rawClass;
+    private final ResolvedClass elementClass;
+    private final Constructor<?> constructor;
+    private final ConstructorArgumentsMap argumentsMap;
+
+    /**
+     * Resolves {@code targetType}.
+     *
+     * <p>For a {@code CUSTOM} class the single declared constructor is made accessible and every
+     * constructor parameter is registered in the arguments map under the UTF-8 bytes of its field name.
+     *
+     * @param targetType    class or parameterized type to resolve
+     * @param classResolver resolver used for element and constructor-parameter types
+     * @throws JsonParsingException if the type is not supported
+     */
+    ResolvedClass(Type targetType, ClassResolver classResolver) {
+        if (targetType instanceof ParameterizedType parameterizedType) {
+            rawClass = (Class<?>) parameterizedType.getRawType();
+            elementClass = resolveElementClass(parameterizedType, classResolver);
+        } else {
+            rawClass = (Class<?>) targetType;
+            elementClass = resolveElementClass(rawClass, classResolver);
+        }
+
+        classCategory = resolveClassType(rawClass);
+        if (classCategory == ResolvedClassCategory.CUSTOM) {
+            checkIfCustomClassIsSupported(rawClass);
+            // checkIfCustomClassIsSupported guarantees exactly one declared constructor.
+            constructor = rawClass.getDeclaredConstructors()[0];
+            constructor.setAccessible(true);
+            Parameter[] parameters = constructor.getParameters();
+            argumentsMap = new ConstructorArgumentsMap(parameters.length);
+            for (int i = 0; i < parameters.length; i++) {
+                Type parameterType = parameters[i].getAnnotatedType().getType();
+                String fieldName = resolveFieldName(parameters[i], rawClass);
+                byte[] fieldNameBytes = fieldName.getBytes(StandardCharsets.UTF_8);
+                argumentsMap.put(fieldNameBytes, new ConstructorArgument(i, classResolver.resolveClass(parameterType)));
+            }
+        } else {
+            constructor = null;
+            argumentsMap = null;
+        }
+    }
+
+    // Element type of a parameterized type; only java.util.List<E> is accepted.
+    private static ResolvedClass resolveElementClass(ParameterizedType parameterizedType, ClassResolver classResolver) {
+        if (parameterizedType.getRawType() != List.class) {
+            throw new JsonParsingException("Parametrized types other than java.util.List are not supported.");
+        }
+        return classResolver.resolveClass(parameterizedType.getActualTypeArguments()[0]);
+    }
+
+    // Element type of a plain class: the component type for arrays, null otherwise.
+    // A raw java.util.List is rejected because its element type is unknown.
+    private static ResolvedClass resolveElementClass(Class<?> cls, ClassResolver classResolver) {
+        if (cls == List.class) {
+            throw new JsonParsingException("Undefined list element type.");
+        }
+        if (cls.componentType() != null) {
+            return classResolver.resolveClass(cls.componentType());
+        } else {
+            return null;
+        }
+    }
+
+    // Maps a raw class to its category: arrays first, then the classes listed in
+    // ResolvedClassCategory, and CUSTOM for everything else. Iterables other than
+    // java.util.List itself are rejected.
+    private static ResolvedClassCategory resolveClassType(Class<?> cls) {
+        if (Iterable.class.isAssignableFrom(cls) && cls != List.class) {
+            throw new JsonParsingException("Unsupported class: " + cls.getName() +
+                    ". For JSON arrays at the root, use Java arrays. For inner JSON arrays, use either Java arrays or java.util.List.");
+        }
+        if (cls.isArray()) {
+            return ResolvedClassCategory.ARRAY;
+        }
+        for (ResolvedClassCategory t : ResolvedClassCategory.values()) {
+            if (t.cclass == cls) {
+                return t;
+            }
+        }
+        return ResolvedClassCategory.CUSTOM;
+    }
+
+    // Rejects non-static member classes, abstract classes, interfaces, and classes that do not
+    // declare exactly one constructor.
+    private static void checkIfCustomClassIsSupported(Class<?> cls) {
+        int modifiers = cls.getModifiers();
+        if (cls.isMemberClass() && !Modifier.isStatic(modifiers)) {
+            throw new JsonParsingException("Unsupported class: " + cls.getName() + ". Inner non-static classes are not supported.");
+        }
+        if (Modifier.isAbstract(modifiers) || Modifier.isInterface(modifiers)) {
+            throw new JsonParsingException("Unsupported class: " + cls.getName() + ". Interfaces and abstract classes are not supported.");
+        }
+        Constructor<?>[] constructors = cls.getDeclaredConstructors();
+        if (constructors.length > 1) {
+            throw new JsonParsingException("Class: " + cls.getName() + " has more than one constructor.");
+        }
+        if (constructors.length == 0) {
+            throw new JsonParsingException("Class: " + cls.getName() + " doesn't have any constructor.");
+        }
+    }
+
+    // JSON field name for a constructor parameter: the @JsonFieldName value if present; otherwise
+    // the parameter name, which is accepted only when the target class is a record.
+    private static String resolveFieldName(Parameter parameter, Class<?> targetClass) {
+        JsonFieldName annotation = parameter.getAnnotation(JsonFieldName.class);
+        if (annotation != null) {
+            return annotation.value();
+        }
+        if (!targetClass.isRecord()) {
+            throw new JsonParsingException("Some of " + targetClass.getName() + "'s constructor arguments are not annotated with @JsonFieldName.");
+        }
+        return parameter.getName();
+    }
+
+    /** Returns the field-name-to-argument map, or {@code null} unless the category is {@code CUSTOM}. */
+    ConstructorArgumentsMap getArgumentsMap() {
+        return argumentsMap;
+    }
+
+    /** Returns the constructor to instantiate, or {@code null} unless the category is {@code CUSTOM}. */
+    Constructor<?> getConstructor() {
+        return constructor;
+    }
+
+    ResolvedClassCategory getClassCategory() {
+        return classCategory;
+    }
+
+    /** Returns the resolved element type for arrays and lists, or {@code null} for other types. */
+    ResolvedClass getElementClass() {
+        return elementClass;
+    }
+
+    Class<?> getRawClass() {
+        return rawClass;
+    }
+}
diff --git a/src/main/java/org/simdjson/SchemaBasedJsonIterator.java
b/src/main/java/org/simdjson/SchemaBasedJsonIterator.java new file mode 100644 index 0000000..b48595d --- /dev/null +++ b/src/main/java/org/simdjson/SchemaBasedJsonIterator.java @@ -0,0 +1,735 @@ +package org.simdjson; + +import org.simdjson.OnDemandJsonIterator.IteratorResult; +import org.simdjson.ResolvedClass.ResolvedClassCategory; + +import java.lang.reflect.Array; +import java.lang.reflect.InvocationTargetException; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; + +import static java.nio.charset.StandardCharsets.UTF_8; + +class SchemaBasedJsonIterator { + + private static final int INITIAL_ARRAY_SIZE = 16; + + private final ClassResolver classResolver; + private final OnDemandJsonIterator jsonIterator; + private final byte[] stringBuffer; + + SchemaBasedJsonIterator(BitIndexes bitIndexes, byte[] stringBuffer, int padding) { + this.jsonIterator = new OnDemandJsonIterator(bitIndexes, padding); + this.classResolver = new ClassResolver(); + this.stringBuffer = stringBuffer; + } + + @SuppressWarnings("unchecked") + T walkDocument(byte[] padded, int len, Class expectedType) { + jsonIterator.init(padded, len); + classResolver.reset(); + + ResolvedClass resolvedExpectedClass = classResolver.resolveClass(expectedType); + return switch (resolvedExpectedClass.getClassCategory()) { + case BOOLEAN_PRIMITIVE -> (T) jsonIterator.getRootNonNullBoolean(); + case BOOLEAN -> (T) jsonIterator.getRootBoolean(); + case BYTE_PRIMITIVE -> (T) Byte.valueOf(jsonIterator.getRootNonNullByte()); + case BYTE -> (T) jsonIterator.getRootByte(); + case SHORT_PRIMITIVE -> (T) Short.valueOf(jsonIterator.getRootNonNullShort()); + case SHORT -> (T) jsonIterator.getRootShort(); + case INT_PRIMITIVE -> (T) Integer.valueOf(jsonIterator.getRootNonNullInt()); + case INT -> (T) jsonIterator.getRootInt(); + case LONG_PRIMITIVE -> (T) Long.valueOf(jsonIterator.getRootNonNullLong()); + case LONG -> (T) jsonIterator.getRootLong(); + case FLOAT_PRIMITIVE -> (T) 
Float.valueOf(jsonIterator.getRootNonNullFloat()); + case FLOAT -> (T) jsonIterator.getRootFloat(); + case DOUBLE_PRIMITIVE -> (T) Double.valueOf(jsonIterator.getRootNonNullDouble()); + case DOUBLE -> (T) jsonIterator.getRootDouble(); + case CHAR_PRIMITIVE -> (T) Character.valueOf(jsonIterator.getRootNonNullChar()); + case CHAR -> (T) jsonIterator.getRootChar(); + case STRING -> (T) getRootString(); + case ARRAY -> (T) getRootArray(resolvedExpectedClass.getElementClass()); + case CUSTOM -> (T) getRootObject(resolvedExpectedClass); + case LIST -> throw new JsonParsingException("Lists at the root are not supported. Consider using an array instead."); + }; + } + + private Object getRootObject(ResolvedClass expectedClass) { + IteratorResult result = jsonIterator.startIteratingRootObject(); + Object object = getObject(expectedClass, result); + jsonIterator.assertNoMoreJsonValues(); + return object; + } + + private Object getObject(ResolvedClass expectedClass) { + IteratorResult result = jsonIterator.startIteratingObject(); + return getObject(expectedClass, result); + } + + private Object getObject(ResolvedClass expectedClass, IteratorResult result) { + if (result == IteratorResult.NOT_EMPTY) { + ConstructorArgumentsMap argumentsMap = expectedClass.getArgumentsMap(); + Object[] args = new Object[argumentsMap.getArgumentCount()]; + int parentDepth = jsonIterator.getDepth() - 1; + collectArguments(argumentsMap, args); + jsonIterator.skipChild(parentDepth); + return createObject(expectedClass, args); + } else if (result == IteratorResult.EMPTY) { + ConstructorArgumentsMap argumentsMap = expectedClass.getArgumentsMap(); + Object[] args = new Object[argumentsMap.getArgumentCount()]; + return createObject(expectedClass, args); + } + return null; + } + + private Object createObject(ResolvedClass expectedClass, Object[] args) { + try { + return expectedClass.getConstructor().newInstance(args); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException 
e) { + throw new JsonParsingException("Failed to construct an instance of " + expectedClass.getRawClass().getName(), e); + } + } + + private void collectArguments(ConstructorArgumentsMap argumentsMap, Object[] args) { + int collected = 0; + int argLen = args.length; + boolean hasFields = true; + while (collected < argLen && hasFields) { + int fieldNameLen = jsonIterator.getFieldName(stringBuffer); + jsonIterator.moveToFieldValue(); + ConstructorArgument argument = argumentsMap.get(stringBuffer, fieldNameLen); + if (argument != null) { + ResolvedClass argumentClass = argument.resolvedClass(); + collectArgument(argumentClass, args, argument); + collected++; + } else { + jsonIterator.skipChild(); + } + hasFields = jsonIterator.nextObjectField(); + } + } + + private void collectArgument(ResolvedClass argumentClass, Object[] args, ConstructorArgument argument) { + args[argument.idx()] = switch (argumentClass.getClassCategory()) { + case BOOLEAN_PRIMITIVE -> jsonIterator.getNonNullBoolean(); + case BOOLEAN -> jsonIterator.getBoolean(); + case BYTE_PRIMITIVE -> jsonIterator.getNonNullByte(); + case BYTE -> jsonIterator.getByte(); + case SHORT_PRIMITIVE -> jsonIterator.getNonNullShort(); + case SHORT -> jsonIterator.getShort(); + case INT_PRIMITIVE -> jsonIterator.getNonNullInt(); + case INT -> jsonIterator.getInt(); + case LONG_PRIMITIVE -> jsonIterator.getNonNullLong(); + case LONG -> jsonIterator.getLong(); + case FLOAT_PRIMITIVE -> jsonIterator.getNonNullFloat(); + case FLOAT -> jsonIterator.getFloat(); + case DOUBLE_PRIMITIVE -> jsonIterator.getNonNullDouble(); + case DOUBLE -> jsonIterator.getDouble(); + case CHAR_PRIMITIVE -> jsonIterator.getNonNullChar(); + case CHAR -> jsonIterator.getChar(); + case STRING -> getString(); + case ARRAY -> getArray(argumentClass.getElementClass()); + case LIST -> getList(argumentClass.getElementClass()); + case CUSTOM -> getObject(argument.resolvedClass()); + }; + } + + private List getList(ResolvedClass elementType) { + 
IteratorResult result = jsonIterator.startIteratingArray(); + if (result == IteratorResult.EMPTY) { + return Collections.emptyList(); + } + if (result == IteratorResult.NULL) { + return null; + } + + LinkedList list = new LinkedList<>(); + boolean hasElements = true; + + switch (elementType.getClassCategory()) { + case BOOLEAN -> { + while (hasElements) { + list.add(jsonIterator.getBoolean()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case BYTE -> { + while (hasElements) { + list.add(jsonIterator.getByte()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case CHAR -> { + while (hasElements) { + list.add(jsonIterator.getChar()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case SHORT -> { + while (hasElements) { + list.add(jsonIterator.getShort()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case INT -> { + while (hasElements) { + list.add(jsonIterator.getInt()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case LONG -> { + while (hasElements) { + list.add(jsonIterator.getLong()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case DOUBLE -> { + while (hasElements) { + list.add(jsonIterator.getDouble()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case FLOAT -> { + while (hasElements) { + list.add(jsonIterator.getFloat()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case STRING -> { + while (hasElements) { + list.add(getString()); + hasElements = jsonIterator.nextArrayElement(); + } + } + case CUSTOM -> { + while (hasElements) { + list.add(getObject(elementType)); + hasElements = jsonIterator.nextArrayElement(); + } + } + case ARRAY -> { + while (hasElements) { + list.add(getArray(elementType.getElementClass())); + hasElements = jsonIterator.nextArrayElement(); + } + } + case LIST -> { + while (hasElements) { + list.add(getList(elementType.getElementClass())); + hasElements = jsonIterator.nextArrayElement(); + } + } + default -> throw new 
JsonParsingException("Unsupported array element type: " + elementType.getRawClass().getName()); + } + + return list; + } + + private Object getRootArray(ResolvedClass elementType) { + IteratorResult result = jsonIterator.startIteratingRootArray(); + Object array = getArray(elementType, result); + jsonIterator.assertNoMoreJsonValues(); + return array; + } + + private Object getArray(ResolvedClass elementType) { + IteratorResult result = jsonIterator.startIteratingArray(); + return getArray(elementType, result); + } + + private Object getArray(ResolvedClass elementType, IteratorResult result) { + if (result == IteratorResult.EMPTY) { + ResolvedClassCategory classCategory = elementType.getClassCategory(); + return classCategory.getEmptyArray() != null ? classCategory.getEmptyArray() : Array.newInstance(elementType.getRawClass(), 0); + } + if (result == IteratorResult.NULL) { + return null; + } + + return switch (elementType.getClassCategory()) { + case BOOLEAN_PRIMITIVE -> getPrimitiveBooleanArray(); + case BOOLEAN -> getBooleanArray(); + case BYTE_PRIMITIVE -> getBytePrimitiveArray(); + case BYTE -> getByteArray(); + case CHAR_PRIMITIVE -> getCharPrimitiveArray(); + case CHAR -> getCharArray(); + case SHORT_PRIMITIVE -> getShortPrimitiveArray(); + case SHORT -> getShortArray(); + case INT_PRIMITIVE -> getIntPrimitiveArray(); + case INT -> getIntArray(); + case LONG_PRIMITIVE -> getLongPrimitiveArray(); + case LONG -> getLongArray(); + case DOUBLE_PRIMITIVE -> getDoublePrimitiveArray(); + case DOUBLE -> getDoubleArray(); + case FLOAT_PRIMITIVE -> getFloatPrimitiveArray(); + case FLOAT -> getFloatArray(); + case STRING -> getStringArray(); + case CUSTOM -> getCustomObjectArray(elementType); + case ARRAY -> getArrayOfArrays(elementType); + case LIST -> throw new JsonParsingException("Arrays of lists are not supported."); + }; + } + + private Object getFloatArray() { + Float[] array = new Float[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while 
(hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Float[] copy = new Float[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getFloat(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Float[] copy = new Float[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getFloatPrimitiveArray() { + float[] array = new float[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + float[] copy = new float[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullFloat(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + float[] copy = new float[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getDoubleArray() { + Double[] array = new Double[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Double[] copy = new Double[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getDouble(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Double[] copy = new Double[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getDoublePrimitiveArray() { + double[] array = new double[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == 
oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + double[] copy = new double[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullDouble(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + double[] copy = new double[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getLongPrimitiveArray() { + long[] array = new long[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + long[] copy = new long[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullLong(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + long[] copy = new long[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getLongArray() { + Long[] array = new Long[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Long[] copy = new Long[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getLong(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Long[] copy = new Long[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getShortPrimitiveArray() { + short[] array = new short[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + short[] 
copy = new short[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullShort(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + short[] copy = new short[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getShortArray() { + Short[] array = new Short[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Short[] copy = new Short[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getShort(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Short[] copy = new Short[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object[] getCustomObjectArray(ResolvedClass elementType) { + Object[] array = (Object[]) Array.newInstance(elementType.getRawClass(), INITIAL_ARRAY_SIZE); + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Object[] copy = (Object[]) Array.newInstance(elementType.getRawClass(), newCapacity); + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = getObject(elementType); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Object[] copy = (Object[]) Array.newInstance(elementType.getRawClass(), size); + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object[] getArrayOfArrays(ResolvedClass elementType) { + Object[] array = (Object[]) Array.newInstance(elementType.getRawClass(), INITIAL_ARRAY_SIZE); + int size = 0; + boolean hasElements = 
true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Object[] copy = (Object[]) Array.newInstance(elementType.getRawClass(), newCapacity); + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = getArray(elementType.getElementClass()); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Object[] copy = (Object[]) Array.newInstance(elementType.getRawClass(), size); + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Integer[] getIntArray() { + Integer[] array = new Integer[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Integer[] copy = new Integer[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getInt(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Integer[] copy = new Integer[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private int[] getIntPrimitiveArray() { + int[] array = new int[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + int[] copy = new int[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullInt(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + int[] copy = new int[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getCharArray() { + Character[] array = new Character[INITIAL_ARRAY_SIZE]; + int size = 0; 
+ boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Character[] copy = new Character[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getChar(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Character[] copy = new Character[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private char[] getCharPrimitiveArray() { + char[] array = new char[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + char[] copy = new char[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullChar(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + char[] copy = new char[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Object getByteArray() { + Byte[] array = new Byte[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Byte[] copy = new Byte[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getByte(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Byte[] copy = new Byte[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private byte[] getBytePrimitiveArray() { + byte[] array = new byte[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = 
array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + byte[] copy = new byte[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullByte(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + byte[] copy = new byte[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private Boolean[] getBooleanArray() { + Boolean[] array = new Boolean[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + Boolean[] copy = new Boolean[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getBoolean(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + Boolean[] copy = new Boolean[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private boolean[] getPrimitiveBooleanArray() { + boolean[] array = new boolean[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { + int newCapacity = calculateNewCapacity(oldCapacity); + boolean[] copy = new boolean[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = jsonIterator.getNonNullBoolean(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + boolean[] copy = new boolean[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private String[] getStringArray() { + String[] array = new String[INITIAL_ARRAY_SIZE]; + int size = 0; + boolean hasElements = true; + while (hasElements) { + int oldCapacity = array.length; + if (size == oldCapacity) { 
+ int newCapacity = calculateNewCapacity(oldCapacity); + String[] copy = new String[newCapacity]; + System.arraycopy(array, 0, copy, 0, oldCapacity); + array = copy; + } + array[size++] = getString(); + hasElements = jsonIterator.nextArrayElement(); + } + if (size != array.length) { + String[] copy = new String[size]; + System.arraycopy(array, 0, copy, 0, size); + array = copy; + } + return array; + } + + private static int calculateNewCapacity(int oldCapacity) { + int minCapacity = oldCapacity + 1; + int newCapacity = oldCapacity + (oldCapacity >> 1); + if (newCapacity - minCapacity < 0) { + newCapacity = minCapacity; + } + return newCapacity; + } + + private String getString() { + int len = jsonIterator.getString(stringBuffer); + if (len == -1) { + return null; + } + return new String(stringBuffer, 0, len, UTF_8); + } + + private String getRootString() { + int len = jsonIterator.getRootString(stringBuffer); + if (len == -1) { + return null; + } + return new String(stringBuffer, 0, len, UTF_8); + } +} diff --git a/src/main/java/org/simdjson/SimdJsonParser.java b/src/main/java/org/simdjson/SimdJsonParser.java index 2ca2d1a..707124c 100644 --- a/src/main/java/org/simdjson/SimdJsonParser.java +++ b/src/main/java/org/simdjson/SimdJsonParser.java @@ -2,15 +2,14 @@ public class SimdJsonParser { - private static final int STEP_SIZE = 64; private static final int PADDING = 64; private static final int DEFAULT_CAPACITY = 34 * 1024 * 1024; // we should be able to handle jsons <= 34MiB private static final int DEFAULT_MAX_DEPTH = 1024; - private final BlockReader reader; private final StructuralIndexer indexer; private final BitIndexes bitIndexes; private final JsonIterator jsonIterator; + private final SchemaBasedJsonIterator schemaBasedJsonIterator; private final byte[] paddedBuffer; public SimdJsonParser() { @@ -19,18 +18,25 @@ public SimdJsonParser() { public SimdJsonParser(int capacity, int maxDepth) { bitIndexes = new BitIndexes(capacity); - jsonIterator = new 
JsonIterator(bitIndexes, capacity, maxDepth, PADDING); + byte[] stringBuffer = new byte[capacity]; + jsonIterator = new JsonIterator(bitIndexes, stringBuffer, capacity, maxDepth, PADDING); + schemaBasedJsonIterator = new SchemaBasedJsonIterator(bitIndexes, stringBuffer, PADDING); paddedBuffer = new byte[capacity]; - reader = new BlockReader(STEP_SIZE); indexer = new StructuralIndexer(bitIndexes); } + public T parse(byte[] buffer, int len, Class expectedType) { + byte[] padded = padIfNeeded(buffer, len); + reset(); + stage1(padded, len); + return schemaBasedJsonIterator.walkDocument(padded, len, expectedType); + } + public JsonValue parse(byte[] buffer, int len) { - stage0(buffer); byte[] padded = padIfNeeded(buffer, len); - reset(padded, len); - stage1(padded); - return stage2(padded, len); + reset(); + stage1(padded, len); + return jsonIterator.walkDocument(padded, len); } private byte[] padIfNeeded(byte[] buffer, int len) { @@ -41,29 +47,13 @@ private byte[] padIfNeeded(byte[] buffer, int len) { return buffer; } - private void reset(byte[] buffer, int len) { - indexer.reset(); - reader.reset(buffer, len); + private void reset() { bitIndexes.reset(); jsonIterator.reset(); } - private void stage0(byte[] buffer) { - Utf8Validator.validate(buffer); - } - - private void stage1(byte[] buffer) { - while (reader.hasFullBlock()) { - int blockIndex = reader.getBlockIndex(); - indexer.step(buffer, blockIndex, blockIndex); - reader.advance(); - } - indexer.step(reader.remainder(), 0, reader.getBlockIndex()); - reader.advance(); - indexer.finish(reader.getBlockIndex()); - } - - private JsonValue stage2(byte[] buffer, int len) { - return jsonIterator.walkDocument(buffer, len); + private void stage1(byte[] buffer, int length) { + Utf8Validator.validate(buffer, length); + indexer.index(buffer, length); } } diff --git a/src/main/java/org/simdjson/StringParser.java b/src/main/java/org/simdjson/StringParser.java index 11fb7fd..6452de9 100644 --- 
a/src/main/java/org/simdjson/StringParser.java +++ b/src/main/java/org/simdjson/StringParser.java @@ -4,34 +4,33 @@ import static org.simdjson.CharacterUtils.escape; import static org.simdjson.CharacterUtils.hexToInt; -import static org.simdjson.Tape.STRING; class StringParser { private static final byte BACKSLASH = '\\'; private static final byte QUOTE = '"'; - private static final int BYTES_PROCESSED = StructuralIndexer.BYTE_SPECIES.vectorByteSize(); + private static final int BYTES_PROCESSED = VectorUtils.BYTE_SPECIES.vectorByteSize(); private static final int MIN_HIGH_SURROGATE = 0xD800; private static final int MAX_HIGH_SURROGATE = 0xDBFF; private static final int MIN_LOW_SURROGATE = 0xDC00; private static final int MAX_LOW_SURROGATE = 0xDFFF; - private final Tape tape; - private final byte[] stringBuffer; - - private int stringBufferIdx; + int parseString(byte[] buffer, int idx, byte[] stringBuffer, int stringBufferIdx) { + int dst = doParseString(buffer, idx, stringBuffer, stringBufferIdx + Integer.BYTES); + int len = dst - stringBufferIdx - Integer.BYTES; + IntegerUtils.toBytes(len, stringBuffer, stringBufferIdx); + return dst; + } - StringParser(Tape tape, byte[] stringBuffer) { - this.tape = tape; - this.stringBuffer = stringBuffer; + int parseString(byte[] buffer, int idx, byte[] stringBuffer) { + return doParseString(buffer, idx, stringBuffer, 0); } - void parseString(byte[] buffer, int idx) { - tape.append(stringBufferIdx, STRING); + private int doParseString(byte[] buffer, int idx, byte[] stringBuffer, int offset) { int src = idx + 1; - int dst = stringBufferIdx + Integer.BYTES; + int dst = offset; while (true) { - ByteVector srcVec = ByteVector.fromArray(StructuralIndexer.BYTE_SPECIES, buffer, src); + ByteVector srcVec = ByteVector.fromArray(VectorUtils.BYTE_SPECIES, buffer, src); srcVec.intoArray(stringBuffer, dst); long backslashBits = srcVec.eq(BACKSLASH).toLong(); long quoteBits = srcVec.eq(QUOTE).toLong(); @@ -54,7 +53,7 @@ void 
parseString(byte[] buffer, int idx) { } else if (codePoint >= MIN_LOW_SURROGATE && codePoint <= MAX_LOW_SURROGATE) { throw new JsonParsingException("Invalid code point. The range U+DC00–U+DFFF is reserved for low surrogate."); } - dst += storeCodePointInStringBuffer(codePoint, dst); + dst += storeCodePointInStringBuffer(codePoint, dst, stringBuffer); } else { stringBuffer[dst + backslashDist] = escape(escapeChar); src += backslashDist + 2; @@ -65,9 +64,49 @@ void parseString(byte[] buffer, int idx) { dst += BYTES_PROCESSED; } } - int len = dst - stringBufferIdx - Integer.BYTES; - IntegerUtils.toBytes(len, stringBuffer, stringBufferIdx); - stringBufferIdx = dst; + return dst; + } + + char parseChar(byte[] buffer, int startIdx) { + int idx = startIdx + 1; + char character; + if (buffer[idx] == '\\') { + byte escapeChar = buffer[idx + 1]; + if (escapeChar == 'u') { + int codePoint = hexToInt(buffer, idx + 2); + if (codePoint >= MIN_HIGH_SURROGATE && codePoint <= MAX_LOW_SURROGATE) { + throw new JsonParsingException("Invalid code point. 
Should be within the range U+0000–U+D7FF or U+E000–U+FFFF."); /* the guard above rejects the whole surrogate block U+D800–U+DFFF */ + } + if (codePoint < 0) { + throw new JsonParsingException("Invalid unicode escape sequence."); + } + character = (char) codePoint; + idx += 6; + } else { + character = (char) escape(escapeChar); + idx += 2; + } + } else if (buffer[idx] >= 0) { + // We have an ASCII character + character = (char) buffer[idx]; + idx++; + } else if ((buffer[idx] & 0b11100000) == 0b11000000) { + // We have a two-byte UTF-8 character + int codePoint = (buffer[idx] & 0b00011111) << 6 | (buffer[idx + 1] & 0b00111111); + character = (char) codePoint; + idx += 2; + } else if ((buffer[idx] & 0b11110000) == 0b11100000) { + // We have a three-byte UTF-8 character + int codePoint = (buffer[idx] & 0b00001111) << 12 | (buffer[idx + 1] & 0b00111111) << 6 | (buffer[idx + 2] & 0b00111111); + character = (char) codePoint; + idx += 3; + } else { + throw new JsonParsingException("String cannot be deserialized to a char. Expected a single 16-bit code unit character."); + } + if (buffer[idx] != '"') { + throw new JsonParsingException("String cannot be deserialized to a char. 
Expected a single-character string."); + } + return character; } private int parseLowSurrogate(byte[] buffer, int src, int codePoint) { @@ -84,7 +123,7 @@ private int parseLowSurrogate(byte[] buffer, int src, int codePoint) { } } - private int storeCodePointInStringBuffer(int codePoint, int dst) { + private int storeCodePointInStringBuffer(int codePoint, int dst, byte[] stringBuffer) { if (codePoint < 0) { throw new JsonParsingException("Invalid unicode escape sequence."); } @@ -120,8 +159,4 @@ private boolean hasQuoteFirst(long backslashBits, long quoteBits) { private boolean hasBackslash(long backslashBits, long quoteBits) { return ((quoteBits - 1) & backslashBits) != 0; } - - void reset() { - stringBufferIdx = 0; - } } diff --git a/src/main/java/org/simdjson/StructuralIndexer.java b/src/main/java/org/simdjson/StructuralIndexer.java index 43ec952..3720fda 100644 --- a/src/main/java/org/simdjson/StructuralIndexer.java +++ b/src/main/java/org/simdjson/StructuralIndexer.java @@ -1,122 +1,320 @@ package org.simdjson; import jdk.incubator.vector.ByteVector; -import jdk.incubator.vector.IntVector; -import jdk.incubator.vector.VectorShape; -import jdk.incubator.vector.VectorSpecies; +import jdk.incubator.vector.VectorShuffle; +import java.util.Arrays; + +import static jdk.incubator.vector.ByteVector.SPECIES_256; +import static jdk.incubator.vector.ByteVector.SPECIES_512; import static jdk.incubator.vector.VectorOperators.UNSIGNED_LE; class StructuralIndexer { - static final VectorSpecies INT_SPECIES; - static final VectorSpecies BYTE_SPECIES; - static final int N_CHUNKS; + private static final int VECTOR_BIT_SIZE = VectorUtils.BYTE_SPECIES.vectorBitSize(); + private static final int STEP_SIZE = 64; + private static final byte BACKSLASH = (byte) '\\'; + private static final byte QUOTE = (byte) '"'; + private static final byte SPACE = 0x20; + private static final byte LAST_CONTROL_CHARACTER = (byte) 0x1F; + private static final long EVEN_BITS_MASK = 0x5555555555555555L; + 
private static final long ODD_BITS_MASK = ~EVEN_BITS_MASK; + private static final byte LOW_NIBBLE_MASK = 0x0f; + private static final ByteVector WHITESPACE_TABLE = VectorUtils.repeat( + new byte[]{' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100} + ); + private static final ByteVector OP_TABLE = VectorUtils.repeat( + new byte[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ':', '{', ',', '}', 0, 0} + ); + private static final byte[] LAST_BLOCK_SPACES = new byte[STEP_SIZE]; static { - String species = System.getProperty("org.simdjson.species", "preferred"); - switch (species) { - case "preferred" -> { - BYTE_SPECIES = ByteVector.SPECIES_PREFERRED; - INT_SPECIES = IntVector.SPECIES_PREFERRED; - } - case "512" -> { - BYTE_SPECIES = ByteVector.SPECIES_512; - INT_SPECIES = IntVector.SPECIES_512; - } - case "256" -> { - BYTE_SPECIES = ByteVector.SPECIES_256; - INT_SPECIES = IntVector.SPECIES_256; - } - default -> throw new IllegalArgumentException("Unsupported vector species: " + species); - } - N_CHUNKS = 64 / BYTE_SPECIES.vectorByteSize(); - assertSupportForSpecies(BYTE_SPECIES); - assertSupportForSpecies(INT_SPECIES); + Arrays.fill(LAST_BLOCK_SPACES, SPACE); } - private static void assertSupportForSpecies(VectorSpecies species) { - if (species.vectorShape() != VectorShape.S_256_BIT && species.vectorShape() != VectorShape.S_512_BIT) { - throw new IllegalArgumentException("Unsupported vector species: " + species); - } - } - - private final JsonStringScanner stringScanner; - private final CharactersClassifier classifier; private final BitIndexes bitIndexes; - - private long prevStructurals = 0; - private long unescapedCharsError = 0; - private long prevScalar = 0; + private final byte[] lastBlock = new byte[STEP_SIZE]; StructuralIndexer(BitIndexes bitIndexes) { - this.stringScanner = new JsonStringScanner(); - this.classifier = new CharactersClassifier(); this.bitIndexes = bitIndexes; } - void step(byte[] buffer, int offset, int blockIndex) { - switch 
(N_CHUNKS) { - case 1: step1(buffer, offset, blockIndex); break; - case 2: step2(buffer, offset, blockIndex); break; - default: throw new RuntimeException("Unsupported vector width: " + N_CHUNKS * 64); + void index(byte[] buffer, int length) { + bitIndexes.reset(); + switch (VECTOR_BIT_SIZE) { + case 256 -> index256(buffer, length); + case 512 -> index512(buffer, length); + default -> throw new UnsupportedOperationException("Unsupported vector width: " + VECTOR_BIT_SIZE); /* VECTOR_BIT_SIZE is already a width in bits */ + } } - private void step1(byte[] buffer, int offset, int blockIndex) { - ByteVector chunk0 = ByteVector.fromArray(ByteVector.SPECIES_512, buffer, offset); - JsonStringBlock strings = stringScanner.next(chunk0); - JsonCharacterBlock characters = classifier.classify(chunk0); - long unescaped = lteq(chunk0, (byte) 0x1F); - finishStep(characters, strings, unescaped, blockIndex); - } + private void index256(byte[] buffer, int length) { + long prevInString = 0; + long prevEscaped = 0; + long prevStructurals = 0; + long unescapedCharsError = 0; + long prevScalar = 0; - private void step2(byte[] buffer, int offset, int blockIndex) { - ByteVector chunk0 = ByteVector.fromArray(ByteVector.SPECIES_256, buffer, offset); - ByteVector chunk1 = ByteVector.fromArray(ByteVector.SPECIES_256, buffer, offset + 32); - JsonStringBlock strings = stringScanner.next(chunk0, chunk1); - JsonCharacterBlock characters = classifier.classify(chunk0, chunk1); - long unescaped = lteq(chunk0, chunk1, (byte) 0x1F); - finishStep(characters, strings, unescaped, blockIndex); - } + // Using SPECIES_512 here is not a mistake. Each iteration of the below loop processes two 256-bit chunks, + // so effectively it processes 512 bits at once. 
+ int loopBound = SPECIES_512.loopBound(length); + int offset = 0; + int blockIndex = 0; + for (; offset < loopBound; offset += STEP_SIZE) { + ByteVector chunk0 = ByteVector.fromArray(SPECIES_256, buffer, offset); + ByteVector chunk1 = ByteVector.fromArray(SPECIES_256, buffer, offset + 32); + + // string scanning + long backslash0 = chunk0.eq(BACKSLASH).toLong(); + long backslash1 = chunk1.eq(BACKSLASH).toLong(); + long backslash = backslash0 | (backslash1 << 32); + + long escaped; + if (backslash == 0) { + escaped = prevEscaped; + prevEscaped = 0; + } else { + backslash &= ~prevEscaped; + long followsEscape = backslash << 1 | prevEscaped; + long oddSequenceStarts = backslash & ODD_BITS_MASK & ~followsEscape; + + long sequencesStartingOnEvenBits = oddSequenceStarts + backslash; + // Here, we check if the unsigned addition above caused an overflow. If that's the case, we store 1 in prevEscaped. + // The formula used to detect overflow was taken from 'Hacker's Delight, Second Edition' by Henry S. Warren, Jr., + // Chapter 2-13. 
+ prevEscaped = ((oddSequenceStarts >>> 1) + (backslash >>> 1) + ((oddSequenceStarts & backslash) & 1)) >>> 63; + + long invertMask = sequencesStartingOnEvenBits << 1; + escaped = (EVEN_BITS_MASK ^ invertMask) & followsEscape; + } + + long unescaped0 = chunk0.compare(UNSIGNED_LE, LAST_CONTROL_CHARACTER).toLong(); + long unescaped1 = chunk1.compare(UNSIGNED_LE, LAST_CONTROL_CHARACTER).toLong(); + long unescaped = unescaped0 | (unescaped1 << 32); + + long quote0 = chunk0.eq(QUOTE).toLong(); + long quote1 = chunk1.eq(QUOTE).toLong(); + long quote = (quote0 | (quote1 << 32)) & ~escaped; + + long inString = prefixXor(quote) ^ prevInString; + prevInString = inString >> 63; + + // characters classification + VectorShuffle chunk0Low = chunk0.and(LOW_NIBBLE_MASK).toShuffle(); + VectorShuffle chunk1Low = chunk1.and(LOW_NIBBLE_MASK).toShuffle(); + + long whitespace0 = chunk0.eq(WHITESPACE_TABLE.rearrange(chunk0Low)).toLong(); + long whitespace1 = chunk1.eq(WHITESPACE_TABLE.rearrange(chunk1Low)).toLong(); + long whitespace = whitespace0 | (whitespace1 << 32); + + ByteVector curlified0 = chunk0.or((byte) 0x20); + ByteVector curlified1 = chunk1.or((byte) 0x20); + long op0 = curlified0.eq(OP_TABLE.rearrange(chunk0Low)).toLong(); + long op1 = curlified1.eq(OP_TABLE.rearrange(chunk1Low)).toLong(); + long op = op0 | (op1 << 32); + + // finish + long scalar = ~(op | whitespace); + long nonQuoteScalar = scalar & ~quote; + long followsNonQuoteScalar = nonQuoteScalar << 1 | prevScalar; + prevScalar = nonQuoteScalar >>> 63; + long potentialScalarStart = scalar & ~followsNonQuoteScalar; + long potentialStructuralStart = op | potentialScalarStart; + bitIndexes.write(blockIndex, prevStructurals); + blockIndex += STEP_SIZE; + prevStructurals = potentialStructuralStart & ~(inString ^ quote); + unescapedCharsError |= unescaped & inString; + } + + byte[] remainder = remainder(buffer, length, blockIndex); + ByteVector chunk0 = ByteVector.fromArray(SPECIES_256, remainder, 0); + ByteVector chunk1 
= ByteVector.fromArray(SPECIES_256, remainder, 32); + + // string scanning + long backslash0 = chunk0.eq(BACKSLASH).toLong(); + long backslash1 = chunk1.eq(BACKSLASH).toLong(); + long backslash = backslash0 | (backslash1 << 32); + + long escaped; + if (backslash == 0) { + escaped = prevEscaped; + } else { + backslash &= ~prevEscaped; + long followsEscape = backslash << 1 | prevEscaped; + long oddSequenceStarts = backslash & ODD_BITS_MASK & ~followsEscape; + + long sequencesStartingOnEvenBits = oddSequenceStarts + backslash; + long invertMask = sequencesStartingOnEvenBits << 1; + escaped = (EVEN_BITS_MASK ^ invertMask) & followsEscape; + } + + long unescaped0 = chunk0.compare(UNSIGNED_LE, LAST_CONTROL_CHARACTER).toLong(); + long unescaped1 = chunk1.compare(UNSIGNED_LE, LAST_CONTROL_CHARACTER).toLong(); + long unescaped = unescaped0 | (unescaped1 << 32); + + long quote0 = chunk0.eq(QUOTE).toLong(); + long quote1 = chunk1.eq(QUOTE).toLong(); + long quote = (quote0 | (quote1 << 32)) & ~escaped; + + long inString = prefixXor(quote) ^ prevInString; + prevInString = inString >> 63; + + // characters classification + VectorShuffle chunk0Low = chunk0.and(LOW_NIBBLE_MASK).toShuffle(); + VectorShuffle chunk1Low = chunk1.and(LOW_NIBBLE_MASK).toShuffle(); - private void finishStep(JsonCharacterBlock characters, JsonStringBlock strings, long unescaped, int blockIndex) { - long scalar = characters.scalar(); - long nonQuoteScalar = scalar & ~strings.quote(); + long whitespace0 = chunk0.eq(WHITESPACE_TABLE.rearrange(chunk0Low)).toLong(); + long whitespace1 = chunk1.eq(WHITESPACE_TABLE.rearrange(chunk1Low)).toLong(); + long whitespace = whitespace0 | (whitespace1 << 32); + + ByteVector curlified0 = chunk0.or((byte) 0x20); + ByteVector curlified1 = chunk1.or((byte) 0x20); + long op0 = curlified0.eq(OP_TABLE.rearrange(chunk0Low)).toLong(); + long op1 = curlified1.eq(OP_TABLE.rearrange(chunk1Low)).toLong(); + long op = op0 | (op1 << 32); + + // finish + long scalar = ~(op | 
whitespace); + long nonQuoteScalar = scalar & ~quote; long followsNonQuoteScalar = nonQuoteScalar << 1 | prevScalar; - prevScalar = nonQuoteScalar >>> 63; - // TODO: utf-8 validation long potentialScalarStart = scalar & ~followsNonQuoteScalar; - long potentialStructuralStart = characters.op() | potentialScalarStart; + long potentialStructuralStart = op | potentialScalarStart; bitIndexes.write(blockIndex, prevStructurals); - prevStructurals = potentialStructuralStart & ~strings.stringTail(); - unescapedCharsError |= strings.nonQuoteInsideString(unescaped); + blockIndex += STEP_SIZE; + prevStructurals = potentialStructuralStart & ~(inString ^ quote); + unescapedCharsError |= unescaped & inString; + bitIndexes.write(blockIndex, prevStructurals); + bitIndexes.finish(); + if (prevInString != 0) { + throw new JsonParsingException("Unclosed string. A string is opened, but never closed."); + } + if (unescapedCharsError != 0) { + throw new JsonParsingException("Unescaped characters. Within strings, there are characters that should be escaped."); + } } - private long lteq(ByteVector chunk0, byte scalar) { - long r = chunk0.compare(UNSIGNED_LE, scalar).toLong(); - return r; - } + private void index512(byte[] buffer, int length) { + long prevInString = 0; + long prevEscaped = 0; + long prevStructurals = 0; + long unescapedCharsError = 0; + long prevScalar = 0; - private long lteq(ByteVector chunk0, ByteVector chunk1, byte scalar) { - long r0 = chunk0.compare(UNSIGNED_LE, scalar).toLong(); - long r1 = chunk1.compare(UNSIGNED_LE, scalar).toLong(); - return r0 | (r1 << 32); - } + int loopBound = SPECIES_512.loopBound(length); + int offset = 0; + int blockIndex = 0; + for (; offset < loopBound; offset += STEP_SIZE) { + ByteVector chunk = ByteVector.fromArray(SPECIES_512, buffer, offset); - void finish(int blockIndex) { - bitIndexes.write(blockIndex, prevStructurals); + // string scanning + long backslash = chunk.eq(BACKSLASH).toLong(); + + long escaped; + if (backslash == 0) { + 
escaped = prevEscaped; + prevEscaped = 0; + } else { + backslash &= ~prevEscaped; + long followsEscape = backslash << 1 | prevEscaped; + long oddSequenceStarts = backslash & ODD_BITS_MASK & ~followsEscape; + + long sequencesStartingOnEvenBits = oddSequenceStarts + backslash; + // Here, we check if the unsigned addition above caused an overflow. If that's the case, we store 1 in prevEscaped. + // The formula used to detect overflow was taken from 'Hacker's Delight, Second Edition' by Henry S. Warren, Jr., + // Chapter 2-13. + prevEscaped = ((oddSequenceStarts >>> 1) + (backslash >>> 1) + ((oddSequenceStarts & backslash) & 1)) >>> 63; + + long invertMask = sequencesStartingOnEvenBits << 1; + escaped = (EVEN_BITS_MASK ^ invertMask) & followsEscape; + } + + long unescaped = chunk.compare(UNSIGNED_LE, LAST_CONTROL_CHARACTER).toLong(); + long quote = chunk.eq(QUOTE).toLong() & ~escaped; + long inString = prefixXor(quote) ^ prevInString; + prevInString = inString >> 63; + + // characters classification + VectorShuffle chunkLow = chunk.and(LOW_NIBBLE_MASK).toShuffle(); + long whitespace = chunk.eq(WHITESPACE_TABLE.rearrange(chunkLow)).toLong(); + ByteVector curlified = chunk.or((byte) 0x20); + long op = curlified.eq(OP_TABLE.rearrange(chunkLow)).toLong(); - stringScanner.finish(); + // finish + long scalar = ~(op | whitespace); + long nonQuoteScalar = scalar & ~quote; + long followsNonQuoteScalar = nonQuoteScalar << 1 | prevScalar; + prevScalar = nonQuoteScalar >>> 63; + long potentialScalarStart = scalar & ~followsNonQuoteScalar; + long potentialStructuralStart = op | potentialScalarStart; + bitIndexes.write(blockIndex, prevStructurals); + blockIndex += STEP_SIZE; + prevStructurals = potentialStructuralStart & ~(inString ^ quote); + unescapedCharsError |= unescaped & inString; + } + + byte[] remainder = remainder(buffer, length, blockIndex); + ByteVector chunk = ByteVector.fromArray(SPECIES_512, remainder, 0); + + // string scanning + long backslash = 
chunk.eq(BACKSLASH).toLong(); + + long escaped; + if (backslash == 0) { + escaped = prevEscaped; + } else { + backslash &= ~prevEscaped; + long followsEscape = backslash << 1 | prevEscaped; + long oddSequenceStarts = backslash & ODD_BITS_MASK & ~followsEscape; + + long sequencesStartingOnEvenBits = oddSequenceStarts + backslash; + long invertMask = sequencesStartingOnEvenBits << 1; + escaped = (EVEN_BITS_MASK ^ invertMask) & followsEscape; + } + + long unescaped = chunk.compare(UNSIGNED_LE, LAST_CONTROL_CHARACTER).toLong(); + long quote = chunk.eq(QUOTE).toLong() & ~escaped; + long inString = prefixXor(quote) ^ prevInString; + prevInString = inString >> 63; + + // characters classification + VectorShuffle chunkLow = chunk.and(LOW_NIBBLE_MASK).toShuffle(); + long whitespace = chunk.eq(WHITESPACE_TABLE.rearrange(chunkLow)).toLong(); + ByteVector curlified = chunk.or((byte) 0x20); + long op = curlified.eq(OP_TABLE.rearrange(chunkLow)).toLong(); + + // finish + long scalar = ~(op | whitespace); + long nonQuoteScalar = scalar & ~quote; + long followsNonQuoteScalar = nonQuoteScalar << 1 | prevScalar; + long potentialScalarStart = scalar & ~followsNonQuoteScalar; + long potentialStructuralStart = op | potentialScalarStart; + bitIndexes.write(blockIndex, prevStructurals); + blockIndex += STEP_SIZE; + prevStructurals = potentialStructuralStart & ~(inString ^ quote); + unescapedCharsError |= unescaped & inString; + bitIndexes.write(blockIndex, prevStructurals); + bitIndexes.finish(); + if (prevInString != 0) { + throw new JsonParsingException("Unclosed string. A string is opened, but never closed."); + } if (unescapedCharsError != 0) { throw new JsonParsingException("Unescaped characters. 
Within strings, there are characters that should be escaped."); } } - void reset() { - stringScanner.reset(); - prevStructurals = 0; - unescapedCharsError = 0; - prevScalar = 0; + private byte[] remainder(byte[] buffer, int length, int idx) { + System.arraycopy(LAST_BLOCK_SPACES, 0, lastBlock, 0, lastBlock.length); + System.arraycopy(buffer, idx, lastBlock, 0, length - idx); + return lastBlock; + } + + private static long prefixXor(long bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; } } diff --git a/src/main/java/org/simdjson/TapeBuilder.java b/src/main/java/org/simdjson/TapeBuilder.java index fc7f87e..3d05783 100644 --- a/src/main/java/org/simdjson/TapeBuilder.java +++ b/src/main/java/org/simdjson/TapeBuilder.java @@ -10,6 +10,7 @@ import static org.simdjson.Tape.ROOT; import static org.simdjson.Tape.START_ARRAY; import static org.simdjson.Tape.START_OBJECT; +import static org.simdjson.Tape.STRING; import static org.simdjson.Tape.TRUE_VALUE; class TapeBuilder { @@ -23,16 +24,18 @@ class TapeBuilder { private final NumberParser numberParser; private final StringParser stringParser; - TapeBuilder(int capacity, int depth, int padding) { + private int stringBufferIdx; + + TapeBuilder(int capacity, int depth, int padding, byte[] stringBuffer) { this.tape = new Tape(capacity); this.openContainers = new OpenContainer[depth]; this.padding = padding; for (int i = 0; i < openContainers.length; i++) { openContainers[i] = new OpenContainer(); } - this.stringBuffer = new byte[capacity]; - this.numberParser = new NumberParser(tape); - this.stringParser = new StringParser(tape, stringBuffer); + this.stringBuffer = stringBuffer; + this.numberParser = new NumberParser(); + this.stringParser = new StringParser(); } void visitDocumentStart() { @@ -55,9 +58,9 @@ void visitEmptyArray() { void visitRootPrimitive(byte[] buffer, int idx, int 
len) { switch (buffer[idx]) { case '"' -> visitString(buffer, idx); - case 't' -> visitRootTrueAtom(buffer, idx); - case 'f' -> visitRootFalseAtom(buffer, idx); - case 'n' -> visitRootNullAtom(buffer, idx); + case 't' -> visitRootTrueAtom(buffer, idx, len); + case 'f' -> visitRootFalseAtom(buffer, idx, len); + case 'n' -> visitRootNullAtom(buffer, idx, len); case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> visitRootNumber(buffer, idx, len); default -> throw new JsonParsingException("Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."); } @@ -102,8 +105,9 @@ private void visitTrueAtom(byte[] buffer, int idx) { tape.append(0, TRUE_VALUE); } - private void visitRootTrueAtom(byte[] buffer, int idx) { - if (!isTrue(buffer, idx)) { + private void visitRootTrueAtom(byte[] buffer, int idx, int len) { + boolean valid = idx + 4 <= len && isTrue(buffer, idx) && (idx + 4 == len || isStructuralOrWhitespace(buffer[idx + 4])); + if (!valid) { throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'true'."); } tape.append(0, TRUE_VALUE); @@ -124,8 +128,9 @@ private void visitFalseAtom(byte[] buffer, int idx) { tape.append(0, FALSE_VALUE); } - private void visitRootFalseAtom(byte[] buffer, int idx) { - if (!isFalse(buffer, idx)) { + private void visitRootFalseAtom(byte[] buffer, int idx, int len) { + boolean valid = idx + 5 <= len && isFalse(buffer, idx) && (idx + 5 == len || isStructuralOrWhitespace(buffer[idx + 5])); + if (!valid) { throw new JsonParsingException("Invalid value starting at " + idx + ". 
Expected 'false'."); } tape.append(0, FALSE_VALUE); @@ -147,8 +152,9 @@ private void visitNullAtom(byte[] buffer, int idx) { tape.append(0, NULL_VALUE); } - private void visitRootNullAtom(byte[] buffer, int idx) { - if (!isNull(buffer, idx)) { + private void visitRootNullAtom(byte[] buffer, int idx, int len) { + boolean valid = idx + 4 <= len && isNull(buffer, idx) && (idx + 4 == len || isStructuralOrWhitespace(buffer[idx + 4])); + if (!valid) { throw new JsonParsingException("Invalid value starting at " + idx + ". Expected 'null'."); } tape.append(0, NULL_VALUE); @@ -166,11 +172,12 @@ void visitKey(byte[] buffer, int idx) { } private void visitString(byte[] buffer, int idx) { - stringParser.parseString(buffer, idx); + tape.append(stringBufferIdx, STRING); + stringBufferIdx = stringParser.parseString(buffer, idx, stringBuffer, stringBufferIdx); } private void visitNumber(byte[] buffer, int idx) { - numberParser.parseNumber(buffer, idx); + numberParser.parseNumber(buffer, idx, tape); } private void visitRootNumber(byte[] buffer, int idx, int len) { @@ -178,7 +185,7 @@ private void visitRootNumber(byte[] buffer, int idx, int len) { byte[] copy = new byte[remainingLen + padding]; System.arraycopy(buffer, idx, copy, 0, remainingLen); Arrays.fill(copy, remainingLen, remainingLen + padding, SPACE); - numberParser.parseNumber(copy, 0); + numberParser.parseNumber(copy, 0, tape); } private void startContainer(int depth) { @@ -202,7 +209,7 @@ private void emptyContainer(char start, char end) { void reset() { tape.reset(); - stringParser.reset(); + stringBufferIdx = 0; } JsonValue createJsonValue(byte[] buffer) { diff --git a/src/main/java/org/simdjson/Utf8Validator.java b/src/main/java/org/simdjson/Utf8Validator.java index e4d9c63..7645fd1 100644 --- a/src/main/java/org/simdjson/Utf8Validator.java +++ b/src/main/java/org/simdjson/Utf8Validator.java @@ -1,261 +1,250 @@ package org.simdjson; -import jdk.incubator.vector.*; +import jdk.incubator.vector.ByteVector; +import 
jdk.incubator.vector.IntVector; +import jdk.incubator.vector.VectorMask; +import jdk.incubator.vector.VectorShuffle; import java.util.Arrays; -class Utf8Validator { +import static jdk.incubator.vector.VectorOperators.EQ; +import static jdk.incubator.vector.VectorOperators.LSHL; +import static jdk.incubator.vector.VectorOperators.LSHR; +import static jdk.incubator.vector.VectorOperators.NE; +import static jdk.incubator.vector.VectorOperators.UNSIGNED_GE; +import static jdk.incubator.vector.VectorOperators.UNSIGNED_GT; +import static jdk.incubator.vector.VectorShuffle.iota; +import static org.simdjson.VectorUtils.BYTE_SPECIES; +import static org.simdjson.VectorUtils.INT_SPECIES; - private static final VectorSpecies VECTOR_SPECIES = StructuralIndexer.BYTE_SPECIES; - private static final ByteVector INCOMPLETE_CHECK = getIncompleteCheck(); - private static final VectorShuffle SHIFT_FOUR_BYTES_FORWARD = VectorShuffle.iota(StructuralIndexer.INT_SPECIES, - StructuralIndexer.INT_SPECIES.elementSize() - 1, 1, true); - private static final ByteVector LOW_NIBBLE_MASK = ByteVector.broadcast(VECTOR_SPECIES, 0b0000_1111); - private static final ByteVector ALL_ASCII_MASK = ByteVector.broadcast(VECTOR_SPECIES, (byte) 0b1000_0000); +class Utf8Validator { - /** - * Validate the input bytes are valid UTF8 - * - * @param inputBytes the input bytes to validate - * @throws JsonParsingException if the input is not valid UTF8 - */ - static void validate(byte[] inputBytes) { + // Leading byte not followed by a continuation byte but by another leading or ASCII byte, e.g. 11______ 0_______, 11______ 11______ + private static final byte TOO_SHORT = 1; + // ASCII followed by continuation byte e.g. 01111111 10_000000. + private static final byte TOO_LONG = 1 << 1; + // Any 3-byte sequence that could be represented by a shorter sequence (any sequence smaller than 1110_0000 10_100000 10_000000). + private static final byte OVERLONG_3BYTE = 1 << 2; + // Any decoded code point greater than U+10FFFF. 
e.g. 11110_100 10_010000 10_000000 10_000000. + // Code points in the range of U+D800 - U+DFFF (inclusive) are the surrogates for UTF-16. + // These 2048 code points that are reserved for UTF-16 are disallowed in UTF-8, e.g. 1110_1101 10_100000 10_000000. + private static final byte SURROGATE = 1 << 4; + // First valid 2-byte sequence: 110_00010 10_000000. Anything smaller is considered overlong as it fits into a 1-byte sequence. + private static final byte OVERLONG_2BYTE = 1 << 5; + // Similar to TOO_LARGE, but for cases where the continuation byte's high nibble is 1000, e.g. 11110_101 10_000000 10_000000. + private static final byte TOO_LARGE_1000 = 1 << 6; + // Any 4-byte sequence that could be represented by a shorter sequence (decoded code point not above U+FFFF), e.g. 11110_000 10_000000 10_000000 10_000000. + private static final byte OVERLONG_4BYTE = 1 << 6; + // An example: 10_000000 10_000000. + private static final byte TWO_CONTINUATIONS = (byte) (1 << 7); + private static final byte MAX_2_LEADING_BYTE = (byte) 0b110_11111; + private static final byte MAX_3_LEADING_BYTE = (byte) 0b1110_1111; + private static final int TWO_BYTES_SIZE = Byte.SIZE * 2; + private static final int THREE_BYTES_SIZE = Byte.SIZE * 3; + private static final ByteVector BYTE_1_HIGH_LOOKUP = createByte1HighLookup(); + private static final ByteVector BYTE_1_LOW_LOOKUP = createByte1LowLookup(); + private static final ByteVector BYTE_2_HIGH_LOOKUP = createByte2HighLookup(); + private static final ByteVector INCOMPLETE_CHECK = createIncompleteCheck(); + private static final byte LOW_NIBBLE_MASK = 0b0000_1111; + private static final byte ALL_ASCII_MASK = (byte) 0b1000_0000; + private static final VectorShuffle FOUR_BYTES_FORWARD_SHIFT = iota(INT_SPECIES, INT_SPECIES.elementSize() - 1, 1, true); + private static final int STEP_SIZE = BYTE_SPECIES.vectorByteSize(); + + static void validate(byte[] buffer, int length) { long previousIncomplete = 0; long errors = 0; int previousFourUtf8Bytes = 0; - int idx =
0; - for (; idx < VECTOR_SPECIES.loopBound(inputBytes.length); idx += VECTOR_SPECIES.vectorByteSize()) { - ByteVector utf8Vector = ByteVector.fromArray(VECTOR_SPECIES, inputBytes, idx); - // ASCII fast path can bypass the checks that are only required for multibyte code points - if (isAscii(utf8Vector)) { + int loopBound = BYTE_SPECIES.loopBound(length); + int offset = 0; + for (; offset < loopBound; offset += STEP_SIZE) { + ByteVector chunk = ByteVector.fromArray(BYTE_SPECIES, buffer, offset); + IntVector chunkAsInts = chunk.reinterpretAsInts(); + // ASCII fast path can bypass the checks that are only required for multibyte code points. + if (chunk.and(ALL_ASCII_MASK).compare(EQ, 0).allTrue()) { errors |= previousIncomplete; } else { - previousIncomplete = isIncomplete(utf8Vector); - - var fourBytesPrevious = fourBytesPreviousSlice(utf8Vector, previousFourUtf8Bytes); - - ByteVector firstCheck = firstTwoByteSequenceCheck(utf8Vector.reinterpretAsInts(), fourBytesPrevious); - ByteVector secondCheck = lastTwoByteSequenceCheck(utf8Vector.reinterpretAsInts(), fourBytesPrevious, firstCheck); - - errors |= secondCheck.compare(VectorOperators.NE, 0).toLong(); + previousIncomplete = chunk.compare(UNSIGNED_GE, INCOMPLETE_CHECK).toLong(); + // Shift the input forward by four bytes to make space for the previous four bytes. + // The previous three bytes are required for validation, pulling in the last integer + // will give the previous four bytes. The switch to integer vectors is to allow for + // integer shifting instead of the more expensive shuffle / slice operations. + IntVector chunkWithPreviousFourBytes = chunkAsInts + .rearrange(FOUR_BYTES_FORWARD_SHIFT) + .withLane(0, previousFourUtf8Bytes); + // Shift the current input forward by one byte to include one byte from the previous chunk. 
+ ByteVector previousOneByte = chunkAsInts + .lanewise(LSHL, Byte.SIZE) + .or(chunkWithPreviousFourBytes.lanewise(LSHR, THREE_BYTES_SIZE)) + .reinterpretAsBytes(); + ByteVector byte2HighNibbles = chunkAsInts.lanewise(LSHR, 4) + .reinterpretAsBytes() + .and(LOW_NIBBLE_MASK); + ByteVector byte1HighNibbles = previousOneByte.reinterpretAsInts() + .lanewise(LSHR, 4) + .reinterpretAsBytes() + .and(LOW_NIBBLE_MASK); + ByteVector byte1LowNibbles = previousOneByte.and(LOW_NIBBLE_MASK); + ByteVector byte1HighState = byte1HighNibbles.selectFrom(BYTE_1_HIGH_LOOKUP); + ByteVector byte1LowState = byte1LowNibbles.selectFrom(BYTE_1_LOW_LOOKUP); + ByteVector byte2HighState = byte2HighNibbles.selectFrom(BYTE_2_HIGH_LOOKUP); + ByteVector firstCheck = byte1HighState.and(byte1LowState).and(byte2HighState); + // All remaining checks are for invalid 3 and 4-byte sequences, which either have too many + // continuation bytes or not enough. + ByteVector previousTwoBytes = chunkAsInts + .lanewise(LSHL, TWO_BYTES_SIZE) + .or(chunkWithPreviousFourBytes.lanewise(LSHR, TWO_BYTES_SIZE)) + .reinterpretAsBytes(); + // The minimum leading byte of 3-byte sequences is always greater than the maximum leading byte of 2-byte sequences. + VectorMask is3ByteLead = previousTwoBytes.compare(UNSIGNED_GT, MAX_2_LEADING_BYTE); + ByteVector previousThreeBytes = chunkAsInts + .lanewise(LSHL, THREE_BYTES_SIZE) + .or(chunkWithPreviousFourBytes.lanewise(LSHR, Byte.SIZE)) + .reinterpretAsBytes(); + // The minimum leading byte of 4-byte sequences is always greater than the maximum leading byte of 3-byte sequences. + VectorMask is4ByteLead = previousThreeBytes.compare(UNSIGNED_GT, MAX_3_LEADING_BYTE); + // The firstCheck vector contains 0x80 values on continuation byte indexes. + // The leading bytes of 3 and 4-byte sequences should match up with these indexes and zero them out. 
+ ByteVector secondCheck = firstCheck.add((byte) 0x80, is3ByteLead.or(is4ByteLead)); + errors |= secondCheck.compare(NE, 0).toLong(); } - previousFourUtf8Bytes = utf8Vector.reinterpretAsInts().lane(StructuralIndexer.INT_SPECIES.length() - 1); + previousFourUtf8Bytes = chunkAsInts.lane(INT_SPECIES.length() - 1); } - // if the input file doesn't align with the vector width, pad the missing bytes with zero - VectorMask remainingBytes = VECTOR_SPECIES.indexInRange(idx, inputBytes.length); - ByteVector lastVectorChunk = ByteVector.fromArray(VECTOR_SPECIES, inputBytes, idx, remainingBytes); - if (!isAscii(lastVectorChunk)) { - previousIncomplete = isIncomplete(lastVectorChunk); - - var fourBytesPrevious = fourBytesPreviousSlice(lastVectorChunk, previousFourUtf8Bytes); - - ByteVector firstCheck = firstTwoByteSequenceCheck(lastVectorChunk.reinterpretAsInts(), fourBytesPrevious); - ByteVector secondCheck = lastTwoByteSequenceCheck(lastVectorChunk.reinterpretAsInts(), fourBytesPrevious, firstCheck); - - errors |= secondCheck.compare(VectorOperators.NE, 0).toLong(); + // If the input file doesn't align with the vector width, pad the missing bytes with zeros. + VectorMask remainingBytes = BYTE_SPECIES.indexInRange(offset, length); + ByteVector chunk = ByteVector.fromArray(BYTE_SPECIES, buffer, offset, remainingBytes); + if (!chunk.and(ALL_ASCII_MASK).compare(EQ, 0).allTrue()) { + IntVector chunkAsInts = chunk.reinterpretAsInts(); + previousIncomplete = chunk.compare(UNSIGNED_GE, INCOMPLETE_CHECK).toLong(); + // Shift the input forward by four bytes to make space for the previous four bytes. + // The previous three bytes are required for validation, pulling in the last integer + // will give the previous four bytes. The switch to integer vectors is to allow for + // integer shifting instead of the more expensive shuffle / slice operations. 
+ IntVector chunkWithPreviousFourBytes = chunkAsInts + .rearrange(FOUR_BYTES_FORWARD_SHIFT) + .withLane(0, previousFourUtf8Bytes); + // Shift the current input forward by one byte to include one byte from the previous chunk. + ByteVector previousOneByte = chunkAsInts + .lanewise(LSHL, Byte.SIZE) + .or(chunkWithPreviousFourBytes.lanewise(LSHR, THREE_BYTES_SIZE)) + .reinterpretAsBytes(); + ByteVector byte2HighNibbles = chunkAsInts.lanewise(LSHR, 4) + .reinterpretAsBytes() + .and(LOW_NIBBLE_MASK); + ByteVector byte1HighNibbles = previousOneByte.reinterpretAsInts() + .lanewise(LSHR, 4) + .reinterpretAsBytes() + .and(LOW_NIBBLE_MASK); + ByteVector byte1LowNibbles = previousOneByte.and(LOW_NIBBLE_MASK); + ByteVector byte1HighState = byte1HighNibbles.selectFrom(BYTE_1_HIGH_LOOKUP); + ByteVector byte1LowState = byte1LowNibbles.selectFrom(BYTE_1_LOW_LOOKUP); + ByteVector byte2HighState = byte2HighNibbles.selectFrom(BYTE_2_HIGH_LOOKUP); + ByteVector firstCheck = byte1HighState.and(byte1LowState).and(byte2HighState); + // All remaining checks are for invalid 3 and 4-byte sequences, which either have too many + // continuation bytes or not enough. + ByteVector previousTwoBytes = chunkAsInts + .lanewise(LSHL, TWO_BYTES_SIZE) + .or(chunkWithPreviousFourBytes.lanewise(LSHR, TWO_BYTES_SIZE)) + .reinterpretAsBytes(); + // The minimum leading byte of 3-byte sequences is always greater than the maximum leading byte of 2-byte sequences. + VectorMask is3ByteLead = previousTwoBytes.compare(UNSIGNED_GT, MAX_2_LEADING_BYTE); + ByteVector previousThreeBytes = chunkAsInts + .lanewise(LSHL, THREE_BYTES_SIZE) + .or(chunkWithPreviousFourBytes.lanewise(LSHR, Byte.SIZE)) + .reinterpretAsBytes(); + // The minimum leading byte of 4-byte sequences is always greater than the maximum leading byte of 3-byte sequences. + VectorMask is4ByteLead = previousThreeBytes.compare(UNSIGNED_GT, MAX_3_LEADING_BYTE); + // The firstCheck vector contains 0x80 values on continuation byte indexes. 
+ // The leading bytes of 3 and 4-byte sequences should match up with these indexes and zero them out. + ByteVector secondCheck = firstCheck.add((byte) 0x80, is3ByteLead.or(is4ByteLead)); + errors |= secondCheck.compare(NE, 0).toLong(); } if ((errors | previousIncomplete) != 0) { - throw new JsonParsingException("Invalid UTF8"); + throw new JsonParsingException("The input is not valid UTF-8"); } } - /* Shuffles the input forward by four bytes to make space for the previous four bytes. - The previous three bytes are required for validation, pulling in the last integer will give the previous four bytes. - The switch to integer vectors is to allow for integer shifting instead of the more expensive shuffle / slice operations */ - private static IntVector fourBytesPreviousSlice(ByteVector vectorChunk, int previousFourUtf8Bytes) { - return vectorChunk.reinterpretAsInts() - .rearrange(SHIFT_FOUR_BYTES_FORWARD) - .withLane(0, previousFourUtf8Bytes); - } - - // works similar to previousUtf8Vector.slice(VECTOR_SPECIES.length() - numOfBytesToInclude, utf8Vector) but without the performance cost - private static ByteVector previousVectorSlice(IntVector utf8Vector, IntVector fourBytesPrevious, int numOfPreviousBytes) { - return utf8Vector - .lanewise(VectorOperators.LSHL, Byte.SIZE * numOfPreviousBytes) - .or(fourBytesPrevious.lanewise(VectorOperators.LSHR, Byte.SIZE * (4 - numOfPreviousBytes))) - .reinterpretAsBytes(); - } - - private static ByteVector firstTwoByteSequenceCheck(IntVector utf8Vector, IntVector fourBytesPrevious) { - // shift the current input forward by 1 byte to include 1 byte from the previous input - var oneBytePrevious = previousVectorSlice(utf8Vector, fourBytesPrevious, 1); - - // high nibbles of the current input (e.g. 
0xC3 >> 4 = 0xC) - ByteVector byte2HighNibbles = utf8Vector.lanewise(VectorOperators.LSHR, 4) - .reinterpretAsBytes().and(LOW_NIBBLE_MASK); - - // high nibbles of the shifted input - ByteVector byte1HighNibbles = oneBytePrevious.reinterpretAsInts().lanewise(VectorOperators.LSHR, 4) - .reinterpretAsBytes().and(LOW_NIBBLE_MASK); - - // low nibbles of the shifted input (e.g. 0xC3 & 0xF = 0x3) - ByteVector byte1LowNibbles = oneBytePrevious.and(LOW_NIBBLE_MASK); - - ByteVector byte1HighState = byte1HighNibbles.selectFrom(LookupTable.byte1High); - ByteVector byte1LowState = byte1LowNibbles.selectFrom(LookupTable.byte1Low); - ByteVector byte2HighState = byte2HighNibbles.selectFrom(LookupTable.byte2High); - - return byte1HighState.and(byte1LowState).and(byte2HighState); - } - - // All remaining checks are invalid 3–4 byte sequences, which either have too many continuations bytes or not enough - private static ByteVector lastTwoByteSequenceCheck(IntVector utf8Vector, IntVector fourBytesPrevious, ByteVector firstCheck) { - // the minimum 3byte lead - 1110_0000 is always greater than the max 2byte lead - 110_11111 - ByteVector twoBytesPrevious = previousVectorSlice(utf8Vector, fourBytesPrevious, 2); - VectorMask is3ByteLead = twoBytesPrevious.compare(VectorOperators.UNSIGNED_GT, (byte) 0b110_11111); - - // the minimum 4byte lead - 1111_0000 is always greater than the max 3byte lead - 1110_1111 - ByteVector threeBytesPrevious = previousVectorSlice(utf8Vector, fourBytesPrevious, 3); - VectorMask is4ByteLead = threeBytesPrevious.compare(VectorOperators.UNSIGNED_GT, (byte) 0b1110_1111); - - // the firstCheck vector contains 0x80 values on continuation byte indexes - // the 3/4 byte lead bytes should match up with these indexes and zero them out - return firstCheck.add((byte) 0x80, is3ByteLead.or(is4ByteLead)); - } - - /* checks that the previous vector isn't in an incomplete state. 
- Previous vector is in an incomplete state if the last byte is smaller than 0xC0, - or the second last byte is smaller than 0xE0, or the third last byte is smaller than 0xF0.*/ - private static ByteVector getIncompleteCheck() { - int vectorBytes = VECTOR_SPECIES.vectorByteSize(); - byte[] eofArray = new byte[vectorBytes]; + private static ByteVector createIncompleteCheck() { + // Previous vector is in an incomplete state if the last byte is greater than or equal to 0xC0, + // or the second last byte is greater than or equal to 0xE0, or the third last byte is greater than or equal to 0xF0. + int vectorByteSize = BYTE_SPECIES.vectorByteSize(); + byte[] eofArray = new byte[vectorByteSize]; Arrays.fill(eofArray, (byte) 255); - eofArray[vectorBytes - 3] = (byte) 0xF0; - eofArray[vectorBytes - 2] = (byte) 0xE0; - eofArray[vectorBytes - 1] = (byte) 0xC0; - return ByteVector.fromArray(VECTOR_SPECIES, eofArray, 0); + eofArray[vectorByteSize - 3] = (byte) 0xF0; + eofArray[vectorByteSize - 2] = (byte) 0xE0; + eofArray[vectorByteSize - 1] = (byte) 0xC0; + return ByteVector.fromArray(BYTE_SPECIES, eofArray, 0); } - private static long isIncomplete(ByteVector utf8Vector) { - return utf8Vector.compare(VectorOperators.UNSIGNED_GE, INCOMPLETE_CHECK).toLong(); + private static ByteVector createByte1HighLookup() { + byte[] byte1HighArray = new byte[]{ + // ASCII high nibble = 0000 -> 0111, ie 0 -> 7 index in lookup table + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // Continuation high nibble = 1000 -> 1011 + TWO_CONTINUATIONS, TWO_CONTINUATIONS, TWO_CONTINUATIONS, TWO_CONTINUATIONS, + // Two byte lead high nibble = 1100 -> 1101 + TOO_SHORT | OVERLONG_2BYTE, TOO_SHORT, + // Three byte lead high nibble = 1110 + TOO_SHORT | OVERLONG_3BYTE | SURROGATE, + // Four byte lead high nibble = 1111 + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4BYTE + }; + return alignArrayToVector(byte1HighArray); } - // ASCII will never exceed 01111_1111 - private static boolean isAscii(ByteVector
utf8Vector) { - return utf8Vector.and(ALL_ASCII_MASK).compare(VectorOperators.EQ, 0).allTrue(); + private static ByteVector createByte1LowLookup() { + final byte CARRY = TOO_SHORT | TOO_LONG | TWO_CONTINUATIONS; + byte[] byte1LowArray = new byte[]{ + // ASCII, two byte lead and three byte leading low nibble = 0000 -> 1111, + // Four byte lead low nibble = 0000 -> 0111. + // Continuation byte low nibble is inconsequential + // Low nibble does not affect the states TOO_SHORT, TOO_LONG, TWO_CONTINUATIONS, so they will + // be carried over regardless. + CARRY | OVERLONG_2BYTE | OVERLONG_3BYTE | OVERLONG_4BYTE, + // 0001 + CARRY | OVERLONG_2BYTE, + CARRY, + CARRY, + // 1111_0100 -> 1111 = TOO_LARGE range + CARRY | TOO_LARGE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // 1110_1101 + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + }; + return alignArrayToVector(byte1LowArray); } - private static class LookupTable { - /* Bit 0 = Too Short (lead byte not followed by a continuation byte but by a lead/ASCII byte) - e.g. 11______ 0_______ - 11______ 11______ */ - static final byte TOO_SHORT = 1; - - /* Bit 1 = Too Long (ASCII followed by continuation byte) - e.g. 01111111 10_000000 */ - static final byte TOO_LONG = 1 << 1; - - /* Bit 2 = Overlong 3-byte - Any 3-byte sequence that could be represented by a shorter sequence - Which is any sequence smaller than 1110_0000 10_100000 10_000000 */ - static final byte OVERLONG_3BYTE = 1 << 2; - - /* Bit 3 = Too Large - Any decoded codepoint greater than U+10FFFF - e.g. 
11110_100 10_010000 10_000000 10_000000 */ - static final byte TOO_LARGE = 1 << 3; - - /* Bit 4 = Surrogate - code points in the range of U+D800 - U+DFFF (inclusive) are the surrogates for UTF-16. - These 2048 code points that are reserved for UTF-16 are disallowed in UTF-8 - e.g. 1110_1101 10_100000 10_000000 */ - static final byte SURROGATE = 1 << 4; - - /* Bit 5 = Overlong 2-byte - first valid two byte sequence: 110_00010 10_000000 - anything smaller is considered overlong as it would fit into a one byte sequence / ASCII */ - static final byte OVERLONG_2BYTE = 1 << 5; - - /* Bit 6 = Too Large 1000 - Similar to TOO_LARGE, but for cases where the continuation byte's high nibble is 1000 - e.g. 11110_101 10_000000 10_000000 */ - static final byte TOO_LARGE_1000 = 1 << 6; - - /* Bit 6 = Overlong 4-byte - Any decoded code point below above U+FFFF / 11110_000 10_001111 10_111111 10_111111 - e.g. 11110_000 10_000000 10_000000 10_000000 */ - static final byte OVERLONG_4BYTE = 1 << 6; - - /* Bit 7 = Two Continuations - e.g. 
10_000000 10_000000 */ - static final byte TWO_CONTINUATIONS = (byte) (1 << 7); - - private final static ByteVector byte1High = getByte1HighLookup(); - private final static ByteVector byte1Low = getByte1LowLookup(); - private final static ByteVector byte2High = getByte2HighLookup(); - - private static ByteVector getByte1HighLookup() { - byte[] byte1HighArray = new byte[]{ - /* ASCII high nibble = 0000 -> 0111, ie 0 -> 7 index in lookup table */ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - /* Continuation high nibble = 1000 -> 1011 */ - TWO_CONTINUATIONS, TWO_CONTINUATIONS, TWO_CONTINUATIONS, TWO_CONTINUATIONS, - /* Two byte lead high nibble = 1100 -> 1101 */ - TOO_SHORT | OVERLONG_2BYTE, TOO_SHORT, - /* Three byte lead high nibble = 1110 */ - TOO_SHORT | OVERLONG_3BYTE | SURROGATE, - /* Four byte lead high nibble = 1111 */ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4BYTE - }; - - return alignArrayToVector(byte1HighArray); - } - - private static ByteVector alignArrayToVector(byte[] arrayValues) { - // pad array with zeroes to align up with vector size - byte[] alignedArray = new byte[VECTOR_SPECIES.vectorByteSize()]; - System.arraycopy(arrayValues, 0, alignedArray, 0, arrayValues.length); - return ByteVector.fromArray(VECTOR_SPECIES, alignedArray, 0); - } - - private static ByteVector getByte1LowLookup() { - final byte CARRY = TOO_SHORT | TOO_LONG | TWO_CONTINUATIONS; - byte[] byte1LowArray = new byte[]{ - /* ASCII, two Byte lead and three byte lead low nibble = 0000 -> 1111, - * Four byte lead low nibble = 0000 -> 0111 - * Continuation byte low nibble is inconsequential - * Low nibble does not affect the states TOO_SHORT, TOO_LONG, TWO_CONTINUATIONS, so they will be carried over regardless */ - CARRY | OVERLONG_2BYTE | OVERLONG_3BYTE | OVERLONG_4BYTE, - // 0001 - CARRY | OVERLONG_2BYTE, - CARRY, - CARRY, - // 1111_0100 -> 1111 = TOO_LARGE range - CARRY | TOO_LARGE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | 
TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // 1110_1101 - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - }; - - return alignArrayToVector(byte1LowArray); - } - - private static ByteVector getByte2HighLookup() { - byte[] byte2HighArray = new byte[]{ - // ASCII high nibble = 0000 -> 0111, ie 0 -> 7 index in lookup table - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - // Continuation high nibble - 1000 -> 1011 - TOO_LONG | TWO_CONTINUATIONS | OVERLONG_2BYTE | OVERLONG_3BYTE | OVERLONG_4BYTE | TOO_LARGE_1000, - TOO_LONG | TWO_CONTINUATIONS | OVERLONG_2BYTE | OVERLONG_3BYTE | TOO_LARGE, - TOO_LONG | TWO_CONTINUATIONS | OVERLONG_2BYTE | SURROGATE | TOO_LARGE, - TOO_LONG | TWO_CONTINUATIONS | OVERLONG_2BYTE | SURROGATE | TOO_LARGE, - // 1100 -> 1111 = unexpected lead byte - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - }; + private static ByteVector createByte2HighLookup() { + byte[] byte2HighArray = new byte[]{ + // ASCII high nibble = 0000 -> 0111, ie 0 -> 7 index in lookup table + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + // Continuation high nibble - 1000 -> 1011 + TOO_LONG | TWO_CONTINUATIONS | OVERLONG_2BYTE | OVERLONG_3BYTE | OVERLONG_4BYTE | TOO_LARGE_1000, + TOO_LONG | TWO_CONTINUATIONS | OVERLONG_2BYTE | OVERLONG_3BYTE | TOO_LARGE, + TOO_LONG | TWO_CONTINUATIONS | OVERLONG_2BYTE | SURROGATE | TOO_LARGE, + TOO_LONG | TWO_CONTINUATIONS | OVERLONG_2BYTE | SURROGATE | TOO_LARGE, + // 1100 -> 1111 = unexpected leading byte + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + }; + return alignArrayToVector(byte2HighArray); + } - return alignArrayToVector(byte2HighArray); - } + private static 
ByteVector alignArrayToVector(byte[] arrayValues) { + // Pad array with zeroes to align up with vector size. + byte[] alignedArray = new byte[BYTE_SPECIES.vectorByteSize()]; + System.arraycopy(arrayValues, 0, alignedArray, 0, arrayValues.length); + return ByteVector.fromArray(BYTE_SPECIES, alignedArray, 0); } } diff --git a/src/main/java/org/simdjson/VectorUtils.java b/src/main/java/org/simdjson/VectorUtils.java new file mode 100644 index 0000000..7a1ce8f --- /dev/null +++ b/src/main/java/org/simdjson/VectorUtils.java @@ -0,0 +1,48 @@ +package org.simdjson; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.IntVector; +import jdk.incubator.vector.VectorShape; +import jdk.incubator.vector.VectorSpecies; + +class VectorUtils { + + static final VectorSpecies INT_SPECIES; + static final VectorSpecies BYTE_SPECIES; + + static { + String species = System.getProperty("org.simdjson.species", "preferred"); + switch (species) { + case "preferred" -> { + BYTE_SPECIES = ByteVector.SPECIES_PREFERRED; + INT_SPECIES = IntVector.SPECIES_PREFERRED; + assertSupportForSpecies(BYTE_SPECIES); + assertSupportForSpecies(INT_SPECIES); + } + case "512" -> { + BYTE_SPECIES = ByteVector.SPECIES_512; + INT_SPECIES = IntVector.SPECIES_512; + } + case "256" -> { + BYTE_SPECIES = ByteVector.SPECIES_256; + INT_SPECIES = IntVector.SPECIES_256; + } + default -> throw new IllegalArgumentException("Unsupported vector species: " + species); + } + } + + private static void assertSupportForSpecies(VectorSpecies species) { + if (species.vectorShape() != VectorShape.S_256_BIT && species.vectorShape() != VectorShape.S_512_BIT) { + throw new IllegalArgumentException("Unsupported vector species: " + species); + } + } + + static ByteVector repeat(byte[] array) { + int n = BYTE_SPECIES.vectorByteSize() / 4; + byte[] result = new byte[n * array.length]; + for (int dst = 0; dst < result.length; dst += array.length) { + System.arraycopy(array, 0, result, dst, array.length); + } + return 
ByteVector.fromArray(BYTE_SPECIES, result, 0); + } +} diff --git a/src/main/java/org/simdjson/annotations/JsonFieldName.java b/src/main/java/org/simdjson/annotations/JsonFieldName.java new file mode 100644 index 0000000..04c5530 --- /dev/null +++ b/src/main/java/org/simdjson/annotations/JsonFieldName.java @@ -0,0 +1,13 @@ +package org.simdjson.annotations; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Target({ElementType.ANNOTATION_TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER}) +@Retention(RetentionPolicy.RUNTIME) +public @interface JsonFieldName { + + String value() default ""; +} diff --git a/src/test/java/org/simdjson/ArrayParsingTest.java b/src/test/java/org/simdjson/ArrayParsingTest.java new file mode 100644 index 0000000..ef738f2 --- /dev/null +++ b/src/test/java/org/simdjson/ArrayParsingTest.java @@ -0,0 +1,245 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +import static org.assertj.core.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; + +public class ArrayParsingTest { + + @Test + public void emptyArrayAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + while (it.hasNext()) { + fail("Unexpected value"); + it.next(); + } + } + + @Test + public void arrayIterator() { + 
// given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + int[] expectedValues = new int[]{1, 2, 3}; + int counter = 0; + Iterator it = jsonValue.arrayIterator(); + while (it.hasNext()) { + JsonValue element = it.next(); + assertThat(element.isLong()).isTrue(); + assertThat(element.asLong()).isEqualTo(expectedValues[counter]); + counter++; + } + assertThat(counter).isEqualTo(expectedValues.length); + } + + @Test + public void arraySize() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + assertThat(jsonValue.getSize()).isEqualTo(3); + } + + @Test + public void largeArraySize() { + // given + SimdJsonParser parser = new SimdJsonParser(); + int realArraySize = 0xFFFFFF + 1; + byte[] json = new byte[realArraySize * 2 - 1 + 2]; + json[0] = '['; + int i = 0; + while (i < realArraySize) { + json[i * 2 + 1] = (byte) '0'; + json[i * 2 + 2] = (byte) ','; + i++; + } + json[json.length - 1] = ']'; + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + assertThat(jsonValue.getSize()).isEqualTo(0xFFFFFF); + } + + @Test + public void missingCommaInArrayAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @ParameterizedTest + @ValueSource(strings = {"[1,,1]", "[,]", "[,,]"}) + public void tooManyCommas(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when 
+ JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."); + } + + @ParameterizedTest + @ValueSource(strings = {"[,", "[1 ", "[,,", "[1,", "[1", "["}) + public void unclosedArray(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Unclosed array. Missing ']' for starting '['."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "[[]]", value = "Missing comma between array values"), + @MapEntry(stringKey = "[]", value = "Unclosed array. Missing ']' for starting '['.") + }) + public void unclosedArrayDueToPassedLength(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length - 1)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void missingCommaInArrayAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1 1]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "[,", value = "Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."), + @MapEntry(stringKey = "[1 ", value = "Missing comma between array values"), + @MapEntry(stringKey = "[,,", value = "Unrecognized primitive. 
Expected: string, number, 'true', 'false' or 'null'."), + @MapEntry(stringKey = "[1,", value = "Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."), + @MapEntry(stringKey = "[1", value = "Missing comma between array values"), + @MapEntry(stringKey = "[", value = "Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'.") + }) + public void unclosedArrayAtObjectField(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void noMoreElements() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + JsonValue jsonValue = parser.parse(json, json.length); + Iterator it = jsonValue.arrayIterator(); + it.next(); + it.next(); + it.next(); + + // when + NoSuchElementException ex = assertThrows(NoSuchElementException.class, it::next); + + // then + assertThat(ex) + .hasMessage("No more elements"); + } + + @Test + public void unclosedArrayPaddedWithOpenBraces() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[[[["); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 2)); + + // then + assertThat(ex) + .hasMessage("Unclosed array. 
Missing ']' for starting '['."); + } + + @Test + public void validArrayPaddedWithOpenBraces() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[][[[["); + + // when + JsonValue jsonValue = parser.parse(json, 2); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + while (it.hasNext()) { + fail("Unexpected value"); + it.next(); + } + } +} diff --git a/src/test/java/org/simdjson/ArraySchemaBasedParsingTest.java b/src/test/java/org/simdjson/ArraySchemaBasedParsingTest.java new file mode 100644 index 0000000..28e1f1f --- /dev/null +++ b/src/test/java/org/simdjson/ArraySchemaBasedParsingTest.java @@ -0,0 +1,502 @@ +package org.simdjson; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.schemas.ClassWithIntegerField; +import org.simdjson.schemas.RecordWithBooleanListField; +import org.simdjson.schemas.RecordWithIntegerListField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerArrayField; +import org.simdjson.schemas.RecordWithStringArrayField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import java.lang.reflect.Array; +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; +import static org.simdjson.testutils.TestUtils.toUtf8; + +public class ArraySchemaBasedParsingTest { + + @ParameterizedTest + @ValueSource(classes = { + Object[].class, + String[].class, + char[].class, + Character[].class, + byte[].class, + Byte[].class, + short[].class, + Short[].class, + int[].class, + Integer[].class, + long[].class, + 
Long[].class, + boolean[].class, + Boolean[].class, + float[].class, + Float[].class, + double[].class, + Double[].class, + ClassWithIntegerField[].class + }) + public void emptyArrayAtRoot(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[]"); + + // when + Object array = parser.parse(json, json.length, expectedType); + + // then + assertThat(array).isInstanceOf(expectedType); + assertThat(array.getClass().isArray()).isTrue(); + Assertions.assertThat(Array.getLength(array)).isEqualTo(0); + } + + @Test + public void objectWithEmptyArrayField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": []}"); + + // when + RecordWithStringArrayField object = parser.parse(json, json.length, RecordWithStringArrayField.class); + + // then + assertThat(object.field()).isEmpty(); + } + + @ParameterizedTest + @ValueSource(strings = {"1", "true", "false", "{}", ":", ",", "\"abc\""}) + public void invalidTypeAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("Expected '[' but got: '" + jsonStr.charAt(0) + "'."); + } + + @Test + public void missingCommaInArrayAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @ParameterizedTest + @ValueSource(strings = {"[1,,1]", "[,]", "[,,]"}) + public void tooManyCommasInArrayAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = 
assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(strings = {"[,", "[1 ", "[,,", "[1,", "[1", "["}) + public void unclosedArrayAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("Unclosed array. Missing ']' for starting '['."); + } + + @Test + public void unclosedArrayDueToPassedLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[[]]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, int[][].class)); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @Test + public void unclosedArrayPaddedWithOpenBraces() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[[[["); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 2, int[].class)); + + // then + assertThat(ex) + .hasMessage("Unclosed array. 
Missing ']' for starting '['."); + } + + @Test + public void validArrayPaddedWithOpenBraces() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[][[[["); + + // when + int[] array = parser.parse(json, 2, int[].class); + + // then + assertThat(array).isEmpty(); + } + + @Test + public void missingCommaInArrayAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1 1]}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveIntegerArrayField.class) + ); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @Test + public void missingCommaInListAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1 1]}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerListField.class) + ); + + // then + assertThat(ex) + .hasMessage("Missing comma between array values"); + } + + @ParameterizedTest + @ValueSource(strings = {"[1,,1]", "[,]", "[,,]"}) + public void tooManyCommasInArrayAtObjectField(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveIntegerArrayField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(strings = {"[1,,1]", "[,]", "[,,]"}) + public void tooManyCommasInListAtObjectField(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerListField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{\"field\": [,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1 }", value = "Missing comma between array values"), + @MapEntry(stringKey = "{\"field\": [,,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1}", value = "Missing comma between array values"), + @MapEntry(stringKey = "{\"field\": [}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"ignore\": [1, \"field\": []}", value = "Expected ',' but reached end of buffer."), + @MapEntry(stringKey = "{\"ignore\": [", value = "Unclosed object. Missing '}' for starting '{'.") + }) + public void unclosedArrayAtObjectField(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveIntegerArrayField.class) + ); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{\"field\": [,}", value = "Invalid number. 
Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1 }", value = "Missing comma between array values"), + @MapEntry(stringKey = "{\"field\": [,,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1,}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"field\": [1}", value = "Missing comma between array values"), + @MapEntry(stringKey = "{\"field\": [}", value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(stringKey = "{\"ignore\": [1, \"field\": []}", value = "Expected ',' but reached end of buffer."), + @MapEntry(stringKey = "{\"ignore\": [", value = "Unclosed object. Missing '}' for starting '{'.") + }) + public void unclosedListAtObjectField(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerListField.class) + ); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(classes = {AbstractList.class, LinkedList.class, ArrayList.class, Set.class}) + public void unsupportedTypeForArrays(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Unsupported class: " + expectedType.getName() + + ". For JSON arrays at the root, use Java arrays. 
For inner JSON arrays, use either Java arrays or java.util.List."); + } + + @Test + public void listsAtRootAreNotSupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, 2, 3]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, List.class)); + + // then + assertThat(ex) + .hasMessage("Undefined list element type."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = int[][].class, nulls = false) + public void multidimensionalArrays2d(String jsonStr, int[][] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + int[][] array = parser.parse(json, json.length, int[][].class); + + // then + assertThat(array) + .isDeepEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = int[][][].class, nulls = false) + public void multidimensionalArrays3d(String jsonStr, int[][][] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + int[][][] array = parser.parse(json, json.length, int[][][].class); + + // then + assertThat(array) + .isDeepEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWith2dIntegerListField.class, nulls = false) + public void multidimensionalArrays2dAsList(String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + RecordWith2dIntegerListField object = parser.parse(json, json.length, RecordWith2dIntegerListField.class); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWith3dIntegerListField.class, nulls = false) + public void multidimensionalArrays3dAsList(String jsonStr, Object expected) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + RecordWith3dIntegerListField object = parser.parse(json, json.length, RecordWith3dIntegerListField.class); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @Test + public void nullAtRootWhenArrayIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + int[] object = parser.parse(json, json.length, int[].class); + + // then + assertThat(object).isNull(); + } + + @Test + public void nullAtObjectFieldWhenArrayIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithPrimitiveIntegerArrayField object = parser.parse(json, json.length, RecordWithPrimitiveIntegerArrayField.class); + + // then + assertThat(object).isNotNull(); + assertThat(object.field()).isNull(); + } + + @Test + public void nullAtObjectFieldWhenListIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithBooleanListField object = parser.parse(json, json.length, RecordWithBooleanListField.class); + + // then + assertThat(object).isNotNull(); + assertThat(object.field()).isNull(); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "[],", value = "Unclosed array. Missing ']' for starting '['."), + @MapEntry(stringKey = "[1, 2, 3],", value = "Unclosed array. Missing ']' for starting '['."), + @MapEntry(stringKey = "[1, 2, 3][]", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!"), + @MapEntry(stringKey = "[1, 2, 3]{}", value = "Unclosed array. Missing ']' for starting '['."), + @MapEntry(stringKey = "[1, 2, 3]1", value = "Unclosed array. 
Missing ']' for starting '['."), + @MapEntry(stringKey = "null,", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!") + }) + public void moreValuesThanOneArrayAtRoot(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void arraysOfListsAreUnsupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[[1, 2], [1], [12, 13]]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, List[].class)); + + // then + assertThat(ex) + .hasMessage("Undefined list element type."); + } + + @Test + public void emptyJson() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, int[].class)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @Test + public void passedLengthSmallerThanNullLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, Boolean[].class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected 'null'."); + } + + private record RecordWith2dIntegerListField(List> field) { + + } + + private record RecordWith3dIntegerListField(List>> field) { + + } +} diff --git a/src/test/java/org/simdjson/BenchmarkCorrectnessTest.java b/src/test/java/org/simdjson/BenchmarkCorrectnessTest.java index d6deecf..4ab21f8 100644 --- a/src/test/java/org/simdjson/BenchmarkCorrectnessTest.java +++ b/src/test/java/org/simdjson/BenchmarkCorrectnessTest.java @@ -7,12 +7,12 @@ import java.io.IOException; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Set; import static org.assertj.core.api.Assertions.assertThat; -import static org.simdjson.TestUtils.loadTestFile; -import static org.simdjson.TestUtils.padWithSpaces; -import static org.simdjson.TestUtils.toUtf8; +import static org.simdjson.testutils.TestUtils.loadTestFile; +import static org.simdjson.testutils.TestUtils.toUtf8PaddedWithSpaces; public class BenchmarkCorrectnessTest { @@ -20,22 +20,48 @@ public class BenchmarkCorrectnessTest { public void countUniqueTwitterUsersWithDefaultProfile() throws IOException { // given SimdJsonParser parser = new SimdJsonParser(); - Set defaultUsers = new HashSet<>(); byte[] json = loadTestFile("/twitter.json"); - // when - JsonValue simdJsonValue = parser.parse(json, json.length); - Iterator tweets = simdJsonValue.get("statuses").arrayIterator(); - while (tweets.hasNext()) { - JsonValue tweet = tweets.next(); - JsonValue user = tweet.get("user"); - if (user.get("default_profile").asBoolean()) { - defaultUsers.add(user.get("screen_name").asString()); + for (int i = 0; i < 10; i++) { + Set defaultUsers = new HashSet<>(); + + // when + JsonValue simdJsonValue = parser.parse(json, json.length); + Iterator tweets = simdJsonValue.get("statuses").arrayIterator(); + while (tweets.hasNext()) { + JsonValue tweet = tweets.next(); + JsonValue user = tweet.get("user"); + if (user.get("default_profile").asBoolean()) { + 
defaultUsers.add(user.get("screen_name").asString()); + } } + + // then + assertThat(defaultUsers.size()).isEqualTo(86); } + } - // then - assertThat(defaultUsers.size()).isEqualTo(86); + @Test + public void schemaBasedCountUniqueTwitterUsersWithDefaultProfile() throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = loadTestFile("/twitter.json"); + + for (int i = 0; i < 10; i++) { + Set defaultUsers = new HashSet<>(); + + // when + Statuses statuses = parser.parse(json, json.length, Statuses.class); + for (var status : statuses.statuses()) { + User user = status.user(); + if (user.default_profile()) { + defaultUsers.add(user.screen_name()); + } + } + + // then + assertThat(defaultUsers.size()).isEqualTo(86); + } } @ParameterizedTest @@ -46,13 +72,25 @@ public void countUniqueTwitterUsersWithDefaultProfile() throws IOException { public void numberParserTest(String input, Double expected) { // given Tape tape = new Tape(100); - NumberParser numberParser = new NumberParser(tape); - byte[] numberUtf8Bytes = toUtf8(padWithSpaces(input)); + NumberParser numberParser = new NumberParser(); + byte[] numberUtf8Bytes = toUtf8PaddedWithSpaces(input); // when - numberParser.parseNumber(numberUtf8Bytes, 0); + numberParser.parseNumber(numberUtf8Bytes, 0, tape); // then assertThat(tape.getDouble(0)).isEqualTo(expected); } + + record User(boolean default_profile, String screen_name) { + + } + + record Status(User user) { + + } + + record Statuses(List statuses) { + + } } diff --git a/src/test/java/org/simdjson/BlockReaderTest.java b/src/test/java/org/simdjson/BlockReaderTest.java deleted file mode 100644 index 1e587cf..0000000 --- a/src/test/java/org/simdjson/BlockReaderTest.java +++ /dev/null @@ -1,79 +0,0 @@ -package org.simdjson; - -import org.junit.jupiter.api.Test; - -import java.util.Arrays; - -import static org.assertj.core.api.Assertions.assertThat; - -public class BlockReaderTest { - - @Test - public void iterateOverEntireBuffer() { 
- // given - int stepSize = 64; - int fullBlockCount = 2; - byte[] buffer = new byte[fullBlockCount * stepSize + stepSize / 2]; - Arrays.fill(buffer, (byte) 'a'); - BlockReader reader = new BlockReader(stepSize); - reader.reset(buffer, buffer.length); - - // when / then - for (int i = 0; i < fullBlockCount; i++) { - assertThat(reader.hasFullBlock()).isTrue(); - assertThat(reader.getBlockIndex()).isEqualTo(i * stepSize); - reader.advance(); - assertThat(reader.getBlockIndex()).isEqualTo((i + 1) * stepSize); - } - assertThat(reader.hasFullBlock()).isFalse(); - byte[] remainder = reader.remainder(); - assertThat(remainder.length).isEqualTo(stepSize); - } - - @Test - public void lastBlockIsTreatedAsRemainder() { - // given - int stepSize = 64; - int blockCount = 2; - byte[] buffer = new byte[blockCount * stepSize]; - Arrays.fill(buffer, (byte) 'a'); - BlockReader reader = new BlockReader(stepSize); - reader.reset(buffer, buffer.length); - assertThat(reader.hasFullBlock()).isTrue(); - - // when - reader.advance(); - - // then - assertThat(reader.hasFullBlock()).isFalse(); - byte[] remainder = reader.remainder(); - assertThat(remainder.length).isEqualTo(stepSize); - for (int i = 0; i < remainder.length; i++) { - assertThat(remainder[i]).isEqualTo(buffer[i]); - } - } - - @Test - public void remainderShouldBeFilledWithSpaces() { - // given - int stepSize = 64; - byte[] buffer = new byte[stepSize / 2]; - Arrays.fill(buffer, (byte) 'a'); - BlockReader reader = new BlockReader(stepSize); - reader.reset(buffer, buffer.length); - assertThat(reader.hasFullBlock()).isFalse(); - - // when - byte[] remainder = reader.remainder(); - - // then - assertThat(remainder.length).isEqualTo(stepSize); - for (int i = 0; i < remainder.length; i++) { - if (i < buffer.length) { - assertThat(remainder[i]).isEqualTo(buffer[i]); - } else { - assertThat(remainder[i]).isEqualTo((byte) 0x20); - } - } - } -} diff --git a/src/test/java/org/simdjson/BooleanParsingTest.java 
b/src/test/java/org/simdjson/BooleanParsingTest.java new file mode 100644 index 0000000..47d4d9e --- /dev/null +++ b/src/test/java/org/simdjson/BooleanParsingTest.java @@ -0,0 +1,120 @@ +package org.simdjson; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.util.Iterator; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; +import static org.simdjson.testutils.TestUtils.toUtf8; + +public class BooleanParsingTest { + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void booleanValuesAtRoot(boolean booleanVal) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(Boolean.toString(booleanVal)); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue).isEqualTo(booleanVal); + } + + @ParameterizedTest + @ValueSource(strings = {"true,", "false,"}) + public void moreThanBooleanAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"fals", "falsee", "[f]", "{\"a\":f}"}) + public void invalidFalse(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at " + jsonStr.indexOf('f') + ". 
Expected 'false'."); + } + + @ParameterizedTest + @ValueSource(strings = {"tru", "truee", "[t]", "{\"a\":t}"}) + public void invalidTrue(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at " + jsonStr.indexOf('t') + ". Expected 'true'."); + } + + @Test + public void arrayOfBooleans() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[true, false]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then (elements are read as JsonValue, not as raw Object) + assertThat(jsonValue.isArray()).isTrue(); + Iterator<JsonValue> it = jsonValue.arrayIterator(); + Assertions.assertThat(it.hasNext()).isTrue(); + assertThat(it.next()).isEqualTo(true); + assertThat(it.next()).isEqualTo(false); + Assertions.assertThat(it.hasNext()).isFalse(); + } + + @Test + public void passedLengthSmallerThanTrueLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("true"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'true'."); + } + + @Test + public void passedLengthSmallerThanFalseLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("false"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 4)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected 'false'."); + } +} diff --git a/src/test/java/org/simdjson/BooleanSchemaBasedParsingTest.java b/src/test/java/org/simdjson/BooleanSchemaBasedParsingTest.java new file mode 100644 index 0000000..033f7cf --- /dev/null +++ b/src/test/java/org/simdjson/BooleanSchemaBasedParsingTest.java @@ -0,0 +1,592 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.schemas.RecordWithBooleanArrayField; +import org.simdjson.schemas.RecordWithBooleanField; +import org.simdjson.schemas.RecordWithBooleanListField; +import org.simdjson.schemas.RecordWithIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanArrayField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerField; +import org.simdjson.schemas.RecordWithStringField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.TestUtils.toUtf8; + +public class BooleanSchemaBasedParsingTest { + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void booleanValueAtRoot(boolean booleanVal) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(Boolean.toString(booleanVal)); + + // when + Boolean booleanValue = parser.parse(json, json.length, Boolean.class); + + // then + assertThat(booleanValue).isEqualTo(booleanVal); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void primitiveBooleanValueAtRoot(boolean booleanVal) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(Boolean.toString(booleanVal)); + + // when + boolean booleanValue = parser.parse(json, 
json.length, boolean.class); + + // then + assertThat(booleanValue).isEqualTo(booleanVal); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void booleanValueAtObjectField(boolean booleanVal) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + booleanVal + "}"); + + // when + RecordWithBooleanField object = parser.parse(json, json.length, RecordWithBooleanField.class); + + // then + assertThat(object.field()).isEqualTo(booleanVal); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void primitiveBooleanValueAtObjectField(boolean booleanVal) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + booleanVal + "}"); + + // when + RecordWithPrimitiveBooleanField object = parser.parse(json, json.length, RecordWithPrimitiveBooleanField.class); + + // then + assertThat(object.field()).isEqualTo(booleanVal); + } + + @Test + public void nullAtRootWhenBooleanIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + Boolean booleanValue = parser.parse(json, json.length, Boolean.class); + + // then + assertThat(booleanValue).isNull(); + } + + @Test + public void nullAtRootWhenPrimitiveBooleanIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean.class)); + + // then + assertThat(ex) + .hasMessage("Unrecognized boolean value. 
Expected: 'true' or 'false'."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"abc\"", "1"}) + public void invalidTypeForBoolean(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Boolean.class)); + + // then + assertThat(ex) + .hasMessage("Unrecognized boolean value. Expected: 'true', 'false' or 'null'."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"abc\"", "1"}) + public void invalidTypeForPrimitiveBoolean(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean.class)); + + // then + assertThat(ex) + .hasMessage("Unrecognized boolean value. Expected: 'true' or 'false'."); + } + + @Test + public void nullAtObjectFieldWhenBooleanIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithBooleanField object = parser.parse(json, json.length, RecordWithBooleanField.class); + + // then + assertThat(object.field()).isNull(); + } + + @Test + public void nullAtObjectFieldWhenPrimitiveBooleanIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveBooleanField.class) + ); + + // then + assertThat(ex) + .hasMessage("Unrecognized boolean value. 
Expected: 'true' or 'false'."); + } + + @ParameterizedTest + @ValueSource(strings = {"true,", "false,"}) + public void moreValuesThanOnePrimitiveBooleanAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"true,", "false,", "null,"}) + public void moreValuesThanOneBooleanAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Boolean.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "truee", value = "true"), + @MapEntry(stringKey = "falsee", value = "false"), + @MapEntry(stringKey = "nul", value = "null"), + @MapEntry(stringKey = "nulll", value = "null"), + @MapEntry(stringKey = "nuul", value = "null") + }) + public void invalidBooleanAtRoot(String actual, String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(actual); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Boolean.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected '" + expected + "'."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "truee", value = "true"), + @MapEntry(stringKey = "falsee", value = "false") + }) + public void invalidPrimitiveBooleanAtRoot(String actual, String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(actual); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected '" + expected + "'."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = Integer.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit.") + }) + public void mismatchedTypeForTrueAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("true"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = Integer.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = int.class, value = "Invalid number. 
Minus has to be followed by a digit.") + }) + public void mismatchedTypeForFalseAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("false"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithPrimitiveIntegerField.class, value = "Invalid number. Minus has to be followed by a digit.") + }) + public void mismatchedTypeForTrue(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": true}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithPrimitiveIntegerField.class, value = "Invalid number. 
Minus has to be followed by a digit.") + }) + public void mismatchedTypeForFalse(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": false}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Boolean[].class, nulls = false) + public void arrayOfBooleansAtRoot(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Boolean[] array = parser.parse(json, json.length, Boolean[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Boolean[].class, nulls = true) + public void arrayOfBooleansAndNullsAtRoot(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Boolean[] array = parser.parse(json, json.length, Boolean[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = boolean[].class, nulls = false) + public void arrayOfPrimitiveBooleansAtRoot(String jsonStr, boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + boolean[] array = parser.parse(json, json.length, boolean[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @Test + public void arrayOfPrimitiveBooleansAndNullsAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[true, false, null]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, boolean[].class)); + + // then + 
assertThat(ex) + .hasMessage("Unrecognized boolean value. Expected: 'true' or 'false'."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = boolean[].class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = Boolean[].class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'.") + }) + public void arrayOfBooleansMixedWithOtherTypesAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[true, false, 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithPrimitiveBooleanArrayField.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithBooleanArrayField.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithBooleanListField.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'.") + }) + public void arrayOfBooleansMixedWithOtherTypesAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [true, false, 1]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = int[].class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = int.class, value = "Invalid number. 
Minus has to be followed by a digit."), + @MapEntry(classKey = boolean.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = boolean[][].class, value = "Expected '[' but got: 't'."), + @MapEntry(classKey = Boolean[][].class, value = "Expected '[' but got: 't'.") + }) + public void mismatchedTypeForArrayOfBooleansAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[true, false]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = boolean[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithIntegerField.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = RecordWithPrimitiveBooleanField.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = boolean.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. 
Expected: 'true', 'false' or 'null'.") + }) + public void mismatchedTypeForArrayOfBooleansAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [true, false]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Boolean[].class, nulls = false) + public void objectWithArrayOfBooleans(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithBooleanArrayField object = parser.parse(json, json.length, RecordWithBooleanArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = boolean[].class, nulls = false) + public void objectWithArrayOfPrimitiveBooleans(String jsonStr, boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithPrimitiveBooleanArrayField object = parser.parse(json, json.length, RecordWithPrimitiveBooleanArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Boolean[].class, nulls = false) + public void objectWithListOfBooleans(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithBooleanListField object = parser.parse(json, json.length, RecordWithBooleanListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = 
Boolean[].class, nulls = true) + public void objectWithListOfBooleansAndNulls(String jsonStr, Boolean[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithBooleanListField object = parser.parse(json, json.length, RecordWithBooleanListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @Test + public void missingBooleanField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + RecordWithBooleanField object = parser.parse(json, json.length, RecordWithBooleanField.class); + + // then + assertThat(object.field()).isNull(); + } + + @Test + public void missingPrimitiveBooleanField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveBooleanField.class) + ); + + // then + assertThat(ex.getCause()).isInstanceOf(NullPointerException.class); + } + + @ParameterizedTest + @ValueSource(classes = {boolean.class, Boolean.class}) + public void emptyJson(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @ParameterizedTest + @ValueSource(classes = {boolean.class, Boolean.class}) + public void passedLengthSmallerThanTrueLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("true"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, expectedType)); + + // then + assertThat(ex) + 
.hasMessage("Invalid value starting at 0. Expected 'true'."); + } + + @ParameterizedTest + @ValueSource(classes = {boolean.class, Boolean.class}) + public void passedLengthSmallerThanFalseLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("false"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 4, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'false'."); + } + + @Test + public void passedLengthSmallerThanNullLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, Boolean.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } +} diff --git a/src/test/java/org/simdjson/CharactersClassifierTest.java b/src/test/java/org/simdjson/CharactersClassifierTest.java deleted file mode 100644 index ce5a369..0000000 --- a/src/test/java/org/simdjson/CharactersClassifierTest.java +++ /dev/null @@ -1,70 +0,0 @@ -package org.simdjson; - -import org.junit.jupiter.api.Test; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.assertj.core.api.Assertions.assertThat; -import static org.simdjson.TestUtils.chunk; - -public class CharactersClassifierTest { - - @Test - public void classifiesOperators() { - // given - CharactersClassifier classifier = new CharactersClassifier(); - String str = "a{bc}1:2,3[efg]aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; - - // when - JsonCharacterBlock block = classify(classifier, str); - - // then - assertThat(block.op()).isEqualTo(0x4552); - assertThat(block.whitespace()).isEqualTo(0); - } - - @Test - public void classifiesControlCharactersAsOperators() { - // given - CharactersClassifier classifier = new CharactersClassifier(); - String str = new 
String(new byte[] { - 'a', 'a', 'a', 0x1a, 'a', 0x0c, 'a', 'a', // 0x1a = , 0x0c = - 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', - 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', - 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', - 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', - 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', - 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', - 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a' - }, UTF_8); - - // when - JsonCharacterBlock block = classify(classifier, str); - - // then - assertThat(block.op()).isEqualTo(0x28); - assertThat(block.whitespace()).isEqualTo(0); - } - - @Test - public void classifiesWhitespaces() { - // given - CharactersClassifier classifier = new CharactersClassifier(); - String str = "a bc\t1\n2\r3efgaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; - - // when - JsonCharacterBlock block = classify(classifier, str); - - // then - assertThat(block.whitespace()).isEqualTo(0x152); - assertThat(block.op()).isEqualTo(0); - } - - private JsonCharacterBlock classify(CharactersClassifier classifier, String str) { - return switch (StructuralIndexer.N_CHUNKS) { - case 1 -> classifier.classify(chunk(str, 0)); - case 2 -> classifier.classify(chunk(str, 0), chunk(str, 1)); - default -> throw new RuntimeException("Unsupported chunk count: " + StructuralIndexer.N_CHUNKS); - }; - } - -} diff --git a/src/test/java/org/simdjson/FloatingPointNumberSchemaBasedParsingTest.java b/src/test/java/org/simdjson/FloatingPointNumberSchemaBasedParsingTest.java new file mode 100644 index 0000000..0315055 --- /dev/null +++ b/src/test/java/org/simdjson/FloatingPointNumberSchemaBasedParsingTest.java @@ -0,0 +1,1296 @@ +package org.simdjson; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.junitpioneer.jupiter.cartesian.CartesianTest; +import org.junitpioneer.jupiter.cartesian.CartesianTest.Values; +import org.simdjson.schemas.RecordWithBooleanField; +import org.simdjson.schemas.RecordWithByteArrayField; +import 
org.simdjson.schemas.RecordWithDoubleArrayField; +import org.simdjson.schemas.RecordWithDoubleField; +import org.simdjson.schemas.RecordWithDoubleListField; +import org.simdjson.schemas.RecordWithFloatArrayField; +import org.simdjson.schemas.RecordWithFloatField; +import org.simdjson.schemas.RecordWithFloatListField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveDoubleArrayField; +import org.simdjson.schemas.RecordWithPrimitiveDoubleField; +import org.simdjson.schemas.RecordWithPrimitiveFloatArrayField; +import org.simdjson.schemas.RecordWithPrimitiveFloatField; +import org.simdjson.schemas.RecordWithStringField; +import org.simdjson.testutils.CartesianTestCsv; +import org.simdjson.testutils.CartesianTestCsvRow; +import org.simdjson.testutils.FloatingPointNumberTestFile; +import org.simdjson.testutils.FloatingPointNumberTestFile.FloatingPointNumberTestCase; +import org.simdjson.testutils.FloatingPointNumberTestFilesSource; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import java.io.IOException; +import java.math.BigDecimal; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.TestUtils.toUtf8; + +public class FloatingPointNumberSchemaBasedParsingTest { + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = {Float.class, float.class, Double.class, double.class}, nulls = false) + public void floatingPointNumberAtRoot(String numberStr, Class schema, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object number = parser.parse(json, json.length, schema); + + // then + assertThat(number).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = {Float.class, Double.class}) + public void 
nullAtRootWhenFloatingPointNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + Object value = parser.parse(json, json.length, schema); + + // then + assertThat(value).isNull(); + } + + @ParameterizedTest + @ValueSource(classes = {float.class, double.class}) + public void nullAtRootWhenPrimitiveFloatingPointNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithFloatField.class, + RecordWithPrimitiveFloatField.class, + RecordWithDoubleField.class, + RecordWithPrimitiveDoubleField.class + }, + nulls = false + ) + public void floatingPointNumberAtObjectField(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = {RecordWithFloatField.class, RecordWithDoubleField.class}) + public void nullAtObjectFieldWhenFloatingPointNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).extracting("field").isNull(); + } + + @ParameterizedTest + @ValueSource(classes = {RecordWithPrimitiveFloatField.class, RecordWithPrimitiveDoubleField.class}) + public void nullAtObjectFieldWhenPrimitiveFloatingPointNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = {Float[].class, float[].class, Double[].class, double[].class}, nulls = false) + public void arrayOfFloatingPointNumbersAtRoot(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object array = parser.parse(json, json.length, schema); + + // then + assertThat(array.getClass().isArray()).isTrue(); + assertThat(array).isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = {Float[].class, Double[].class}, nulls = true) + public void arrayOfFloatingPointNumbersAndNullsAtRoot(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object array = parser.parse(json, json.length, schema); + + // then + assertThat(array.getClass().isArray()).isTrue(); + assertThat(array).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = {float[].class, double[].class}) + public void arrayOfPrimitiveFloatingPointNumbersAndNullsAtRoot(Class schema) { + // given: primitive array schemas, so the array is entered and the trailing null is what gets rejected + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[-1.1, 1.0, 0.0, null]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithFloatArrayField.class, + RecordWithPrimitiveFloatArrayField.class, + RecordWithDoubleArrayField.class, + RecordWithPrimitiveDoubleArrayField.class + }, + nulls = false + ) + public void objectWithArrayOfFloatingPointNumbers(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithFloatArrayField.class, + RecordWithFloatListField.class, + RecordWithDoubleArrayField.class, + RecordWithDoubleListField.class + }, + nulls = true + ) + public void objectWithArrayOfFloatingPointNumbersWithNulls(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @CartesianTest + public void leadingZerosAreNotAllowed( + @Values(strings = {"01.0", "-01.0", "000.0", "-000.0", "012e34"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Leading zeroes are not allowed."); + } + + @CartesianTest + public void minusHasToBeFollowedByAtLeastOneDigit( + @Values(strings = {"-a123.0", "--123.0", "-+123.0", "-.123", "-e123",}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @CartesianTest + public void numberHasToBeFollowedByStructuralCharacterOrWhitespace( + @Values(strings = {"-1.0-2", "1.0a", "12E12.12", "1e2e3"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number has to be followed by a structural character or whitespace."); + } + + @CartesianTest + public void decimalPointHasToBeFollowedByAtLeastOneDigit( + @Values(strings = {"123.", "1..1", "1.e1", "1.E1"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Decimal point has to be followed by a digit."); + } + + @CartesianTest + public void exponentIndicatorHasToBeFollowedByAtLeastOneDigit( + @Values(strings = {"1e+-2", "1E+-2", "1e--23", "1E--23", "1ea", "1Ea", "1e", "1E", "1e+", "1E+"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Exponent indicator has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(classes = {float.class, Float.class, Double.class, double.class}) + public void startingWithPlusIsNotAllowed(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("+1.0"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @CartesianTest + public void numberHasToStartWithMinusOrDigit( + @Values(strings = {"a123", "a-123"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @CartesianTest + public void positiveDoubleZero( + @Values(strings = { + "0.0", + "2251799813685248e-342", + "9999999999999999999e-343", + "1.23e-341", + "123e-343", + "0.0e-999", + "0e9999999999999999999999999999", + "18446744073709551615e-343", + "0.099999999999999999999e-323", + "0.99999999999999999999e-324", + "0.9999999999999999999e-324" + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0.0d); + } + + @CartesianTest + public void negativeDoubleZero( + @Values(strings = { + "-0.0", + "-2251799813685248e-342", + "-9999999999999999999e-343", + "-1.23e-341", + "-123e-343", + "-0.0e-999", + "-0e9999999999999999999999999999", + "-18446744073709551615e-343", + "-0.099999999999999999999e-323", + "-0.99999999999999999999e-324", + "-0.9999999999999999999e-324" + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(-0.0d); + } + + @CartesianTest + public void positiveFloatZero( + @Values(strings = { + "0.0", + "1e-58", + "1e-64", + "0.0e-999", + "0e9999999999999999999999999999", + "18446744073709551615e-66", + "0.99999999999999999999e-46" + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0.0f); + } + + @CartesianTest + public void negativeFloatZero( + @Values(strings = { + "-0.0", + 
"-1e-58", + "-1e-64", + "-0.0e-999", + "-0e9999999999999999999999999999", + "-18446744073709551615e-66", + "-0.99999999999999999999e-46" + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(-0.0f); + } + + @CartesianTest + public void exactDouble( + @CartesianTestCsv({ + "9007199254740991.0, 9007199254740991", + "9007199254740992.0, 9007199254740992", + "18014398509481988.0, 18014398509481988" + }) CartesianTestCsvRow row, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(row.getValueAsString(0)); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(row.getValueAsDouble(1)); + } + + @CartesianTest + public void exactFloat( + @CartesianTestCsv({ + "16777215.0, 16777215", + "16777216.0, 16777216", + "33554436.0, 33554436" + }) CartesianTestCsvRow row, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(row.getValueAsString(0)); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(row.getValueAsFloat(1)); + } + + @CartesianTest + public void minNormalDouble( + @Values(strings = { + "2.2250738585072016e-308", + "2.2250738585072015e-308", + "2.2250738585072014e-308", + "2.2250738585072013e-308", + "2.2250738585072012e-308" + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + 
assertThat(value).isEqualTo(0x1.0p-1022d); + } + + @CartesianTest + public void minNormalFloat( + @Values(strings = { + "1.17549433E-38", + "1.17549434E-38", + "1.17549435E-38", + "1.17549436E-38", + "1.17549437E-38" + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.0p-126f); + } + + @CartesianTest + public void maxSubnormalDouble( + @Values(strings = { + "2.2250738585072011e-308", + "2.2250738585072010e-308", + "2.2250738585072009e-308", + "2.2250738585072008e-308", + "2.2250738585072007e-308", + "0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375", + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] 
json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x0.fffffffffffffp-1022d); + } + + @CartesianTest + public void maxSubnormalFloat( + @Values(strings = { + "1.1754942e-38", + "0.0000000000000000000000000000000000000117549421069244107548702944485", + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x0.fffffep-126f); + } + + @CartesianTest + public void minSubnormalDouble( + @Values(strings = { + "3e-324", + "4.9e-324", + "4.9406564584124654e-324", + "4.94065645841246544176568792868e-324", + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x0.0000000000001p-1022d); + } + + @CartesianTest + public void minSubnormalFloat( + @Values(strings = { + "1e-45", + "1.4e-45", + "1.4012984643248170e-45", + "1.40129846432481707092372958329e-45", + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x0.000002p-126f); + } + + @CartesianTest + public void maxDouble( + @Values(strings = { + "1.7976931348623157e308", + "1.7976931348623158e308", + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = 
parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.fffffffffffffp+1023d); + } + + @CartesianTest + public void maxFloat( + @Values(strings = { + "3.4028234664e38", + "3.4028234665e38", + "3.4028234666e38", + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.fffffep+127f); + } + + @CartesianTest + public void positiveDoubleInfinity( + @Values(strings = { + "1.9e308", + "1.8e308", + "1234456789012345678901234567890e9999999999999999999999999999", + "1.832312213213213232132132143451234453123412321321312e308", + "2139879401095466344511101915470454744.9813888656856943E+272", + "2e30000000000000000", + "2e3000", + "1234456789012345678901234567890e999999999999999999999999999", + "1.7976931348623159e308", + 
"1438456663141390273526118207642235581183227845246331231162636653790368152091394196930365828634687637948157940776599182791387527135353034738357134110310609455693900824193549772792016543182680519740580354365467985440183598701312257624545562331397018329928613196125590274187720073914818062530830316533158098624984118889298281371812288789537310599037529113415438738954894752124724983067241108764488346454376699018673078404751121414804937224240805993123816932326223683090770561597570457793932985826162604255884529134126396282202126526253389383421806727954588525596114379801269094096329805054803089299736996870951258573010877404407451953846698609198213926882692078557033228265259305481198526059813164469187586693257335779522020407645498684263339921905227556616698129967412891282231685504660671277927198290009824680186319750978665734576683784255802269708917361719466043175201158849097881370477111850171579869056016061666173029059588433776015644439705050377554277696143928278093453792803846252715966016733222646442382892123940052441346822429721593884378212558701004356924243030059517489346646577724622498919752597382095222500311124181823512251071356181769376577651390028297796156208815375089159128394945710515861334486267101797497111125909272505194792870889617179758703442608016143343262159998149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-733" + 
}) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.POSITIVE_INFINITY); + } + + @CartesianTest + public void negativeDoubleInfinity( + @Values(strings = { + "-1.9e308", + "-1.8e308", + "-1234456789012345678901234567890e9999999999999999999999999999", + "-1.832312213213213232132132143451234453123412321321312e308", + "-2139879401095466344511101915470454744.9813888656856943E+272", + "-2e30000000000000000", + "-2e3000", + "-1234456789012345678901234567890e999999999999999999999999999", + "-1.7976931348623159e308", + "-14384566631413902735261182076422355811832278452463312311626366537903681520913941969303658286346876379481579407765991827913875271353530347383571341103106094556939008241935497727920165431826805197405803543654679854401835987013122576245455623313970183299286131961255902741877200739148180625308303165331580986249841188892982813718122887895373105990375291134154387389548947521247249830672411087644883464543766990186730784047511214148049372242408059931238169323262236830907705615975704577939329858261626042558845291341263962822021265262533893834218067279545885255961143798012690940963298050548030892997369968709512585730108774044074519538466986091982139268826920785570332282652593054811985260598131644691875866932573357795220204076454986842633399219052275566166981299674128912822316855046606712779271982900098246801863197509786657345766837842558022697089173617194660431752011588490978813704771118501715798690560160616661730290595884337760156444397050503775542776961439282780934537928038462527159660167332226464423828921239400524413468224297215938843782125587010043569242430300595174893466465777246224989197525973820952225003111241818235122510713561817693765776513900282977961562088153750891591283949457105158613344862671017974971111
25909272505194792870889617179758703442608016143343262159998149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-733" + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.NEGATIVE_INFINITY); + } + + @CartesianTest + public void positiveFloatInfinity( + @Values(strings = { + "1.9e39", + "1.8e39", + "1.9e40", + "1.8e40", + "1234456789012345678901234567890e9999999999999999999999999999", + "3.532312213213213232132132143451234453123412321321312e38", + "2139879401095466344511101915470454744.9813888656856943E+3", + "2e30000000000000000", + "2e3000", + "3.4028236e38", + 
"1438456663141390273526118207642235581183227845246331231162636653790368152091394196930365828634687637948157940776599182791387527135353034738357134110310609455693900824193549772792016543182680519740580354365467985440183598701312257624545562331397018329928613196125590274187720073914818062530830316533158098624984118889298281371812288789537310599037529113415438738954894752124724983067241108764488346454376699018673078404751121414804937224240805993123816932326223683090770561597570457793932985826162604255884529134126396282202126526253389383421806727954588525596114379801269094096329805054803089299736996870951258573010877404407451953846698609198213926882692078557033228265259305481198526059813164469187586693257335779522020407645498684263339921905227556616698129967412891282231685504660671277927198290009824680186319750978665734576683784255802269708917361719466043175201158849097881370477111850171579869056016061666173029059588433776015644439705050377554277696143928278093453792803846252715966016733222646442382892123940052441346822429721593884378212558701004356924243030059517489346646577724622498919752597382095222500311124181823512251071356181769376577651390028297796156208815375089159128394945710515861334486267101797497111125909272505194792870889617179758703442608016143343262159998149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-733" + 
}) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.POSITIVE_INFINITY); + } + + @CartesianTest + public void negativeFloatInfinity( + @Values(strings = { + "-1.9e39", + "-1.8e39", + "-1.9e40", + "-1.8e40", + "-1234456789012345678901234567890e9999999999999999999999999999", + "-3.532312213213213232132132143451234453123412321321312e38", + "-2139879401095466344511101915470454744.9813888656856943E+3", + "-2e30000000000000000", + "-2e3000", + "-3.4028236e38", + "-1438456663141390273526118207642235581183227845246331231162636653790368152091394196930365828634687637948157940776599182791387527135353034738357134110310609455693900824193549772792016543182680519740580354365467985440183598701312257624545562331397018329928613196125590274187720073914818062530830316533158098624984118889298281371812288789537310599037529113415438738954894752124724983067241108764488346454376699018673078404751121414804937224240805993123816932326223683090770561597570457793932985826162604255884529134126396282202126526253389383421806727954588525596114379801269094096329805054803089299736996870951258573010877404407451953846698609198213926882692078557033228265259305481198526059813164469187586693257335779522020407645498684263339921905227556616698129967412891282231685504660671277927198290009824680186319750978665734576683784255802269708917361719466043175201158849097881370477111850171579869056016061666173029059588433776015644439705050377554277696143928278093453792803846252715966016733222646442382892123940052441346822429721593884378212558701004356924243030059517489346646577724622498919752597382095222500311124181823512251071356181769376577651390028297796156208815375089159128394945710515861334486267101797497111125909272505194792870889617179758703442608016143343262159998
149700606597792535574457560429226974273443630323818747730771316763398572110874959981923732463076884528677392654150010269822239401993427482376513231389212353583573566376915572650916866553612366187378959554983566712767093372906030188976220169058025354973622211666504549316958271880975697143546564469806791358707318873075708383345004090151974068325838177531266954177406661392229801349994695941509935655355652985723782153570084089560139142231.738475042362596875449154552392299548947138162081694168675340677843807613129780449323363759027012972466987370921816813162658754726545121090545507240267000456594786540949605260722461937870630634874991729398208026467698131898691830012167897399682179601734569071423681e-733" + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.NEGATIVE_INFINITY); + } + + @CartesianTest + public void roundingOverflowForDouble( + @Values(strings = { + // In this case the binary significand after rounding up is equal to 9007199254740992 (2^53), + // which is more than we can store (2^53 - 1). + "7.2057594037927933e16", + "72057594037927933.0000000000000000", + }) String jsonStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.0p+56d); + } + + @CartesianTest + public void roundingOverflowForFloat( + @Values(strings = { + // In this case the binary significand after rounding up is equal to 16777216 (2^24), + // which is more than we can store (2^24 - 1). 
+ "7.2057594e16", + "72057594000000000.0000000", + }) String jsonStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0x1.0p+56f); + } + + @CartesianTest + public void exponentWithMoreDigitsThanLongCanAccommodateAndLeadingZeros( + @CartesianTestCsv({ + "1e000000000000000000001, 10.0", + "1e-000000000000000000001, 0.1" + }) CartesianTestCsvRow row, + @Values(classes = {Float.class, float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(row.getValueAsString(0)); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(row.getValue(1, expectedType)); + } + + @CartesianTest + public void exponentWithMoreDigitsThanLongCanAccommodate( + @CartesianTestCsv({ + "0e999999999999999999999, 0.0", + "0e-999999999999999999999, 0.0", + "1e999999999999999999999, Infinity", + "1e-999999999999999999999, 0.0", + "9999999999999999999999999999999999999999e-999999999999999999999, 0.0", + "0.9999999999999999999999999999999999999999e999999999999999999999, Infinity" + }) CartesianTestCsvRow row, + @Values(classes = {Float.class, float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(row.getValueAsString(0)); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(row.getValue(1, expectedType)); + } + + @CartesianTest + public void doubleRoundTiesToEven( + @Values(strings = { + "2251799813685803.75", + "4503599627370497.5", + "4503599627475353.5", + "9007199254740993.0", + "4503599627370496.5", + "4503599627475352.5", + "2251799813685248.25", + 
"2.22507385850720212418870147920222032907240528279439037814303133837435107319244194686754406432563881851382188218502438069999947733013005649884107791928741341929297200970481951993067993290969042784064731682041565926728632933630474670123316852983422152744517260835859654566319282835244787787799894310779783833699159288594555213714181128458251145584319223079897504395086859412457230891738946169368372321191373658977977723286698840356390251044443035457396733706583981055420456693824658413747607155981176573877626747665912387199931904006317334709003012790188175203447190250028061277777916798391090578584006464715943810511489154282775041174682194133952466682503431306181587829379004205392375072083366693241580002758391118854188641513168478436313080237596295773983001708984375e-308", + "1125899906842624.125", + "1125899906842901.875", + "9007199254740993.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + }) String numberStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.parseDouble(numberStr)); + } + + @CartesianTest + public void doubleRoundUpToNearest( + @Values(strings = { + "2251799813685803.15", + "4503599627370497.2", + "45035996.273704985", + "4503599627475353.2", + "9355950000000000000.00000000000000000000000000000000001844674407370955161600000184467440737095516161844674407370955161407370955161618446744073709551616000184467440737095516166000001844674407370955161618446744073709551614073709551616184467440737095516160001844674407370955161601844674407370955674451616184467440737095516140737095516161844674407370955161600018446744073709551616018446744073709551611616000184467440737095001844674407370955161600184467440737095516160018446744073709551168164467440737095516160001844073709551616018446744073709551616184467440737095516160001844674407536910751601611616000184467440737095001844674407370955161600184467440737095516160018446744073709551616184467440737095516160001844955161618446744073709551616000184467440753691075160018446744073709", + "1.0000000000000006661338147750939242541790008544921875", + "-92666518056446206563e3", + "90054602635948575728e72", + "7.0420557077594588669468784357561207962098443483187940792729600000e59", + }) String numberStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.parseDouble(numberStr)); + } + + @CartesianTest + public void doubleRoundDownToNearest( + @Values(strings = { + "2251799813685803.15", + "4503599627370497.2", + "45035996.273704985", + "4503599627475353.2", + 
"9355950000000000000.00000000000000000000000000000000001844674407370955161600000184467440737095516161844674407370955161407370955161618446744073709551616000184467440737095516166000001844674407370955161618446744073709551614073709551616184467440737095516160001844674407370955161601844674407370955674451616184467440737095516140737095516161844674407370955161600018446744073709551616018446744073709551611616000184467440737095001844674407370955161600184467440737095516160018446744073709551168164467440737095516160001844073709551616018446744073709551616184467440737095516160001844674407536910751601611616000184467440737095001844674407370955161600184467440737095516160018446744073709551616184467440737095516160001844955161618446744073709551616000184467440753691075160018446744073709", + "1.0000000000000006661338147750939242541790008544921875", + "-92666518056446206563e3", + "90054602635948575728e72", + "7.0420557077594588669468784357561207962098443483187940792729600000e59", + }) String numberStr, + @Values(classes = {Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Double.parseDouble(numberStr)); + } + + @CartesianTest + public void floatRoundTiesToEven( + @Values(strings = { + "1.1754941406275178592461758986628081843312458647327962400313859427181746759860647699724722770042717456817626953125e-38", + "30219.0830078125", + "16252921.5", + "5322519.25", + "3900245.875", + "1510988.3125", + "782262.28125", + "328381.484375", + "156782.0703125", + "85003.24609375", + "17419.6494140625", + "15498.36376953125", + "6318.580322265625", + "2525.2840576171875", + "16407.9462890625", + "8388614.5" + }) String numberStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + 
// when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.parseFloat(numberStr)); + } + + @CartesianTest + public void floatRoundUpToNearest( + @Values(strings = { + "1.1754941406275178592461758986628081843312458647327962400313859427181746759860647699724722770042717456817626953125", + "1.1754943508e-38", + "16252921.5", + "3900245.875", + "328381.484375", + "85003.24609375", + "2525.2840576171875", + "936.3702087402344", + "411.88682556152344", + "206.50310516357422", + "124.16878890991211", + "50.811574935913086", + "13.91745138168335", + "2.687217116355896", + "1.1877630352973938", + "0.09289376810193062", + "0.03706067614257336", + "0.028068351559340954", + "0.012114629615098238", + "0.004221370676532388", + "0.002153817447833717", + "0.0015924838953651488", + "0.00036393293703440577", + "1.1754947011469036e-38", + "7.0064923216240854e-46", + "4.7019774032891500318749461488889827112746622270883500860350068251e-38", + "3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679", + }) String numberStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.parseFloat(numberStr)); + } + + /** + * Parses each literal with a float schema and checks the result against Float.parseFloat + * for the same text. The test is run for both the boxed (Float) and the primitive (float) + * schema, matching the other float rounding tests in this class. + */ + @CartesianTest + public void floatRoundDownToNearest( + @Values(strings = { + "1.1754941406275178592461758986628081843312458647327962400313859427181746759860647699724722770042717456817626953125", + "30219.0830078125", + "5322519.25", + "1510988.3125", + "782262.28125", + "156782.0703125", + "17419.6494140625", + "15498.36376953125", + "6318.580322265625", + "1370.9265747070312", + "17.486443519592285", + "7.5464513301849365", + "0.7622503340244293", + "0.30531780421733856", + "0.21791061013936996", + "0.0008602388261351734", + 
"0.00013746770127909258", + "16407.9462890625", + "8388614.5", + "2.3509887016445750159374730744444913556373311135441750430175034126e-38", + "3.4028234664e38", + "3.4028234665e38", + "3.4028234666e38", + "0.000000000000000000000000000000000000011754943508222875079687365372222456778186655567720875215087517062784172594547271728515625", + "0.00000000000000000000000000000000000000000000140129846432481707092372958328991613128026194187651577175706828388979108268586060148663818836212158203125", + "0.00000000000000000000000000000000000002350988561514728583455765982071533026645717985517980855365926236850006129930346077117064851336181163787841796875", + "0.00000000000000000000000000000000000001175494210692441075487029444849287348827052428745893333857174530571588870475618904265502351336181163787841796875", + }) String numberStr, + @Values(classes = {Float.class, float.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numberStr); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(Float.parseFloat(numberStr)); + } + + @CartesianTest + public void moreValuesThanOneFloatingPointNumberAtRoot( + @Values(strings = {"123.0,", "123.0{}", "1.0:"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = BigDecimal.class, value = "Class: java.math.BigDecimal has more than one constructor."), + @MapEntry(classKey = Number.class, value = "Unsupported class: java.lang.Number. 
Interfaces and abstract classes are not supported."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = byte[].class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForFloatingPointNumberAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("123.0"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. 
Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithByteArrayField.class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForFloatingPointNumberAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 123.0}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(classes = {float[].class, Float[].class, double[].class, Double[].class}) + public void arrayOfFloatingPointNumbersMixedWithOtherTypesAtRoot(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1.0, -1.0, true]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithFloatField.class, + RecordWithPrimitiveFloatField.class, + RecordWithDoubleField.class, + RecordWithPrimitiveDoubleField.class + }) + public void arrayOfFloatingPointNumbersMixedWithOtherTypesAtObjectField(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1.0, -1.0, true]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = BigDecimal[].class, value = "Class: java.math.BigDecimal has more than one constructor."), + @MapEntry(classKey = Number[].class, value = "Unsupported class: java.lang.Number. 
Interfaces and abstract classes are not supported."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = byte[][].class, value = "Expected '[' but got: '1'."), + @MapEntry(classKey = Byte[][].class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForArrayOfFloatingPointNumbersAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1.0, -1.0, 0]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = byte[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithPrimitiveBooleanField.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Byte.class, value = "Invalid number. 
Minus has to be followed by a digit.") + }) + public void mismatchedTypeForArrayOfFloatingPointNumbersAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1.0, -1.0, 0.0]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @FloatingPointNumberTestFilesSource + public void testFilesForPrimitiveDouble(FloatingPointNumberTestFile file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); + + // when + double value = parser.parse(json, json.length, double.class); + + // then + assertThat(value) + .withFailMessage("%nline: %d%n expected: %s%n was: %s", testCase.line(), testCase.expectedDouble(), value) + .isEqualTo(testCase.expectedDouble()); + } + } + } + + @ParameterizedTest + @FloatingPointNumberTestFilesSource + public void testFilesForDouble(FloatingPointNumberTestFile file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); + + // when + Double value = parser.parse(json, json.length, Double.class); + + // then + assertThat(value) + .withFailMessage("%nline: %d%nexpected: %s%nwas: %s", testCase.line(), testCase.expectedDouble(), value) + .isEqualTo(testCase.expectedDouble()); + } + } + } + + @ParameterizedTest + @FloatingPointNumberTestFilesSource + public void testFilesForPrimitiveFloat(FloatingPointNumberTestFile 
file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); + + // when + float value = parser.parse(json, json.length, float.class); + + // then + assertThat(value) + .withFailMessage("%nline: %d%n expected: %s%n was: %s", testCase.line(), testCase.expectedFloat(), value) + .isEqualTo(testCase.expectedFloat()); + } + } + } + + @ParameterizedTest + @FloatingPointNumberTestFilesSource + public void testFilesForFloat(FloatingPointNumberTestFile file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); + + // when + Float value = parser.parse(json, json.length, Float.class); + + // then + assertThat(value) + .withFailMessage("%nline: %d%nexpected: %s%nwas: %s", testCase.line(), testCase.expectedFloat(), value) + .isEqualTo(testCase.expectedFloat()); + } + } + } + + @CartesianTest + public void integralNumberAsFloatingPointNumber( + @Values(strings = {"123", "0", "-123"}) String jsonStr, + @Values(classes = {float.class, Float.class, Double.class, double.class}) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid floating-point number. 
Fraction or exponent part is missing."); + } + + @ParameterizedTest + @ValueSource(classes = {float.class, Float.class, double.class, Double.class}) + public void emptyJson(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @ParameterizedTest + @ValueSource(classes = {Float.class, Double.class}) + public void passedLengthSmallerThanNullLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } + + @ParameterizedTest + @ValueSource(classes = {float.class, Float.class, double.class, Double.class}) + public void passedLengthSmallerThanNumberLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("1.234"); + + // when + Object value = parser.parse(json, 3, expectedType); + + // then + assertThat(value.toString()).isEqualTo("1.2"); + } +} diff --git a/src/test/java/org/simdjson/IntegralNumberSchemaBasedParsingTest.java b/src/test/java/org/simdjson/IntegralNumberSchemaBasedParsingTest.java new file mode 100644 index 0000000..12e0fb1 --- /dev/null +++ b/src/test/java/org/simdjson/IntegralNumberSchemaBasedParsingTest.java @@ -0,0 +1,778 @@ +package org.simdjson; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.junitpioneer.jupiter.cartesian.CartesianTest; +import org.junitpioneer.jupiter.cartesian.CartesianTest.Values; +import org.simdjson.schemas.RecordWithBooleanField; +import 
org.simdjson.schemas.RecordWithByteArrayField; +import org.simdjson.schemas.RecordWithByteField; +import org.simdjson.schemas.RecordWithByteListField; +import org.simdjson.schemas.RecordWithIntegerArrayField; +import org.simdjson.schemas.RecordWithIntegerField; +import org.simdjson.schemas.RecordWithIntegerListField; +import org.simdjson.schemas.RecordWithLongArrayField; +import org.simdjson.schemas.RecordWithLongField; +import org.simdjson.schemas.RecordWithLongListField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveByteArrayField; +import org.simdjson.schemas.RecordWithPrimitiveByteField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerArrayField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveLongArrayField; +import org.simdjson.schemas.RecordWithPrimitiveLongField; +import org.simdjson.schemas.RecordWithPrimitiveShortArrayField; +import org.simdjson.schemas.RecordWithPrimitiveShortField; +import org.simdjson.schemas.RecordWithShortArrayField; +import org.simdjson.schemas.RecordWithShortField; +import org.simdjson.schemas.RecordWithShortListField; +import org.simdjson.schemas.RecordWithStringField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.RandomIntegralNumberSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import java.math.BigInteger; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.TestUtils.toUtf8; + +public class IntegralNumberSchemaBasedParsingTest { + + @ParameterizedTest + @RandomIntegralNumberSource( + classes = { + Byte.class, + byte.class, + Short.class, + short.class, + Integer.class, + int.class, + Long.class, + long.class + }, + includeMinMax = true + ) + public void integralNumberAtRoot(Class schema, String 
jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object value = parser.parse(json, json.length, schema); + + // then + assertThat(value).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = {Byte.class, Short.class, Integer.class, Long.class}) + public void nullAtRootWhenIntegralNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + Object value = parser.parse(json, json.length, schema); + + // then + assertThat(value).isNull(); + } + + @ParameterizedTest + @ValueSource(classes = {byte.class, short.class, int.class, long.class}) + public void nullAtRootWhenPrimitiveIntegralNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @RandomIntegralNumberSource( + classes = { + RecordWithByteField.class, + RecordWithPrimitiveByteField.class, + RecordWithShortField.class, + RecordWithPrimitiveShortField.class, + RecordWithIntegerField.class, + RecordWithPrimitiveIntegerField.class, + RecordWithLongField.class, + RecordWithPrimitiveLongField.class + }, + includeMinMax = true + ) + public void integralNumberAtObjectField(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithByteField.class, + RecordWithShortField.class, + RecordWithIntegerField.class, + RecordWithLongField.class + }) + public void nullAtObjectFieldWhenIntegralNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).extracting("field").isNull(); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithPrimitiveByteField.class, + RecordWithPrimitiveShortField.class, + RecordWithPrimitiveIntegerField.class, + RecordWithPrimitiveLongField.class + }) + public void nullAtObjectFieldWhenPrimitiveIntegralNumberIsExpected(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + Byte[].class, + byte[].class, + Short[].class, + short[].class, + Integer[].class, + int[].class, + Long[].class, + long[].class + }, + nulls = false + ) + public void arrayOfIntegralNumbersAtRoot(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object array = parser.parse(json, json.length, schema); + + // then + assertThat(array.getClass().isArray()).isTrue(); + assertThat(array).isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + Byte[].class, + Short[].class, + Integer[].class, + Long[].class + }, + nulls = true + ) + public void arrayOfIntegralNumbersAndNullsAtRoot(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object array = parser.parse(json, json.length, schema); + + // then + assertThat(array.getClass().isArray()).isTrue(); + assertThat(array).isEqualTo(expected); + } + + /** + * A JSON array containing a null element is parsed with a primitive array schema and the parser + * is expected to throw a JsonParsingException. The schemas are the primitive array types, so + * that parsing gets past the opening bracket and the valid elements before it meets the null; + * with scalar schemas the input would be rejected at the very first character. + */ + @ParameterizedTest + @ValueSource(classes = {byte[].class, short[].class, int[].class, long[].class}) + public void arrayOfPrimitiveIntegralNumbersAndNullsAtRoot(Class schema) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[-128, 1, 127, null]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, schema)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithByteArrayField.class, + RecordWithPrimitiveByteArrayField.class, + RecordWithShortArrayField.class, + RecordWithPrimitiveShortArrayField.class, + RecordWithIntegerArrayField.class, + RecordWithPrimitiveIntegerArrayField.class, + RecordWithLongArrayField.class, + RecordWithPrimitiveLongArrayField.class + }, + nulls = false + ) + public void objectWithArrayOfIntegralNumbers(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource( + schemas = { + RecordWithByteArrayField.class, + RecordWithByteListField.class, + RecordWithShortArrayField.class, + RecordWithShortListField.class, + RecordWithIntegerArrayField.class, + RecordWithIntegerListField.class, + RecordWithLongArrayField.class, + RecordWithLongListField.class + }, + nulls = true + ) + public void objectWithArrayOfIntegralNumbersWithNulls(Class schema, String jsonStr, Object expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Object object = parser.parse(json, json.length, schema); + + // then + assertThat(object).usingRecursiveComparison().isEqualTo(expected); + } + + @CartesianTest + public void outOfPrimitiveByteRange( + @Values(classes = {byte.class, Byte.class}) Class expectedType, + @Values(strings = { + "-9223372036854775809", + "-129", + "128", + "9223372036854775808" + }) String numStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + 
assertThat(ex) + .hasMessage("Number value is out of byte range ([-128, 127])."); + } + + @CartesianTest + public void outOfPrimitiveShortRange( + @Values(classes = {short.class, Short.class}) Class expectedType, + @Values(strings = { + "-9223372036854775809", + "-32769", + "32768", + "9223372036854775808" + }) String numStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number value is out of short range ([-32768, 32767])."); + } + + @CartesianTest + public void outOfPrimitiveIntegerRange( + @Values(classes = {int.class, Integer.class}) Class expectedType, + @Values(strings = { + "-9223372036854775809", + "-2147483649", + "2147483648", + "9223372036854775808" + }) String numStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number value is out of int range ([-2147483648, 2147483647])."); + } + + @CartesianTest + public void outOfPrimitiveLongRange( + @Values(classes = {long.class, Long.class}) Class expectedType, + @Values(strings = { + "9223372036854775808", + "9999999999999999999", + "10000000000000000000", + "-9223372036854775809", + "-9999999999999999999", + "-10000000000000000000" + }) String numStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(numStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number value is out of long range ([-9223372036854775808, 9223372036854775807])."); + } + + @CartesianTest + public void leadingZerosAreNotAllowed( + 
@Values(strings = {"01", "-01", "000", "-000"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Leading zeroes are not allowed."); + } + + @CartesianTest + public void minusHasToBeFollowedByAtLeastOneDigit( + @Values(strings = {"-a123", "--123", "-+123"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @CartesianTest + public void numberHasToBeFollowedByStructuralCharacterOrWhitespace( + @Values(strings = {"-1-2", "1a"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number has to be followed by a structural character or whitespace."); + } + + @CartesianTest + public void moreValuesThanOneIntegralNumberAtRoot( + @Values(strings = {"123,", "123{}", "1:"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @CartesianTest + public void floatingPointNumberAsIntegralNumber( + @Values(strings = {"1.0", "-1.0", "1e1", "1.9e1"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Number has to be followed by a structural character or 
whitespace."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = BigInteger.class, value = "Class: java.math.BigInteger has more than one constructor."), + @MapEntry(classKey = Number.class, value = "Unsupported class: java.lang.Number. Interfaces and abstract classes are not supported."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = byte[].class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForIntegralNumberAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("123"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. 
Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithByteArrayField.class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForIntegralNumberAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 123}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(classes = { + byte[].class, + Byte[].class, + short[].class, + Short[].class, + int[].class, + Integer[].class, + long[].class, + Long[].class + }) + public void arrayOfIntegralNumbersMixedWithOtherTypesAtRoot(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, -1, true]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithByteArrayField.class, + RecordWithPrimitiveByteArrayField.class, + RecordWithByteListField.class, + RecordWithShortArrayField.class, + RecordWithPrimitiveShortArrayField.class, + RecordWithShortListField.class, + RecordWithIntegerArrayField.class, + RecordWithPrimitiveIntegerArrayField.class, + RecordWithIntegerListField.class, + RecordWithLongArrayField.class, + RecordWithPrimitiveLongArrayField.class, + RecordWithLongListField.class + }) + public void arrayOfIntegralNumbersMixedWithOtherTypesAtObjectField(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1, -1, true]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = BigInteger[].class, value = "Class: java.math.BigInteger has more than one constructor."), + @MapEntry(classKey = Number[].class, value = "Unsupported class: java.lang.Number. Interfaces and abstract classes are not supported."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Byte.class, value = "Invalid number. 
Minus has to be followed by a digit."), + @MapEntry(classKey = byte[][].class, value = "Expected '[' but got: '1'."), + @MapEntry(classKey = Byte[][].class, value = "Expected '[' but got: '1'.") + }) + public void mismatchedTypeForArrayOfIntegralNumbersAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[1, -1, 0]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = byte[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = RecordWithPrimitiveBooleanField.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = byte.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Byte.class, value = "Invalid number. 
Minus has to be followed by a digit.") + }) + public void mismatchedTypeForArrayOfIntegralNumbersAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1, -1, 0]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) + public void startingWithPlusIsNotAllowed(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("+1"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. Minus has to be followed by a digit."); + } + + @CartesianTest + public void numberHasToStartWithMinusOrDigit( + @Values(strings = {"a123", "a-123"}) String jsonStr, + @Values(classes = { + byte.class, + Byte.class, + short.class, + Short.class, + int.class, + Integer.class, + long.class, + Long.class + }) Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); + } + + @ParameterizedTest + @ValueSource(classes = {byte.class, Byte.class}) + public void minusZeroIsTreatedAsByteZero(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("-0"); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo((byte) 0); + } + + @ParameterizedTest + @ValueSource(classes = {short.class, Short.class}) + public void minusZeroIsTreatedAsShortZero(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("-0"); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo((short) 0); + } + + @ParameterizedTest + @ValueSource(classes = {int.class, Integer.class}) + public void minusZeroIsTreatedAsIntegerZero(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("-0"); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0); + } + + @ParameterizedTest + @ValueSource(classes = {long.class, Long.class}) + public void minusZeroIsTreatedAsLongZero(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("-0"); + + // when + Object value = parser.parse(json, json.length, expectedType); + + // then + assertThat(value).isEqualTo(0L); + } + + @ParameterizedTest + @ValueSource(classes = {Byte.class, byte.class, Short.class, short.class, Integer.class, int.class, Long.class, long.class}) + public void emptyJson(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + 
@ParameterizedTest + @ValueSource(classes = {Byte.class, Short.class, Integer.class, Long.class}) + public void passedLengthSmallerThanNullLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } + + @ParameterizedTest + @ValueSource(classes = {byte.class, Byte.class, short.class, Short.class, int.class, Integer.class, long.class, Long.class}) + public void passedLengthSmallerThanNumberLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("1234"); + + // when + Object value = parser.parse(json, 2, expectedType); + + // then + assertThat(value.toString()).isEqualTo("12"); + } +} diff --git a/src/test/java/org/simdjson/JsonStringScannerTest.java b/src/test/java/org/simdjson/JsonStringScannerTest.java deleted file mode 100644 index 9c23440..0000000 --- a/src/test/java/org/simdjson/JsonStringScannerTest.java +++ /dev/null @@ -1,141 +0,0 @@ -package org.simdjson; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.simdjson.TestUtils.chunk; -import static org.simdjson.TestUtils.padWithSpaces; - -public class JsonStringScannerTest { - - @Test - public void testUnquotedString() { - // given - JsonStringScanner stringScanner = new JsonStringScanner(); - String str = padWithSpaces("abc 123"); - - // when - JsonStringBlock block = next(stringScanner, str); - - // then - assertThat(block.quote()).isEqualTo(0); - } - - @Test - public void testQuotedString() { - // given - JsonStringScanner stringScanner = new JsonStringScanner(); - String str = padWithSpaces("\"abc 
123\""); - - // when - JsonStringBlock block = next(stringScanner, str); - - // then - assertThat(block.quote()).isEqualTo(0x101); - } - - @Test - public void testStartingQuotes() { - // given - JsonStringScanner stringScanner = new JsonStringScanner(); - String str = padWithSpaces("\"abc 123"); - - // when - JsonStringBlock block = next(stringScanner, str); - - // then - assertThat(block.quote()).isEqualTo(0x1); - } - - @Test - public void testQuotedStringSpanningMultipleBlocks() { - // given - JsonStringScanner stringScanner = new JsonStringScanner(); - String str0 = "abc \"a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9"; - String str1 = " c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 d0 d1 d2 d3 d4 d5 d6 d7 d8 d\" def"; - - // when - JsonStringBlock firstBlock = next(stringScanner, str0); - JsonStringBlock secondBlock = next(stringScanner, str1); - - // then - assertThat(firstBlock.quote()).isEqualTo(0x10); - assertThat(secondBlock.quote()).isEqualTo(0x800000000000000L); - } - - @ParameterizedTest - @ValueSource(strings = { - "abc \\\"123", // abc \"123 - "abc \\\\\\\"123" // abc \\\"123 - }) - public void testEscapedQuote(String str) { - // given - JsonStringScanner stringScanner = new JsonStringScanner(); - String padded = padWithSpaces(str); - - // when - JsonStringBlock block = next(stringScanner, padded); - - // then - assertThat(block.quote()).isEqualTo(0); - } - - @Test - public void testEscapedQuoteSpanningMultipleBlocks() { - // given - JsonStringScanner stringScanner = new JsonStringScanner(); - String str0 = "a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 c0 \\"; - String str1 = padWithSpaces("\"def"); - - // when - JsonStringBlock firstBlock = next(stringScanner, str0); - JsonStringBlock secondBlock = next(stringScanner, str1); - - // then - assertThat(firstBlock.quote()).isEqualTo(0); - assertThat(secondBlock.quote()).isEqualTo(0); - } - - @ParameterizedTest - @ValueSource(strings = { - "abc \\\\\"123", // abc \\"123 - "abc \\\\\\\\\"123" 
// abc \\\\"123 - }) - public void testUnescapedQuote(String str) { - // given - JsonStringScanner stringScanner = new JsonStringScanner(); - String padded = padWithSpaces(str); - - // when - JsonStringBlock block = next(stringScanner, padded); - - // then - assertThat(block.quote()).isEqualTo(0x1L << str.indexOf('"')); - } - - @Test - public void testUnescapedQuoteSpanningMultipleBlocks() { - // given - JsonStringScanner stringScanner = new JsonStringScanner(); - String str0 = padWithSpaces("a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 c0 \\"); - String str1 = padWithSpaces("\\\"abc"); - - // when - JsonStringBlock firstBlock = next(stringScanner, str0); - JsonStringBlock secondBlock = next(stringScanner, str1); - - // then - assertThat(firstBlock.quote()).isEqualTo(0); - assertThat(secondBlock.quote()).isEqualTo(0x2); - } - - private JsonStringBlock next(JsonStringScanner scanner, String str) { - return switch (StructuralIndexer.N_CHUNKS) { - case 1 -> scanner.next(chunk(str, 0)); - case 2 -> scanner.next(chunk(str, 0), chunk(str, 1)); - default -> throw new RuntimeException("Unsupported chunk count: " + StructuralIndexer.N_CHUNKS); - }; - } -} diff --git a/src/test/java/org/simdjson/NullParsingTest.java b/src/test/java/org/simdjson/NullParsingTest.java new file mode 100644 index 0000000..2345f04 --- /dev/null +++ b/src/test/java/org/simdjson/NullParsingTest.java @@ -0,0 +1,105 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.util.Iterator; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.TestUtils.toUtf8; + +public class NullParsingTest { + + @Test + public void nullValueAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonValue 
jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isNull()).isTrue(); + } + + @ParameterizedTest + @ValueSource(strings = {"[n]", "{\"a\":n}"}) + public void invalidNull(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at " + jsonStr.indexOf('n') + ". Expected 'null'."); + } + + @Test + public void moreThanNullAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null,"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"nulll", "nul"}) + public void invalidNullAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected 'null'."); + } + + @Test + public void arrayOfNulls() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[null, null, null]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + for (int i = 0; i < 3; i++) { + assertThat(it.hasNext()).isTrue(); + JsonValue element = it.next(); + assertThat(element.isNull()).isTrue(); + } + assertThat(it.hasNext()).isFalse(); + } + + @Test + public void passedLengthSmallerThanNullLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } +} diff --git a/src/test/java/org/simdjson/NumberParsingTest.java b/src/test/java/org/simdjson/NumberParsingTest.java index e92aca3..2f7b64e 100644 --- a/src/test/java/org/simdjson/NumberParsingTest.java +++ b/src/test/java/org/simdjson/NumberParsingTest.java @@ -1,26 +1,67 @@ package org.simdjson; +import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; -import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.testutils.FloatingPointNumberTestFile; +import org.simdjson.testutils.FloatingPointNumberTestFile.FloatingPointNumberTestCase; +import org.simdjson.testutils.FloatingPointNumberTestFilesSource; +import org.simdjson.testutils.RandomIntegralNumberSource; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; import java.io.IOException; -import java.nio.file.Path; -import java.util.List; -import java.util.stream.Stream; +import java.util.Iterator; -import static 
org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.simdjson.JsonValueAssert.assertThat; -import static org.simdjson.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; +import static org.simdjson.testutils.TestUtils.toUtf8; public class NumberParsingTest { + @ParameterizedTest + @RandomIntegralNumberSource(classes = long.class, includeMinMax = true) + public void longAtRoot(String longStr, long expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(longStr); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(strings = {"1.1", "-1.1", "1e1", "1E1", "-1e1", "-1E1", "1e-1", "1E-1", "1.1e1", "1.1E1"}) + public void doubleAtRoot(String doubleStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(doubleStr); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue).isEqualTo(Double.parseDouble(doubleStr)); + } + + @ParameterizedTest + @ValueSource(strings = {"1,", "1.1,"}) + public void invalidNumbersAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + @ParameterizedTest @ValueSource(strings = { "123.", @@ -37,7 +78,8 @@ public void decimalPointHasToBeFollowedByAtLeastOneDigit(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid number. 
Decimal point has to be followed by a digit."); + assertThat(ex) + .hasMessage("Invalid number. Decimal point has to be followed by a digit."); } @ParameterizedTest @@ -62,7 +104,8 @@ public void exponentIndicatorHasToBeFollowedByAtLeastOneDigit(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid number. Exponent indicator has to be followed by a digit."); + assertThat(ex) + .hasMessage("Invalid number. Exponent indicator has to be followed by a digit."); } @ParameterizedTest @@ -83,7 +126,8 @@ public void leadingZerosAreNotAllowed(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid number. Leading zeroes are not allowed."); + assertThat(ex) + .hasMessage("Invalid number. Leading zeroes are not allowed."); } @ParameterizedTest @@ -105,7 +149,8 @@ public void minusHasToBeFollowedByAtLeastOneDigit(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid number. Minus has to be followed by a digit."); + assertThat(ex) + .hasMessage("Invalid number. 
Minus has to be followed by a digit."); } @ParameterizedTest @@ -125,7 +170,8 @@ public void numberHasToBeFollowedByStructuralCharacterOrWhitespace(String input) JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Number has to be followed by a structural character or whitespace."); + assertThat(ex) + .hasMessage("Number has to be followed by a structural character or whitespace."); } @Test @@ -151,7 +197,8 @@ public void startingWithPlusIsNotAllowed() { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."); + assertThat(ex) + .hasMessage("Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."); } @ParameterizedTest @@ -170,7 +217,8 @@ public void numberHasToStartWithMinusOrDigit(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Unrecognized primitive. Expected: string, number, 'true', 'false' or 'null'."); + assertThat(ex) + .hasMessage("Unrecognized primitive. 
Expected: string, number, 'true', 'false' or 'null'."); } @ParameterizedTest @@ -208,7 +256,8 @@ public void outOfRangeLongIsNotAllowed(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Number value is out of long range ([-9223372036854775808, 9223372036854775807])."); + assertThat(ex) + .hasMessage("Number value is out of long range ([-9223372036854775808, 9223372036854775807])."); } @ParameterizedTest @@ -541,50 +590,57 @@ public void exactDouble(String input, double expected) { } @ParameterizedTest - @MethodSource("listTestFiles") - // This test assumes that input files are formatted as described in: https://github.com/nigeltao/parse-number-fxx-test-data - public void testFiles(File file) throws IOException { + @FloatingPointNumberTestFilesSource + public void testFiles(FloatingPointNumberTestFile file) throws IOException { // given SimdJsonParser parser = new SimdJsonParser(); - try (BufferedReader br = new BufferedReader(new FileReader(file))) { - String line; - while ((line = br.readLine()) != null) { - String[] cells = line.split(" "); - Double expected = Double.longBitsToDouble(Long.decode("0x" + cells[2])); - String input = readInputNumber(cells[3]); - byte[] json = toUtf8(input); + try (FloatingPointNumberTestFile.FloatingPointNumberTestCasesIterator it = file.iterator()) { + while (it.hasNext()) { + FloatingPointNumberTestCase testCase = it.next(); + byte[] json = toUtf8(testCase.input()); // when JsonValue value = parser.parse(json, json.length); // then - assertThat(value).isEqualTo(expected); + assertThat(value) + .withFailMessage("%nline: %d%n expected: %s%n was: %s", testCase.line(), testCase.expectedDouble(), value) + .isEqualTo(testCase.expectedDouble()); } } } - private static String readInputNumber(String input) { - boolean isDouble = input.indexOf('e') >= 0 || input.indexOf('E') >= 0 || input.indexOf('.') >= 0; - if 
(isDouble) { - if (input.startsWith(".")) { - input = "0" + input; - } - return input.replaceFirst("\\.[eE]", ".0e"); - } - return input + ".0"; + @Test + public void arrayOfNumbers() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[0, 1, -1, 1.1]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + Assertions.assertThat(it.hasNext()).isTrue(); + assertThat(it.next()).isEqualTo(0); + assertThat(it.next()).isEqualTo(1); + assertThat(it.next()).isEqualTo(-1); + assertThat(it.next()).isEqualTo(1.1); + Assertions.assertThat(it.hasNext()).isFalse(); } - private static List listTestFiles() throws IOException { - String testDataDir = System.getProperty("org.simdjson.testdata.dir", System.getProperty("user.dir") + "/testdata"); - File[] testFiles = Path.of(testDataDir, "parse-number-fxx-test-data", "data").toFile().listFiles(); - if (testFiles == null) { - File emptyFile = new File(testDataDir, "empty.txt"); - emptyFile.createNewFile(); - return List.of(emptyFile); - } - return Stream.of(testFiles) - .filter(File::isFile) - .toList(); + @Test + public void passedLengthSmallerThanNumberLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("1234"); + + // when + JsonValue value = parser.parse(json, 2); + + // then + assertThat(value).isEqualTo(12); } } diff --git a/src/test/java/org/simdjson/ObjectParsingTest.java b/src/test/java/org/simdjson/ObjectParsingTest.java index 36cdbbc..76bd3a0 100644 --- a/src/test/java/org/simdjson/ObjectParsingTest.java +++ b/src/test/java/org/simdjson/ObjectParsingTest.java @@ -3,11 +3,12 @@ import org.junit.jupiter.api.Test; import java.util.Iterator; +import java.util.List; import java.util.Map; -import static org.assertj.core.api.Assertions.assertThat; -import static org.simdjson.JsonValueAssert.assertThat; -import static 
org.simdjson.TestUtils.toUtf8; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.TestUtils.toUtf8; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; public class ObjectParsingTest { @@ -22,7 +23,7 @@ public void emptyObject() { // then assertThat(jsonValue.isObject()).isTrue(); - Iterator it = jsonValue.arrayIterator(); + Iterator> it = jsonValue.objectIterator(); assertThat(it.hasNext()).isFalse(); } @@ -94,4 +95,69 @@ public void nonexistentField() { assertThat(jsonValue.get("\\u20A9\\u0E3F")).isNull(); assertThat(jsonValue.get("αβ")).isNull(); } + + @Test + public void nullFieldName() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\\null: 1}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Object does not start with a key"); + } + + @Test + public void arrayOfObjects() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[{\"a\": 1}, {\"a\": 2}, {\"a\": 3}]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator arrayIterator = jsonValue.arrayIterator(); + for (int expectedValue : List.of(1, 2, 3)) { + assertThat(arrayIterator.hasNext()).isTrue(); + JsonValue object = arrayIterator.next(); + assertThat(object.isObject()).isTrue(); + JsonValue field = object.get("a"); + assertThat(field.isLong()).isTrue(); + assertThat(field.asLong()).isEqualTo(expectedValue); + } + assertThat(arrayIterator.hasNext()).isFalse(); + } + + @Test + public void emptyJson() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("No structural element 
found."); + } + + @Test + public void unclosedObjectDueToPassedLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"a\":{}}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length - 1)); + + // then + assertThat(ex) + .hasMessage("No comma between object fields"); + } } diff --git a/src/test/java/org/simdjson/ObjectSchemaBasedParsingTest.java b/src/test/java/org/simdjson/ObjectSchemaBasedParsingTest.java new file mode 100644 index 0000000..071c77d --- /dev/null +++ b/src/test/java/org/simdjson/ObjectSchemaBasedParsingTest.java @@ -0,0 +1,839 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.annotations.JsonFieldName; +import org.simdjson.schemas.ClassWithIntegerField; +import org.simdjson.schemas.ClassWithPrimitiveBooleanField; +import org.simdjson.schemas.ClassWithPrimitiveByteField; +import org.simdjson.schemas.ClassWithPrimitiveCharacterField; +import org.simdjson.schemas.ClassWithPrimitiveDoubleField; +import org.simdjson.schemas.ClassWithPrimitiveFloatField; +import org.simdjson.schemas.ClassWithPrimitiveIntegerField; +import org.simdjson.schemas.ClassWithPrimitiveLongField; +import org.simdjson.schemas.ClassWithPrimitiveShortField; +import org.simdjson.schemas.ClassWithStringField; +import org.simdjson.schemas.RecordWithIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveByteField; +import org.simdjson.schemas.RecordWithPrimitiveCharacterField; +import org.simdjson.schemas.RecordWithPrimitiveDoubleField; +import org.simdjson.schemas.RecordWithPrimitiveFloatField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveLongField; +import org.simdjson.schemas.RecordWithPrimitiveShortField; 
+import org.simdjson.schemas.RecordWithStringField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; + +import java.lang.reflect.InvocationTargetException; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; +import static org.simdjson.testutils.TestUtils.toUtf8; + +public class ObjectSchemaBasedParsingTest { + + @ParameterizedTest + @ValueSource(classes = { + RecordWithIntegerField.class, + ClassWithIntegerField.class, + ClassWithoutExplicitConstructor.class + }) + public void emptyObject(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{}"); + + // when + Object object = parser.parse(json, json.length, expectedType); + + // then + assertThat(object).isNotNull(); + assertThat(object).hasAllNullFieldsOrProperties(); + } + + @ParameterizedTest + @ValueSource(classes = { + RecordWithPrimitiveByteField.class, + RecordWithPrimitiveShortField.class, + RecordWithPrimitiveIntegerField.class, + RecordWithPrimitiveLongField.class, + RecordWithPrimitiveBooleanField.class, + RecordWithPrimitiveFloatField.class, + RecordWithPrimitiveDoubleField.class, + RecordWithPrimitiveCharacterField.class, + ClassWithPrimitiveByteField.class, + ClassWithPrimitiveShortField.class, + ClassWithPrimitiveIntegerField.class, + ClassWithPrimitiveLongField.class, + ClassWithPrimitiveBooleanField.class, + ClassWithPrimitiveFloatField.class, + ClassWithPrimitiveDoubleField.class, + ClassWithPrimitiveCharacterField.class + }) + public void emptyObjectWhenPrimitiveFieldsAreExpected(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{}"); + + // when + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + 
assertThat(ex) + .hasCauseExactlyInstanceOf(NullPointerException.class); + } + + @Test + public void nullAtRootWhenObjectIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + RecordWithPrimitiveByteField object = parser.parse(json, json.length, RecordWithPrimitiveByteField.class); + + // then + assertThat(object).isNull(); + } + + @Test + public void nullAtObjectFieldWhenObjectIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"nestedField\": null}"); + + // when + NestedRecordWithStringField object = parser.parse(json, json.length, NestedRecordWithStringField.class); + + // then + assertThat(object).isNotNull(); + assertThat(object.nestedField()).isNull(); + } + + @Test + public void recordWithExplicitFieldNames() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4, \"fifth_field\": 5}"); + + // when + RecordWithExplicitFieldNames object = parser.parse(json, json.length, RecordWithExplicitFieldNames.class); + + // then + assertThat(object.firstField()).isEqualTo(1); + assertThat(object.secondField()).isEqualTo(2); + assertThat(object.thirdField()).isEqualTo(3); + assertThat(object.fourthField()).isEqualTo(4); + assertThat(object.fifthField()).isEqualTo(5); + } + + @Test + public void classWithExplicitFieldNames() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"ąćśńźż\": 1, \"\\u20A9\\u0E3F\": 2, \"αβγ\": 3, \"😀abc😀\": 4, \"fifth_field\": 5}"); + + // when + StaticClassWithExplicitFieldNames object = parser.parse(json, json.length, StaticClassWithExplicitFieldNames.class); + + // then + assertThat(object.getFirstField()).isEqualTo(1); + assertThat(object.getSecondField()).isEqualTo(2); + assertThat(object.getThirdField()).isEqualTo(3); + assertThat(object.getFourthField()).isEqualTo(4); + 
assertThat(object.getFifthField()).isEqualTo(5); + } + + @Test + public void recordWithImplicitAndExplicitFieldNames() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"implicitField\": \"abc\", \"explicit_field\": \"def\"}"); + + // when + RecordWithImplicitAndExplicitFieldNames object = parser.parse(json, json.length, RecordWithImplicitAndExplicitFieldNames.class); + + // then + assertThat(object.implicitField()).isEqualTo("abc"); + assertThat(object.explicitField()).isEqualTo("def"); + } + + @ParameterizedTest + @ValueSource(classes = {StaticClassWithImplicitAndExplicitFieldNames.class, StaticClassWithImplicitFieldNames.class}) + public void classWithImplicitFieldNames(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"firstField\": \"abc\", \"second_field\": \"def\"}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("Some of " + expectedType.getName() + "'s constructor arguments are not annotated with @JsonFieldName."); + } + + @Test + public void nonStaticInnerClassesAreUnsupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"abc\"}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, NonStaticInnerClass.class) + ); + + // then + assertThat(ex) + .hasMessage("Unsupported class: " + NonStaticInnerClass.class.getName() + ". 
Inner non-static classes are not supported."); + } + + @Test + public void fieldNamesWithEscapes() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"\\\"abc\\\\\": 1}"); + + // when + RecordWithEscapedFieldName jsonValue = parser.parse(json, json.length, RecordWithEscapedFieldName.class); + + // then + assertThat(jsonValue.firstField()).isEqualTo(1); + } + + @Test + public void fieldExistsInJsonButDoesNotExistInRecord() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"first\": 1, \"field\": 2, \"second\": 3}"); + + // when + RecordWithIntegerField jsonValue = parser.parse(json, json.length, RecordWithIntegerField.class); + + // then + assertThat(jsonValue.field()).isEqualTo(2); + } + + @Test + public void fieldDoesNotExistInJsonButExistsInRecord() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"first\": 1, \"second\": 3}"); + + // when + RecordWithIntegerField jsonValue = parser.parse(json, json.length, RecordWithIntegerField.class); + + // then + assertThat(jsonValue.field()).isNull(); + } + + @Test + public void primitiveFieldDoesNotExistInJsonButExistsInRecord() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"first\": 1, \"second\": 3}"); + + // when + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveIntegerField.class) + ); + + // then + assertThat(ex) + .hasCauseExactlyInstanceOf(NullPointerException.class); + } + + @ParameterizedTest + @ValueSource(classes = {NestedRecordWithStringField.class, NestedStaticClassWithStringField.class}) + public void objectWithEmptyObjectField(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"nestedField\": {}}"); + + // when + Object object = parser.parse(json, json.length, expectedType); + + // then + 
assertThat(object).isNotNull(); + assertThat(object).hasNoNullFieldsOrProperties(); + assertThat(object).extracting("nestedField").hasFieldOrPropertyWithValue("field", null); + } + + @Test + public void objectWithObjectFieldToRecord() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"nestedField\": {\"field\": \"abc\"}}"); + + // when + NestedRecordWithStringField object = parser.parse(json, json.length, NestedRecordWithStringField.class); + + // then + assertThat(object).isNotNull(); + assertThat(object.nestedField()).isNotNull(); + assertThat(object.nestedField().field()).isEqualTo("abc"); + } + + @Test + public void mismatchedTypeAtRootWhenObjectIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"{}\""); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("Expected '{' but got: '\"'."); + } + + @Test + public void mismatchedTypeAtObjectFieldWhenObjectIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"nestedField\": true}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, NestedRecordWithStringField.class) + ); + + // then + assertThat(ex) + .hasMessage("Expected '{' but got: 't'."); + } + + @Test + public void invalidButParsableJson() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 1, : 2}"); + + // when + RecordWithIntegerField object = parser.parse(json, json.length, RecordWithIntegerField.class); + + // then + assertThat(object.field()).isEqualTo(1); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{\"invalid\", \"field\": 1}", value = "Expected ':' but got: ','."), + @MapEntry(stringKey = "{\"field\": 1, \"invalid\"}", value = "More than 
one JSON value at the root of the document, or extra characters at the end of the JSON!") + }) + public void fieldWithoutValue(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{\"invalid\" 2, \"field\": 1}", value = "Expected ':' but got: '2'."), + @MapEntry(stringKey = "{\"field\": 1, \"invalid\" 2}", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!") + }) + public void missingColon(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(strings = { + "{\"invalid\": 2 \"field\": 1}", + "{\"field\": 1 \"invalid\" 2}", + }) + public void missingComma(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("Expected ',' but got: '\"'."); + } + + @Test + public void fieldWithoutName() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{: 2, \"field\": 1}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + 
assertThat(ex) + .hasMessage("Expected '\"' but got: ':'."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\null", "1", "true", "false", "[]", "{}"}) + public void invalidTypeOfFieldName(String fieldName) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{" + fieldName + ": 1}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("Expected '\"' but got: '" + fieldName.charAt(0) + "'."); + } + + @ParameterizedTest + @ValueSource(strings = {"{\"field\": 1", "{\"field\":", "{\"field\"", "{", "{\"ignore\": {\"field\": 1", "{\"field\": 1,",}) + public void unclosedObject(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex).hasMessage("Unclosed object. 
Missing '}' for starting '{'."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWithIntegerField[].class, nulls = true) + public void arrayOfObjectsAtRoot(String jsonStr, RecordWithIntegerField[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + RecordWithIntegerField[] array = parser.parse(json, json.length, RecordWithIntegerField[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWithIntegerField[].class, nulls = true) + public void arrayOfObjectsAtObjectField(String jsonStr, RecordWithIntegerField[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + ArrayOfRecordsWithIntegerField object = parser.parse(json, json.length, ArrayOfRecordsWithIntegerField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = RecordWithIntegerField[].class, nulls = true) + public void listOfObjectsAtObjectField(String jsonStr, RecordWithIntegerField[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + ListOfRecordsWithIntegerField object = parser.parse(json, json.length, ListOfRecordsWithIntegerField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(stringKey = "{},", value = "Unclosed object. Missing '}' for starting '{'."), + @MapEntry(stringKey = "{\"field\": 1},", value = "Unclosed object. Missing '}' for starting '{'."), + @MapEntry(stringKey = "{\"field\": 1}[]", value = "Unclosed object. 
Missing '}' for starting '{'."), + @MapEntry(stringKey = "{\"field\": 1}{}", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!"), + @MapEntry(stringKey = "{\"field\": 1}1", value = "Unclosed object. Missing '}' for starting '{'."), + @MapEntry(stringKey = "null,", value = "More than one JSON value at the root of the document, or extra characters at the end of the JSON!") + }) + public void moreValuesThanOneObjectAtRoot(String jsonStr, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, RecordWithIntegerField.class)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void classWithMultipleConstructors() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 1, \"field2\": 2}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, ClassWithMultipleConstructors.class) + ); + + // then + assertThat(ex) + .hasMessage("Class: " + ClassWithMultipleConstructors.class.getName() + " has more than one constructor."); + } + + @Test + public void recordWithMultipleConstructors() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 1, \"field2\": 2}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithMultipleConstructors.class) + ); + + // then + assertThat(ex) + .hasMessage("Class: " + RecordWithMultipleConstructors.class.getName() + " has more than one constructor."); + } + + @Test + public void missingObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + NestedRecordWithStringField object = 
parser.parse(json, json.length, NestedRecordWithStringField.class); + + // then + assertThat(object.nestedField()).isNull(); + } + + @Test + public void objectInstantiationFailure() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": 1}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, ClassWithFailingConstructor.class) + ); + + // then + assertThat(ex) + .hasMessage("Failed to construct an instance of " + ClassWithFailingConstructor.class.getName()) + .hasCauseExactlyInstanceOf(InvocationTargetException.class); + } + + @Test + public void emptyJson() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @Test + public void passedLengthSmallerThanNullLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, 3, RecordWithIntegerField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected 'null'."); + } + + @Test + public void genericClassesOtherThanListAreNotSupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": {\"field\": 123}}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithGenericField.class) + ); + + // then + assertThat(ex) + .hasMessage("Parametrized types other than java.util.List are not supported."); + } + + @Test + public void listsWithoutElementTypeAreNotSupported() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [1, 2, 3]}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithListWithoutElementType.class) + ); + + // then + assertThat(ex) + .hasMessage("Undefined list element type."); + } + + @Test + public void issue50() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"name\": \"John\", \"age\": 30, \"aaa\": 1, \"bbb\": 2, \"ccc\": 3}"); + + // when + Issue50 object = parser.parse(json, json.length, Issue50.class); + + // then + assertThat(object.aaa()).isEqualTo(1); + assertThat(object.bbb()).isEqualTo(2); + assertThat(object.ccc()).isEqualTo(3); + } + + private record Issue50(long aaa, long bbb, long ccc) { + + } + + private record RecordWithExplicitFieldNames(@JsonFieldName("ąćśńźż") long firstField, + @JsonFieldName("\u20A9\u0E3F") long secondField, + @JsonFieldName("αβγ") long thirdField, + @JsonFieldName("😀abc😀") long fourthField, + @JsonFieldName("fifth_field") long fifthField) { + } + + private static class StaticClassWithExplicitFieldNames { + + private final long firstField; + private final long secondField; + private final long thirdField; + private final long fourthField; + private final long fifthField; + + private StaticClassWithExplicitFieldNames(@JsonFieldName("ąćśńźż") long firstField, + 
@JsonFieldName("\u20A9\u0E3F") long secondField, + @JsonFieldName("αβγ") long thirdField, + @JsonFieldName("😀abc😀") long fourthField, + @JsonFieldName("fifth_field") long fifthField) { + this.firstField = firstField; + this.secondField = secondField; + this.thirdField = thirdField; + this.fourthField = fourthField; + this.fifthField = fifthField; + } + + public long getFirstField() { + return firstField; + } + + public long getSecondField() { + return secondField; + } + + public long getThirdField() { + return thirdField; + } + + public long getFourthField() { + return fourthField; + } + + public long getFifthField() { + return fifthField; + } + } + + private record RecordWithImplicitAndExplicitFieldNames(String implicitField, + @JsonFieldName("explicit_field") String explicitField) { + } + + private static class StaticClassWithImplicitAndExplicitFieldNames { + + private final String firstField; + private final String secondField; + + StaticClassWithImplicitAndExplicitFieldNames(String firstField, @JsonFieldName("second_field") String secondField) { + this.firstField = firstField; + this.secondField = secondField; + } + + String getFirstField() { + return firstField; + } + + String getSecondField() { + return secondField; + } + } + + private static class StaticClassWithImplicitFieldNames { + + private final String firstField; + private final String secondField; + + StaticClassWithImplicitFieldNames(String firstField, String secondField) { + this.firstField = firstField; + this.secondField = secondField; + } + + String getFirstField() { + return firstField; + } + + String getSecondField() { + return secondField; + } + } + + private record RecordWithEscapedFieldName(@JsonFieldName("\"abc\\") long firstField) { + } + + private record NestedRecordWithStringField(RecordWithStringField nestedField) { + + } + + private static class NestedStaticClassWithStringField { + + private final ClassWithStringField nestedField; + + 
NestedStaticClassWithStringField(@JsonFieldName("nestedField") ClassWithStringField nestedField) { + this.nestedField = nestedField; + } + + ClassWithStringField getNestedField() { + return nestedField; + } + } + + private record ArrayOfRecordsWithIntegerField(RecordWithIntegerField[] field) { + + } + + private record ListOfRecordsWithIntegerField(List field) { + + } + + private static class ClassWithMultipleConstructors { + + private final int field; + private final int field2; + + ClassWithMultipleConstructors(@JsonFieldName("field") int field) { + this.field = field; + this.field2 = 0; + } + + ClassWithMultipleConstructors(@JsonFieldName("field") int field, @JsonFieldName("field2") int field2) { + this.field = field; + this.field2 = field2; + } + } + + private record RecordWithMultipleConstructors(int field, int field2) { + + RecordWithMultipleConstructors(int field) { + this(field, 0); + } + } + + private static class ClassWithFailingConstructor { + + ClassWithFailingConstructor(@JsonFieldName("field") int field) { + throw new RuntimeException(); + } + } + + private class NonStaticInnerClass { + + private final String field; + + NonStaticInnerClass(@JsonFieldName("field") String field) { + this.field = field; + } + + String getField() { + return field; + } + } + + private record RecordWithGenericField(GenericRecord field) { + + } + + private record GenericRecord(T field) { + + } + + private record RecordWithListWithoutElementType(List field) { + + } + + private static class ClassWithoutExplicitConstructor { + + } +} diff --git a/src/test/java/org/simdjson/SimdJsonParserTest.java b/src/test/java/org/simdjson/SimdJsonParserTest.java deleted file mode 100644 index 3c16218..0000000 --- a/src/test/java/org/simdjson/SimdJsonParserTest.java +++ /dev/null @@ -1,323 +0,0 @@ -package org.simdjson; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; - -import java.io.IOException; 
-import java.util.Iterator; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.fail; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.simdjson.JsonValueAssert.assertThat; -import static org.simdjson.TestUtils.loadTestFile; -import static org.simdjson.TestUtils.toUtf8; - -public class SimdJsonParserTest { - - @Test - public void testEmptyArray() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - while (it.hasNext()) { - fail("Unexpected value"); - it.next(); - } - } - - @Test - public void testArrayIterator() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[1, 2, 3]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - int[] expectedValues = new int[]{1, 2, 3}; - int counter = 0; - Iterator it = jsonValue.arrayIterator(); - while (it.hasNext()) { - JsonValue element = it.next(); - assertThat(element.isLong()).isTrue(); - assertThat(element.asLong()).isEqualTo(expectedValues[counter]); - counter++; - } - assertThat(counter).isEqualTo(expectedValues.length); - } - - @Test - public void testBooleanValues() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[true, false]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - assertThat(it.hasNext()).isTrue(); - assertThat(it.next()).isEqualTo(true); - assertThat(it.next()).isEqualTo(false); - assertThat(it.hasNext()).isFalse(); - } - - @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testBooleanValuesAsRoot(boolean booleanVal) { - // given - 
SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(Boolean.toString(booleanVal)); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue).isEqualTo(booleanVal); - } - - @Test - public void testNullValue() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[null]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - assertThat(it.hasNext()).isTrue(); - JsonValue element = it.next(); - assertThat(element.isNull()).isTrue(); - assertThat(it.hasNext()).isFalse(); - } - - @Test - public void testNullValueAsRoot() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("null"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isNull()).isTrue(); - } - - @Test - public void testStringValues() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[\"abc\", \"ab\\\\c\"]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - assertThat(it.hasNext()).isTrue(); - assertThat(it.next()).isEqualTo("abc"); - assertThat(it.next()).isEqualTo("ab\\c"); - assertThat(it.hasNext()).isFalse(); - } - - @ParameterizedTest - @ValueSource(strings = {"abc", "ą"}) - public void testStringValuesAsRoot(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("\"" + jsonStr + "\""); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue).isEqualTo(jsonStr); - } - - @Test - public void testNumericValues() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[0, 1, -1, 1.1]"); - - // when - JsonValue jsonValue = parser.parse(json, 
json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - Iterator it = jsonValue.arrayIterator(); - assertThat(it.hasNext()).isTrue(); - assertThat(it.next()).isEqualTo(0); - assertThat(it.next()).isEqualTo(1); - assertThat(it.next()).isEqualTo(-1); - assertThat(it.next()).isEqualTo(1.1); - assertThat(it.hasNext()).isFalse(); - } - - @ParameterizedTest - @ValueSource(strings = {"0", "1", "-1"}) - public void testLongValuesAsRoot(String longStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(longStr); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue).isEqualTo(Long.parseLong(longStr)); - } - - @ParameterizedTest - @ValueSource(strings = {"1.1", "-1.1", "1e1", "1E1", "-1e1", "-1E1", "1e-1", "1E-1", "1.1e1", "1.1E1"}) - public void testDoubleValuesAsRoot(String doubleStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(doubleStr); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue).isEqualTo(Double.parseDouble(doubleStr)); - } - - @ParameterizedTest - @ValueSource(strings = {"true,", "false,", "null,", "1,", "\"abc\",", "1.1,"}) - public void testInvalidPrimitivesAsRoot(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(jsonStr); - - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - - // then - assertThat(ex.getMessage()) - .isEqualTo("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - } - - @ParameterizedTest - @ValueSource(strings = {"[n]", "{\"a\":n}"}) - public void testInvalidNull(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(jsonStr); - - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 
json.length)); - - // then - assertThat(ex.getMessage()).isEqualTo("Invalid value starting at " + jsonStr.indexOf('n') + ". Expected 'null'."); - } - - @ParameterizedTest - @ValueSource(strings = {"[f]", "{\"a\":f}"}) - public void testInvalidFalse(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(jsonStr); - - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - - // then - assertThat(ex.getMessage()).isEqualTo("Invalid value starting at " + jsonStr.indexOf('f') + ". Expected 'false'."); - } - - @ParameterizedTest - @ValueSource(strings = {"[t]", "{\"a\":t}"}) - public void testInvalidTrue(String jsonStr) { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8(jsonStr); - - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - - // then - assertThat(ex.getMessage()).isEqualTo("Invalid value starting at " + jsonStr.indexOf('t') + ". 
Expected 'true'."); - } - - @Test - public void testArraySize() { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[1, 2, 3]"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - assertThat(jsonValue.getSize()).isEqualTo(3); - } - - @Test - public void testLargeArraySize() { - // given - SimdJsonParser parser = new SimdJsonParser(); - int realArraySize = 0xFFFFFF + 1; - byte[] json = new byte[realArraySize * 2 - 1 + 2]; - json[0] = '['; - int i = 0; - while (i < realArraySize) { - json[i * 2 + 1] = (byte) '0'; - json[i * 2 + 2] = (byte) ','; - i++; - } - json[json.length - 1] = ']'; - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isArray()).isTrue(); - assertThat(jsonValue.getSize()).isEqualTo(0xFFFFFF); - } - - @Test - public void issue26DeepBench() throws IOException { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = loadTestFile("/deep_bench.json"); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isObject()).isTrue(); - } - - @ParameterizedTest - @ValueSource(strings = {"/wide_bench.json", "/deep_bench.json"}) - public void issue26(String file) throws IOException { - // given - SimdJsonParser parser = new SimdJsonParser(); - byte[] json = loadTestFile(file); - - // when - JsonValue jsonValue = parser.parse(json, json.length); - - // then - assertThat(jsonValue.isObject()).isTrue(); - } -} diff --git a/src/test/java/org/simdjson/StringParsingTest.java b/src/test/java/org/simdjson/StringParsingTest.java index a61572e..5d80fa1 100644 --- a/src/test/java/org/simdjson/StringParsingTest.java +++ b/src/test/java/org/simdjson/StringParsingTest.java @@ -3,40 +3,69 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import 
org.simdjson.testutils.RandomStringSource; +import org.simdjson.testutils.StringTestData; +import java.io.IOException; import java.util.Iterator; import java.util.List; -import static java.lang.Character.MAX_CODE_POINT; -import static java.lang.Character.isBmpCodePoint; -import static java.lang.Character.lowSurrogate; -import static java.util.stream.IntStream.rangeClosed; import static org.apache.commons.text.StringEscapeUtils.unescapeJava; -import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.simdjson.JsonValueAssert.assertThat; -import static org.simdjson.TestUtils.toUtf8; +import static org.junit.jupiter.api.Assertions.fail; +import static org.simdjson.testutils.SimdJsonAssertions.assertThat; +import static org.simdjson.testutils.TestUtils.loadTestFile; +import static org.simdjson.testutils.TestUtils.toUtf8; public class StringParsingTest { + @ParameterizedTest + @RandomStringSource + public void stringAtRoot(String jsonStr, String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + jsonStr + "\""); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(strings = {"\"abc\",", "\"abc\"def"}) + public void moreValuesThanOneStringAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + @Test public void usableUnicodeCharacters() { // given SimdJsonParser parser = new SimdJsonParser(); - List unicodeCharacters = rangeClosed(0, MAX_CODE_POINT) - .filter(Character::isDefined) - .filter(codePoint -> 
!isReservedCodePoint(codePoint)) - .mapToObj(StringParsingTest::toUnicodeEscape) - .toList(); + List characters = StringTestData.usableEscapedUnicodeCharacters(); - for (String input : unicodeCharacters) { - byte[] json = toUtf8("\"" + input + "\""); + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); - // when - JsonValue value = parser.parse(json, json.length); + // when + JsonValue value = parser.parse(json, json.length); - // then - assertThat(value).isEqualTo(unescapeJava(input)); + // then + assertThat(value).isEqualTo(unescapeJava(character)); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } } } @@ -44,33 +73,37 @@ public void usableUnicodeCharacters() { public void unicodeCharactersReservedForLowSurrogate() { // given SimdJsonParser parser = new SimdJsonParser(); - List unicodeCharacters = rangeClosed(0xDC00, 0xDFFF) - .mapToObj(StringParsingTest::toUnicodeEscape) - .toList(); + List unicodeCharacters = StringTestData.escapedLowSurrogates(); - for (String input : unicodeCharacters) { - byte[] json = toUtf8("\"" + input + "\""); + for (String character : unicodeCharacters) { + try { + byte[] json = toUtf8("\"" + character + "\""); - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - // then - assertThat(ex.getMessage()).isEqualTo("Invalid code point. The range U+DC00–U+DFFF is reserved for low surrogate."); + // then + assertThat(ex) + .hasMessage("Invalid code point. 
The range U+DC00–U+DFFF is reserved for low surrogate."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } } } @ParameterizedTest - @ValueSource(strings = {"\\uD8001", "\\uD800\\1", "\\uD800u", "\\uD800\\e", "\\uD800\\DC00"}) - public void invalidLowSurrogateEscape(String input) { + @ValueSource(strings = {"\\uD8001", "\\uD800\\1", "\\uD800u", "\\uD800\\e", "\\uD800\\DC00", "\\uD800"}) + public void invalidLowSurrogateEscape(String invalidCharacter) { // given SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("\"" + input + "\""); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); // when JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Low surrogate should start with '\\u'"); + assertThat(ex) + .hasMessage("Low surrogate should start with '\\u'"); } @ParameterizedTest @@ -84,55 +117,60 @@ public void missingLowSurrogate(String input) { JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid code point. Low surrogate should be in the range U+DC00–U+DFFF."); + assertThat(ex) + .hasMessage("Invalid code point. 
Low surrogate should be in the range U+DC00–U+DFFF."); } @Test public void invalidLowSurrogateRange() { // given SimdJsonParser parser = new SimdJsonParser(); - List unicodeCharacters = rangeClosed(0x0000, 0xFFFF) - .filter(lowSurrogate -> lowSurrogate < 0xDC00 || lowSurrogate > 0xDFFF) - .mapToObj(lowSurrogate -> String.format("\\uD800\\u%04X", lowSurrogate)) - .toList(); + List unicodeCharacters = StringTestData.escapedUnicodeCharactersWithInvalidLowSurrogate(); - for (String input : unicodeCharacters) { - byte[] json = toUtf8("\"" + input + "\""); + for (String character : unicodeCharacters) { + try { + byte[] json = toUtf8("\"" + character + "\""); - // when - JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); - // then - assertThat(ex.getMessage()).isEqualTo("Invalid code point. Low surrogate should be in the range U+DC00–U+DFFF."); + // then + assertThat(ex) + .hasMessage("Invalid code point. 
Low surrogate should be in the range U+DC00–U+DFFF."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } } } @ParameterizedTest @ValueSource(strings = {"\\u", "\\u1", "\\u12", "\\u123"}) - public void invalidUnicode(String input) { + public void invalidUnicode(String invalidCharacter) { // given SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("\"" + input + "\""); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); // when JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).isEqualTo("Invalid unicode escape sequence."); + assertThat(ex) + .hasMessage("Invalid unicode escape sequence."); } @ParameterizedTest @ValueSource(strings = {"\\g", "\\ą"}) - public void invalidEscape(String jsonStr) { + public void invalidEscape(String invalidCharacter) { // given SimdJsonParser parser = new SimdJsonParser(); - byte[] json = toUtf8("[\"" + jsonStr + "\"]"); + byte[] json = toUtf8("[\"" + invalidCharacter + "\"]"); // when JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); // then - assertThat(ex.getMessage()).startsWith("Escaped unexpected character: "); + assertThat(ex) + .hasMessageStartingWith("Escaped unexpected character: "); } @Test @@ -152,16 +190,86 @@ public void longString() { assertThat(it.hasNext()).isFalse(); } - private static String toUnicodeEscape(int codePoint) { - if (isBmpCodePoint(codePoint)) { - return String.format("\\u%04X", codePoint); - } else { - return String.format("\\u%04X\\u%04X", - (int) Character.highSurrogate(codePoint), (int) lowSurrogate(codePoint)); + @ParameterizedTest + @ValueSource(strings = {"/wide_bench.json", "/deep_bench.json"}) + public void issue26(String file) throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = loadTestFile(file); + + // when + JsonValue jsonValue = parser.parse(json, 
json.length); + + // then + assertThat(jsonValue.isObject()).isTrue(); + } + + @Test + public void unescapedControlCharacterAsString() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.unescapedControlCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessage("Unescaped characters. Within strings, there are characters that should be escaped."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } } } - private static boolean isReservedCodePoint(int codePoint) { - return codePoint >= 0xD800 && codePoint <= 0xDFFF; + @ParameterizedTest + @ValueSource(strings = {"\"", "\\"}) + public void unescapedSpecialStringCharacterAsString(String character) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length)); + + // then + assertThat(ex) + .hasMessageStartingWith("Unclosed string. 
A string is opened, but never closed."); + } + + @Test + public void arrayOfStrings() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"abc\", \"ab\\\\c\"]"); + + // when + JsonValue jsonValue = parser.parse(json, json.length); + + // then + assertThat(jsonValue.isArray()).isTrue(); + Iterator it = jsonValue.arrayIterator(); + assertThat(it.hasNext()).isTrue(); + assertThat(it.next()).isEqualTo("abc"); + assertThat(it.next()).isEqualTo("ab\\c"); + assertThat(it.hasNext()).isFalse(); + } + + @Test + public void passedLengthSmallerThanStringLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"aaaaa\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 6)); + + // then + assertThat(ex) + .hasMessage("Unclosed string. A string is opened, but never closed."); } } diff --git a/src/test/java/org/simdjson/StringSchemaBasedParsingTest.java b/src/test/java/org/simdjson/StringSchemaBasedParsingTest.java new file mode 100644 index 0000000..7c771cd --- /dev/null +++ b/src/test/java/org/simdjson/StringSchemaBasedParsingTest.java @@ -0,0 +1,1356 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.simdjson.schemas.RecordWithBooleanField; +import org.simdjson.schemas.RecordWithCharacterArrayField; +import org.simdjson.schemas.RecordWithCharacterField; +import org.simdjson.schemas.RecordWithCharacterListField; +import org.simdjson.schemas.RecordWithIntegerField; +import org.simdjson.schemas.RecordWithPrimitiveBooleanField; +import org.simdjson.schemas.RecordWithPrimitiveCharacterArrayField; +import org.simdjson.schemas.RecordWithPrimitiveCharacterField; +import org.simdjson.schemas.RecordWithPrimitiveIntegerField; +import org.simdjson.schemas.RecordWithStringArrayField; +import 
org.simdjson.schemas.RecordWithStringField; +import org.simdjson.schemas.RecordWithStringListField; +import org.simdjson.testutils.MapEntry; +import org.simdjson.testutils.MapSource; +import org.simdjson.testutils.RandomStringSource; +import org.simdjson.testutils.SchemaBasedRandomValueSource; +import org.simdjson.testutils.StringTestData; + +import java.util.List; + +import static org.apache.commons.text.StringEscapeUtils.unescapeJava; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.fail; +import static org.simdjson.testutils.TestUtils.toUtf8; + +public class StringSchemaBasedParsingTest { + + @Test + public void emptyStringAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"\""); + + // when + String string = parser.parse(json, json.length, String.class); + + // then + assertThat(string).isEqualTo(""); + } + + @ParameterizedTest + @RandomStringSource + public void stringAtRoot(String jsonStr, String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + jsonStr + "\""); + + // when + String string = parser.parse(json, json.length, String.class); + + // then + assertThat(string).isEqualTo(expected); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanStringAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected either string or 'null'."); + } + + @Test + public void nullAtRootWhenStringIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + String string = parser.parse(json, json.length, String.class); + + // then + assertThat(string).isNull(); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = Integer.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = char.class, value = "String cannot be deserialized to a char. Expected a single-character string."), + @MapEntry(classKey = Character.class, value = "String cannot be deserialized to a char. Expected a single-character string."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'.") + }) + public void mismatchedTypeForStringAsRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"abc\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @ValueSource(strings = {"\"abc\",", "\"abc\"def"}) + public void moreValuesThanOneStringAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @Test + public void emptyStringAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"\"}"); + + // when + 
RecordWithStringField object = parser.parse(json, json.length, RecordWithStringField.class); + + // then + assertThat(object.field()).isEqualTo(""); + } + + @ParameterizedTest + @RandomStringSource + public void stringAtObjectField(String jsonStr, String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"" + jsonStr + "\"}"); + + // when + RecordWithStringField object = parser.parse(json, json.length, RecordWithStringField.class); + + // then + assertThat(object.field()).isEqualTo(expected); + } + + @Test + public void nullAtObjectFieldWhenStringIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithStringField object = parser.parse(json, json.length, RecordWithStringField.class); + + // then + assertThat(object.field()).isNull(); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanStringAtObjectField(String value) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + value + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithStringField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected either string or 'null'."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = RecordWithPrimitiveCharacterField.class, value = "String cannot be deserialized to a char. Expected a single-character string."), + @MapEntry(classKey = RecordWithCharacterField.class, value = "String cannot be deserialized to a char. Expected a single-character string."), + @MapEntry(classKey = RecordWithPrimitiveIntegerField.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = RecordWithBooleanField.class, value = "Unrecognized boolean value. 
Expected: 'true', 'false' or 'null'.") + }) + public void mismatchedTypeForStringAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"abc\"}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void usableUnicodeCharactersAsString() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableEscapedUnicodeCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + String value = parser.parse(json, json.length, String.class); + + // then + assertThat(value).isEqualTo(unescapeJava(character)); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @Test + public void unicodeCharactersReservedForLowSurrogateAsString() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List codePoints = StringTestData.escapedLowSurrogates(); + + for (String codePoint : codePoints) { + try { + byte[] json = toUtf8("\"" + codePoint + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid code point. 
The range U+DC00–U+DFFF is reserved for low surrogate."); + } catch (Throwable e) { + fail("Failed for code point: " + codePoint, e); + } + } + } + + @ParameterizedTest + @RandomStringSource(maxChars = 1) + public void characterAtRoot(String jsonStr, Character expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + jsonStr + "\""); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character) + .isEqualTo(expected); + } + + @ParameterizedTest + @RandomStringSource(maxChars = 1) + public void primitiveCharAtRoot(String jsonStr, char expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + jsonStr + "\""); + + // when + char character = parser.parse(json, json.length, char.class); + + // then + assertThat(character) + .isEqualTo(expected); + } + + @Test + public void nullAtRootWhenCharacterIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character).isNull(); + } + + @Test + public void nullAtRootWhenPrimitiveCharacterIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected string."); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanCharacterAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected either string or 'null'."); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanPrimitiveCharacterAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected string."); + } + + @ParameterizedTest + @RandomStringSource(maxChars = 1) + public void characterAtObjectField(String jsonStr, Character expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"" + jsonStr + "\"}"); + + // when + RecordWithCharacterField object = parser.parse(json, json.length, RecordWithCharacterField.class); + + // then + assertThat(object.field()) + .isEqualTo(expected); + } + + @Test + public void nullAtObjectFieldWhenCharacterIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + RecordWithCharacterField object = parser.parse(json, json.length, RecordWithCharacterField.class); + + // then + assertThat(object.field()).isNull(); + } + + @ParameterizedTest + @RandomStringSource(maxChars = 1) + public void primitiveCharAtObjectField(String jsonStr, char expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": \"" + jsonStr + "\"}"); + + // 
when + RecordWithPrimitiveCharacterField object = parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class); + + // then + assertThat(object.field()) + .isEqualTo(expected); + } + + @Test + public void nullAtObjectFieldWhenPrimitiveCharacterIsExpected() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": null}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected string."); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanCharacterAtObjectField(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected either string or 'null'."); + } + + @ParameterizedTest + @ValueSource(strings = {"true", "false", "1"}) + public void typeOtherThanPrimitiveCharacterAtObjectField(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. 
Expected string."); + } + + @ParameterizedTest + @ValueSource(strings = {"a\"", "\"a"}) + public void missingQuotationMarkForCharacter(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Unclosed string. A string is opened, but never closed."); + } + + @ParameterizedTest + @ValueSource(strings = {"a\"", "\"a"}) + public void missingQuotationMarkForPrimitiveCharacter(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Unclosed string. A string is opened, but never closed."); + } + + @Test + public void missingQuotationMarksForCharacterAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("a"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected either string or 'null'."); + } + + @Test + public void missingQuotationMarksForPrimitiveCharacterAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("a"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. 
Expected string."); + } + + @Test + public void missingQuotationMarksForCharacterAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": a}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected either string or 'null'."); + } + + @Test + public void missingQuotationMarksForPrimitiveCharacterAtObjectField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": a}"); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class) + ); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 10. Expected string."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"ab\"", "\"\\u0024\\u0023\""}) + public void stringLongerThanOneCharacterWhenCharacterIsExpected(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("String cannot be deserialized to a char. Expected a single-character string."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"ab\"", "\"\\u0024\\u0023\""}) + public void stringLongerThanOneCharacterWhenPrimitiveCharacterIsExpected(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("String cannot be deserialized to a char. 
Expected a single-character string."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\\"", "\\\\", "\\/", "\\b", "\\f", "\\n", "\\r", "\\t"}) + public void twoCharacterEscapeSequenceAsPrimitiveCharacter(String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + expected + "\""); + + // when + char character = parser.parse(json, json.length, char.class); + + // then + assertThat(character).isEqualTo(unescapeJava(expected).charAt(0)); + } + + @ParameterizedTest + @ValueSource(strings = {"\\\"", "\\\\", "\\/", "\\b", "\\f", "\\n", "\\r", "\\t"}) + public void twoCharacterEscapeSequenceAsCharacter(String expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + expected + "\""); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character).isEqualTo(unescapeJava(expected).charAt(0)); + } + + @ParameterizedTest + @ValueSource(classes = {Character.class, char.class}) + public void restrictedEscapedSingleCodeUnit(Class expectedClass) { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.reservedEscapedSingleCodeUnitCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedClass)); + + // then + assertThat(ex) + .hasMessage("Invalid code point. 
Should be within the range U+0000–U+D777 or U+E000–U+FFFF."); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableEscapedSingleCodeUnitAsPrimitiveCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableEscapedSingleCodeUnitCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + char character = parser.parse(json, json.length, char.class); + + // then + assertThat(character).isEqualTo(unescapeJava(expected).charAt(0)); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableEscapedSingleCodeUnitAsCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableEscapedSingleCodeUnitCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character).isEqualTo(unescapeJava(expected).charAt(0)); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableSingleCodeUnitAsCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableSingleCodeUnitCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + Character character = parser.parse(json, json.length, Character.class); + + // then + assertThat(character).isEqualTo(expected.charAt(0)); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableSingleCodeUnitAsPrimitiveCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableSingleCodeUnitCharacters(); + + for (String expected : 
characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + char character = parser.parse(json, json.length, char.class); + + // then + assertThat(character).isEqualTo(expected.charAt(0)); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableTwoCodeUnitsAsPrimitiveCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableTwoCodeUnitsCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("String cannot be deserialized to a char. Expected a single 16-bit code unit character."); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @Test + public void usableTwoCodeUnitsAsCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.usableTwoCodeUnitsCharacters(); + + for (String expected : characters) { + try { + byte[] json = toUtf8("\"" + expected + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("String cannot be deserialized to a char. 
Expected a single 16-bit code unit character."); + } catch (Throwable e) { + fail("Failed for character: " + expected, e); + } + } + } + + @ParameterizedTest + @ValueSource(strings = {"\"a\",", "\"a\"b"}) + public void moreValuesThanOnePrimitiveCharacterAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"\"a\",", "\"a\"b"}) + public void moreValuesThanOneCharacterAtRoot(String jsonStr) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + } + + @ParameterizedTest + @ValueSource(strings = {"\\uD8001", "\\uD800\\1", "\\uD800u", "\\uD800\\e", "\\uD800\\DC00", "\\uD800"}) + public void invalidLowSurrogateEscape(String input) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + input + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Low surrogate should start with '\\u'"); + } + + @ParameterizedTest + @ValueSource(strings = {"\\uD800\\u"}) + public void missingLowSurrogate(String input) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + input + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> 
parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid code point. Low surrogate should be in the range U+DC00–U+DFFF."); + } + + @Test + public void invalidLowSurrogateRange() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List unicodeCharacters = StringTestData.escapedUnicodeCharactersWithInvalidLowSurrogate(); + + for (String character : unicodeCharacters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid code point. Low surrogate should be in the range U+DC00–U+DFFF."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @ParameterizedTest + @ValueSource(strings = {"\\u", "\\u1", "\\u12", "\\u123"}) + public void invalidUnicodeAsString(String invalidCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Invalid unicode escape sequence."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\u", "\\u1", "\\u12", "\\u123"}) + public void invalidUnicodeAsPrimitiveCharacter(String invalidCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Invalid unicode escape sequence."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\u", "\\u1", "\\u12", "\\u123"}) + public void invalidUnicodeAsCharacter(String invalidCharacter) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8("\"" + invalidCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Invalid unicode escape sequence."); + } + + @ParameterizedTest + @ValueSource(strings = {"\\g", "\\ą"}) + public void invalidEscapeAsString(String escapedCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + escapedCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex).hasMessageStartingWith("Escaped unexpected character: "); + } + + @ParameterizedTest + @ValueSource(strings = {"\\g", "\\ą"}) + public void invalidEscapeAsPrimitiveCharacter(String escapedCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + escapedCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Escaped unexpected character: "); + } + + @ParameterizedTest + @ValueSource(strings = {"\\g", "\\ą"}) + public void invalidEscapeAsCharacter(String escapedCharacter) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + escapedCharacter + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Escaped unexpected character: "); + } + + @Test + public void unescapedControlCharacterAsString() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.unescapedControlCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + 
"\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessage("Unescaped characters. Within strings, there are characters that should be escaped."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @Test + public void unescapedControlCharacterAsPrimitiveCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.unescapedControlCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessage("Unescaped characters. Within strings, there are characters that should be escaped."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @Test + public void unescapedControlCharacterAsCharacter() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List characters = StringTestData.unescapedControlCharacters(); + + for (String character : characters) { + try { + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessage("Unescaped characters. 
Within strings, there are characters that should be escaped."); + } catch (Throwable e) { + fail("Failed for character: " + character, e); + } + } + } + + @ParameterizedTest + @ValueSource(strings = {"\"", "\\"}) + public void unescapedSpecialStringCharacterAsString(String character) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Unclosed string. A string is opened, but never closed."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"", "\\"}) + public void unescapedSpecialStringCharacterAsPrimitiveCharacter(String character) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Unclosed string. A string is opened, but never closed."); + } + + @ParameterizedTest + @ValueSource(strings = {"\"", "\\"}) + public void unescapedSpecialStringCharacterAsCharacter(String character) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"" + character + "\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, Character.class)); + + // then + assertThat(ex) + .hasMessageStartingWith("Unclosed string. 
A string is opened, but never closed."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = false) + public void arrayOfStringsAtRoot(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + String[] array = parser.parse(json, json.length, String[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = true) + public void arrayOfStringsAndNullsAtRoot(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + String[] array = parser.parse(json, json.length, String[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @Test + public void arrayOfStringsMixedWithOtherTypesAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"abc\", \"ab\\\\c\", 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, String[].class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 17. 
Expected either string or 'null'."); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = false) + public void objectWithArrayOfStrings(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithStringArrayField object = parser.parse(json, json.length, RecordWithStringArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = false) + public void objectWithListOfStrings(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithStringListField object = parser.parse(json, json.length, RecordWithStringListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = String[].class, nulls = true) + public void objectWithListOfStringsAndNulls(String jsonStr, String[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithStringListField object = parser.parse(json, json.length, RecordWithStringListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Character[].class, nulls = false) + public void arrayOfCharactersAtRoot(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Character[] array = parser.parse(json, json.length, Character[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = char[].class, nulls = false) + public void 
arrayOfPrimitiveCharactersAtRoot(String jsonStr, char[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + char[] array = parser.parse(json, json.length, char[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Character[].class, nulls = true) + public void arrayOfCharsAndNullsAtRoot(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8(jsonStr); + + // when + Character[] array = parser.parse(json, json.length, Character[].class); + + // then + assertThat(array).containsExactly(expected); + } + + @Test + public void arrayOfPrimitiveCharactersAndNullsAtRoot() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"a\", \"b\", null]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, char[].class)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 11. Expected string."); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = char[].class, value = "Invalid value starting at 11. Expected string."), + @MapEntry(classKey = Character[].class, value = "Invalid value starting at 11. 
Expected either string or 'null'.") + }) + public void arrayOfCharactersMixedWithOtherTypesAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"a\", \"b\", 1]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Character[].class, nulls = false) + public void objectWithArrayOfCharacters(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithCharacterArrayField object = parser.parse(json, json.length, RecordWithCharacterArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = char[].class, nulls = false) + public void objectWithArrayOfPrimitiveCharacters(String jsonStr, char[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithPrimitiveCharacterArrayField object = parser.parse(json, json.length, RecordWithPrimitiveCharacterArrayField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = Character[].class, nulls = false) + public void objectWithListOfCharacters(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithCharacterListField object = parser.parse(json, json.length, RecordWithCharacterListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @SchemaBasedRandomValueSource(schemas = 
Character[].class, nulls = true) + public void objectWithListOfCharactersAndNulls(String jsonStr, Character[] expected) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": " + jsonStr + "}"); + + // when + RecordWithCharacterListField object = parser.parse(json, json.length, RecordWithCharacterListField.class); + + // then + assertThat(object.field()).containsExactly(expected); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = int[].class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = int.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = boolean.class, value = "Unrecognized boolean value. Expected: 'true' or 'false'."), + @MapEntry(classKey = Boolean.class, value = "Unrecognized boolean value. Expected: 'true', 'false' or 'null'."), + @MapEntry(classKey = String[][].class, value = "Expected '[' but got: '\"'.") + }) + public void mismatchedTypeForArrayOfStringsAtRoot(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("[\"abc\", \"ab\\\\c\"]"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @ParameterizedTest + @MapSource({ + @MapEntry(classKey = int[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String.class, value = "Invalid value starting at 0. Expected either string or 'null'."), + @MapEntry(classKey = RecordWithIntegerField.class, value = "Invalid number. Minus has to be followed by a digit."), + @MapEntry(classKey = RecordWithPrimitiveBooleanField.class, value = "Unrecognized boolean value. 
Expected: 'true' or 'false'."), + @MapEntry(classKey = RecordWithStringField.class, value = "Invalid value starting at 10. Expected either string or 'null'."), + @MapEntry(classKey = String[].class, value = "Expected '[' but got: '{'."), + @MapEntry(classKey = String[][].class, value = "Expected '[' but got: '{'.") + }) + public void mismatchedTypeForArrayOfStringsAtObjectField(Class expectedType, String errorMessage) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"field\": [\"abc\", \"ab\\\\c\"]}"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage(errorMessage); + } + + @Test + public void missingStringField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + RecordWithStringField object = parser.parse(json, json.length, RecordWithStringField.class); + + // then + assertThat(object.field()).isNull(); + } + + @Test + public void missingCharacterField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + RecordWithCharacterField object = parser.parse(json, json.length, RecordWithCharacterField.class); + + // then + assertThat(object.field()).isNull(); + } + + @Test + public void missingPrimitiveCharacterField() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("{\"intField\": 1}"); + + // when + IllegalArgumentException ex = assertThrows( + IllegalArgumentException.class, + () -> parser.parse(json, json.length, RecordWithPrimitiveCharacterField.class) + ); + + // then + assertThat(ex.getCause()).isInstanceOf(NullPointerException.class); + } + + @ParameterizedTest + @ValueSource(classes = {char.class, Character.class, String.class}) + public void emptyJson(Class expectedType) { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] json = toUtf8(""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, json.length, expectedType)); + + // then + assertThat(ex) + .hasMessage("No structural element found."); + } + + @ParameterizedTest + @ValueSource(classes = {Character.class, String.class}) + public void passedLengthSmallerThanNullLength(Class expectedType) { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("null"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, expectedType)); + + // then + assertThat(ex) + .hasMessage("Invalid value starting at 0. Expected 'null'."); + } + + @Test + public void passedLengthSmallerThanStringLength() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] json = toUtf8("\"aaaaa\""); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(json, 3, String.class)); + + // then + assertThat(ex) + .hasMessage("Unclosed string. 
A string is opened, but never closed."); + } +} diff --git a/src/test/java/org/simdjson/StructuralIndexerTest.java b/src/test/java/org/simdjson/StructuralIndexerTest.java new file mode 100644 index 0000000..3d65792 --- /dev/null +++ b/src/test/java/org/simdjson/StructuralIndexerTest.java @@ -0,0 +1,278 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.TestUtils.toUtf8; + +public class StructuralIndexerTest { + + @Test + public void unquotedString() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + String input = "abc 123"; + + // when + indexer.index(toUtf8(input), len(input)); + + // then + assertThat(bitIndexes.isEnd()).isFalse(); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(0); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(4); + assertThat(bitIndexes.isEnd()).isTrue(); + } + + @Test + public void quotedString() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + String input = "\"abc 123\""; + + // when + indexer.index(toUtf8(input), len(input)); + + // then + assertThat(bitIndexes.isEnd()).isFalse(); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(0); + assertThat(bitIndexes.isEnd()).isTrue(); + } + + @Test + public void unclosedString() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + String input = "\"abc 123"; + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> indexer.index(toUtf8(input), len(input)) + ); + + // then + assertThat(ex) + 
.hasMessage("Unclosed string. A string is opened, but never closed."); + } + + @Test + public void quotedStringSpanningMultipleBlocks() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + String input = "abc \"a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 d0 d1 d2 d3 d4 d5 d6 d7 d8 d\" def"; + + // when + indexer.index(toUtf8(input), len(input)); + + // then + assertThat(bitIndexes.isEnd()).isFalse(); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(0); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(4); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(125); + assertThat(bitIndexes.isEnd()).isTrue(); + } + + @ParameterizedTest + @ValueSource(strings = { + "abc \\\"123", // abc \"123 + "abc \\\\\\\"123" // abc \\\"123 + }) + public void escapedQuote(String input) { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + + // when + indexer.index(toUtf8(input), len(input)); + + // then + assertThat(bitIndexes.isEnd()).isFalse(); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(0); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(4); + assertThat(bitIndexes.isEnd()).isTrue(); + } + + @Test + public void escapedQuoteSpanningMultipleBlocks() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + String input = "a0ba1ca2ca3ca4ca5ca6ca7ca8ca9cb0cb1cb2cb3cb4cb5cb6cb7cb8cb9cc0 \\\"def"; + + // when + indexer.index(toUtf8(input), len(input)); + + // then + assertThat(bitIndexes.isEnd()).isFalse(); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(0); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(63); + assertThat(bitIndexes.isEnd()).isTrue(); + } + + @ParameterizedTest + @ValueSource(strings = { + "abc \\\\\"123", // abc \\"123 + "abc \\\\\\\\\"123" // abc \\\\"123 + }) + public void 
unescapedQuote(String input) { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> indexer.index(toUtf8(input), len(input)) + ); + + // then + assertThat(ex) + .hasMessage("Unclosed string. A string is opened, but never closed."); + } + + @Test + public void unescapedQuoteSpanningMultipleBlocks() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + String input = "a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 c0 \\\\\"abc"; + + // when + JsonParsingException ex = assertThrows( + JsonParsingException.class, + () -> indexer.index(toUtf8(input), len(input)) + ); + + // then + assertThat(ex) + .hasMessage("Unclosed string. A string is opened, but never closed."); + } + + @Test + public void operatorsClassification() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + String input = "a{bc}1:2,3[efg]aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + + // when + indexer.index(toUtf8(input), len(input)); + + // then + assertThat(bitIndexes.isEnd()).isFalse(); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(0); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(1); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(2); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(4); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(5); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(6); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(7); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(8); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(9); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(10); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(11); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(14); + 
assertThat(bitIndexes.getAndAdvance()).isEqualTo(15); + assertThat(bitIndexes.isEnd()).isTrue(); + } + + @Test + public void controlCharactersClassification() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + byte[] input = new byte[] { + 'a', 'a', 'a', 0x1a, 'a', 0x0c, 'a', 'a', // 0x1a = SUB (substitute), 0x0c = FF (form feed) + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a' + }; + + // when + indexer.index(input, input.length); + + // then + assertThat(bitIndexes.isEnd()).isFalse(); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(0); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(3); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(4); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(5); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(6); + assertThat(bitIndexes.isEnd()).isTrue(); + } + + @Test + public void whitespacesClassification() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + String input = "a bc\t1\n2\r3efgaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + + // when + indexer.index(toUtf8(input), len(input)); + + // then + assertThat(bitIndexes.isEnd()).isFalse(); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(0); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(2); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(5); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(7); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(9); + assertThat(bitIndexes.isEnd()).isTrue(); + } + + @ParameterizedTest + @ValueSource(strings = { + "aaaaaaaaaaaaaaa", // 120 bits + "aaaaaaaaaaaaaaaa", // 128 bits + "aaaaaaaaaaaaaaaaa", // 136 bits + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 248 
bits + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 256 bits + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 264 bits + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 504 bits + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 512 bits + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // 520 bits + }) + public void inputLengthCloseToVectorWidth(String input) { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + + // when + indexer.index(toUtf8(input), len(input)); + + // then + assertThat(bitIndexes.isEnd()).isFalse(); + assertThat(bitIndexes.getAndAdvance()).isEqualTo(0); + assertThat(bitIndexes.isEnd()).isTrue(); + } + + @Test + public void emptyInput() { + // given + BitIndexes bitIndexes = new BitIndexes(1024); + StructuralIndexer indexer = new StructuralIndexer(bitIndexes); + + // when + indexer.index(toUtf8(""), 0); + + // then + assertThat(bitIndexes.isEnd()).isTrue(); + } + + private static int len(String input) { + return input.getBytes(UTF_8).length; + } +} diff --git a/src/test/java/org/simdjson/TestUtils.java b/src/test/java/org/simdjson/TestUtils.java deleted file mode 100644 index 8d63221..0000000 --- a/src/test/java/org/simdjson/TestUtils.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.simdjson; - -import jdk.incubator.vector.ByteVector; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; - -import static java.nio.charset.StandardCharsets.UTF_8; - -class TestUtils { - - static String padWithSpaces(String str) { - byte[] strBytes = toUtf8(str); - byte[] padded = new byte[strBytes.length + 64]; - Arrays.fill(padded, (byte) ' '); - System.arraycopy(strBytes, 0, padded, 0, strBytes.length); - return new String(padded, UTF_8); - } - - static ByteVector chunk(String str, int n) { - return ByteVector.fromArray(StructuralIndexer.BYTE_SPECIES, str.getBytes(UTF_8), n * 
StructuralIndexer.BYTE_SPECIES.vectorByteSize()); - } - - static byte[] toUtf8(String str) { - return str.getBytes(UTF_8); - } - - static byte[] loadTestFile(String name) throws IOException { - try (InputStream is = TestUtils.class.getResourceAsStream(name)) { - return is.readAllBytes(); - } - } -} diff --git a/src/test/java/org/simdjson/Utf8ValidationTest.java b/src/test/java/org/simdjson/Utf8ValidationTest.java new file mode 100644 index 0000000..d89734e --- /dev/null +++ b/src/test/java/org/simdjson/Utf8ValidationTest.java @@ -0,0 +1,449 @@ +package org.simdjson; + +import org.junit.jupiter.api.Test; +import org.simdjson.testutils.TestUtils; + +import java.io.IOException; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.simdjson.testutils.TestUtils.toHexString; +import static org.simdjson.testutils.Utf8TestData.randomUtf8ByteArray; +import static org.simdjson.testutils.Utf8TestData.randomUtf8ByteArrayIncluding; +import static org.simdjson.testutils.Utf8TestData.randomUtf8ByteArrayEndedWith; +import static org.simdjson.testutils.Utf8TestData.utf8Sequences; + +public class Utf8ValidationTest { + + @Test + public void valid() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArray(); + + try { + // when + parser.parse(input, input.length); + } catch (JsonParsingException ex) { + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessageNotContaining("The input is not valid UTF-8"); + } + } + + @Test + public void invalidAscii() { + // given + SimdJsonParser parser = new SimdJsonParser(); + for (int invalidAsciiByte = 128; invalidAsciiByte <= 255; invalidAsciiByte++) { + byte[] input = randomUtf8ByteArrayIncluding((byte) invalidAsciiByte); + + // when + JsonParsingException ex = 
assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + } + + @Test + public void continuationByteWithoutPrecedingLeadingByte() { + // given + SimdJsonParser parser = new SimdJsonParser(); + for (int continuationByte = 0b10_000000; continuationByte <= 0b10_111111; continuationByte++) { + byte[] input = randomUtf8ByteArrayIncluding((byte) continuationByte); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + } + + @Test + public void twoByteSequenceWithTwoContinuationBytes() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayIncluding( + (byte) 0b110_00010, + (byte) 0b10_000000, + (byte) 0b10_000000 + ); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void twoByteSequenceWithoutContinuationBytes() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayIncluding((byte) 0b110_00010); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void twoByteSequenceWithoutContinuationBytesAtTheEnd() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayEndedWith((byte) 
0b110_00010); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void threeByteSequenceWithThreeContinuationBytes() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayIncluding( + (byte) 0b1110_0000, + (byte) 0b10_100000, + (byte) 0b10_000000, + (byte) 0b10_000000 + ); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void threeByteSequenceWithOneContinuationByte() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayIncluding( + (byte) 0b1110_0000, + (byte) 0b10_100000 + ); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void threeByteSequenceWithoutContinuationBytes() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayIncluding((byte) 0b1110_0000); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void threeByteSequenceWithOneContinuationByteAtTheEnd() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayEndedWith( + (byte) 
0b1110_0000, + (byte) 0b10_100000 + ); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void threeByteSequenceWithoutContinuationBytesAtTheEnd() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayEndedWith((byte) 0b1110_0000); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void fourByteSequenceWithFourContinuationBytes() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayIncluding( + (byte) 0b11110_000, + (byte) 0b10_010000, + (byte) 0b10_000000, + (byte) 0b10_000000, + (byte) 0b10_000000 + ); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void fourByteSequenceWithTwoContinuationBytes() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayIncluding( + (byte) 0b11110_000, + (byte) 0b10_010000, + (byte) 0b10_000000 + ); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void fourByteSequenceWithOneContinuationByte() { + // given + SimdJsonParser parser = new 
SimdJsonParser(); + byte[] input = randomUtf8ByteArrayIncluding( + (byte) 0b11110_000, + (byte) 0b10_010000 + ); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void fourByteSequenceWithoutContinuationBytes() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayIncluding((byte) 0b11110_000); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void fourByteSequenceWithTwoContinuationBytesAtTheEnd() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayEndedWith( + (byte) 0b11110_000, + (byte) 0b10_010000, + (byte) 0b10_000000 + ); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void fourByteSequenceWithOneContinuationByteAtTheEnd() { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayEndedWith( + (byte) 0b11110_000, + (byte) 0b10_010000 + ); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void fourByteSequenceWithoutContinuationBytesAtTheEnd() { + // given + SimdJsonParser parser 
= new SimdJsonParser(); + byte[] input = randomUtf8ByteArrayEndedWith((byte) 0b11110_000); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for input: %s.", toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + + @Test + public void overlongTwoByteSequence() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List<byte[]> sequences = utf8Sequences(0x0000, 0x007F, 2); + + for (byte[] sequence : sequences) { + byte[] input = randomUtf8ByteArrayIncluding(sequence); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for sequence: %s and input: %s.", toHexString(sequence), toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + } + + @Test + public void overlongThreeByteSequence() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List<byte[]> sequences = utf8Sequences(0x0000, 0x07FF, 3); + + for (byte[] sequence : sequences) { + byte[] input = randomUtf8ByteArrayIncluding(sequence); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for sequence: %s and input: %s.", toHexString(sequence), toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + } + + @Test + public void surrogateCodePoints() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List<byte[]> sequences = utf8Sequences(0xD800, 0xDFFF, 3); + + for (byte[] sequence : sequences) { + byte[] input = randomUtf8ByteArrayIncluding(sequence); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for sequence: %s and input: 
%s.", toHexString(sequence), toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + } + + @Test + public void overlongFourByteSequence() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List<byte[]> sequences = utf8Sequences(0x0000, 0xFFFF, 4); + + for (byte[] sequence : sequences) { + byte[] input = randomUtf8ByteArrayIncluding(sequence); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for sequence: %s and input: %s.", toHexString(sequence), toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + } + + @Test + public void tooLargeFourByteSequence() { + // given + SimdJsonParser parser = new SimdJsonParser(); + List<byte[]> sequences = utf8Sequences(0x110000, 0x110400, 4); + + for (byte[] sequence : sequences) { + byte[] input = randomUtf8ByteArrayIncluding(sequence); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .overridingErrorMessage("Failed for sequence: %s and input: %s.", toHexString(sequence), toHexString(input)) + .hasMessage("The input is not valid UTF-8"); + } + } + + @Test + public void validTestFile() throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = TestUtils.loadTestFile("/nhkworld.json"); + + // when / then + assertThatCode(() -> parser.parse(input, input.length)).doesNotThrowAnyException(); + } + + @Test + public void invalidTestFile() throws IOException { + // given + SimdJsonParser parser = new SimdJsonParser(); + byte[] input = TestUtils.loadTestFile("/malformed.txt"); + + // when + JsonParsingException ex = assertThrows(JsonParsingException.class, () -> parser.parse(input, input.length)); + + // then + assertThat(ex) + .hasMessage("The input is not valid UTF-8"); + } +} diff --git 
a/src/test/java/org/simdjson/Utf8ValidatorTest.java b/src/test/java/org/simdjson/Utf8ValidatorTest.java deleted file mode 100644 index 995323b..0000000 --- a/src/test/java/org/simdjson/Utf8ValidatorTest.java +++ /dev/null @@ -1,496 +0,0 @@ -package org.simdjson; - -import jdk.incubator.vector.VectorSpecies; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; - -import java.io.IOException; -import java.util.Arrays; - -import static org.assertj.core.api.Assertions.*; - -class Utf8ValidatorTest { - private static final VectorSpecies VECTOR_SPECIES = StructuralIndexer.BYTE_SPECIES; - - - /* ASCII / 1 BYTE TESTS */ - - @Test - void validate_allEightBitValues_invalidAscii() { - byte[] invalidAscii = new byte[128]; - - int index = 0; - for (int eightBitVal = 255; eightBitVal >= 128; eightBitVal--) { - invalidAscii[index++] = (byte) eightBitVal; - } - - SimdJsonParser parser = new SimdJsonParser(); - for (int i = 0; i < 128; i += VECTOR_SPECIES.vectorByteSize()) { - byte[] vectorChunk = Arrays.copyOfRange(invalidAscii, i, i + VECTOR_SPECIES.vectorByteSize()); - - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(vectorChunk, vectorChunk.length)) - .withMessage("Invalid UTF8"); - } - } - - - /* CONTINUATION BYTE TESTS */ - - // continuation byte is never valid without a preceding leader byte - @Test - void validate_continuationByteOutOfOrder_invalid() { - byte minContinuationByte = (byte) 0b10_000000; - byte maxContinuationByte = (byte) 0b10_111111; - byte[] inputBytes = new byte[64]; - int index = 0; - - byte continuationByte = minContinuationByte; - while (continuationByte <= maxContinuationByte) { - inputBytes[index++] = continuationByte; - continuationByte++; - } - - SimdJsonParser parser = new SimdJsonParser(); - for (int i = 0; i < inputBytes.length; i += VECTOR_SPECIES.length()) { - byte[] vectorChunk = Arrays.copyOfRange(inputBytes, i, i + 
VECTOR_SPECIES.vectorByteSize()); - - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(vectorChunk, vectorChunk.length)) - .withMessage("Invalid UTF8"); - } - } - - @Test - void validate_extraContinuationByte_2Byte_invalid() { - byte[] inputBytes = new byte[3]; - inputBytes[0] = (byte) 0b110_00010; - inputBytes[1] = (byte) 0b10_000000; - inputBytes[2] = (byte) 0b10_000000; // two byte lead should only have one continuation byte - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationOneByteTooShort_2Byte_invalid() { - byte[] inputBytes = new byte[1]; - inputBytes[0] = (byte) 0b110_00010; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_extraContinuationByte_3Byte_invalid() { - byte[] inputBytes = new byte[4]; - inputBytes[0] = (byte) 0b1110_0000; - inputBytes[1] = (byte) 0b10_100000; - inputBytes[2] = (byte) 0b10_000000; - inputBytes[3] = (byte) 0b10_000000; // three byte lead should only have two continuation bytes - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationOneByteTooShort_3Byte_invalid() { - byte[] inputBytes = new byte[2]; - inputBytes[0] = (byte) 0b1110_0000; - inputBytes[1] = (byte) 0b10_100000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void 
validate_continuationTwoBytesTooShort_3Byte_invalid() { - byte[] inputBytes = new byte[1]; - inputBytes[0] = (byte) 0b1110_0000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_extraContinuationByte_4Byte_invalid() { - byte[] inputBytes = new byte[5]; - inputBytes[0] = (byte) 0b11110_000; - inputBytes[1] = (byte) 0b10_010000; - inputBytes[2] = (byte) 0b10_000000; - inputBytes[3] = (byte) 0b10_000000; - inputBytes[4] = (byte) 0b10_000000; // four byte lead should only have three continuation bytes - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationOneByteTooShort_4Byte_invalid() { - byte[] inputBytes = new byte[3]; - inputBytes[0] = (byte) 0b11110_000; - inputBytes[1] = (byte) 0b10_010000; - inputBytes[2] = (byte) 0b10_000000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationTwoBytesTooShort_4Byte_invalid() { - byte[] inputBytes = new byte[2]; - inputBytes[0] = (byte) 0b11110_000; - inputBytes[1] = (byte) 0b10_010000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationThreeBytesTooShort_4Byte_invalid() { - byte[] inputBytes = new byte[1]; - inputBytes[0] = (byte) 0b11110_000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> 
parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - - /* 2 BYTE / LATIN TESTS */ - - @Test - void validate_overlong_2byte_invalid() { - byte minLeaderByte = (byte) 0b110_00000; - byte maxLeaderByte = (byte) 0b110_00001; - byte minContinuationByte = (byte) 0b10_000000; - byte maxContinuationByte = (byte) 0b10_111111; - - /* 7 bit code points in 2 byte utf8 is invalid - 2 to the power of 7 = 128 code points * 2 bytes = 256 bytes */ - byte[] inputBytes = new byte[256]; - int index = 0; - - byte leaderByte = minLeaderByte; - byte continuationByte = minContinuationByte; - while (leaderByte <= maxLeaderByte) { - inputBytes[index++] = leaderByte; - inputBytes[index++] = continuationByte; - if (continuationByte == maxContinuationByte) { - leaderByte++; - continuationByte = minContinuationByte; - } else { - continuationByte++; - } - } - - SimdJsonParser parser = new SimdJsonParser(); - for (int i = 0; i < inputBytes.length; i += VECTOR_SPECIES.length()) { - byte[] vectorChunk = Arrays.copyOfRange(inputBytes, i, i + VECTOR_SPECIES.vectorByteSize()); - - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(vectorChunk, vectorChunk.length)) - .withMessage("Invalid UTF8"); - } - } - - - /* 3 BYTE / Asiatic TESTS */ - - /* first valid three byte character: 1110_0000 10_100000 10_000000 - anything smaller is invalid as it would fit into 11 bits (two byte utf8) */ - @Test - void validate_overlong_3Byte_allInvalid() { - byte minLeaderByte = (byte) 0b1110_0000; - byte firstValidContinuationByte = (byte) 0b10_100000; - byte minContinuationByte = (byte) 0b10_000000; - byte maxContinuationByte = (byte) 0b10_111111; - - // 2 to the power of 11 = 2048 code points * 3 bytes = 6144 - byte[] inputBytes = new byte[6144]; - int index = 0; - - byte firstContinuationByte = minContinuationByte; - byte secondContinuationByte = minContinuationByte; - while (firstContinuationByte < firstValidContinuationByte) { - inputBytes[index++] 
= minLeaderByte; - inputBytes[index++] = firstContinuationByte; - inputBytes[index++] = secondContinuationByte; - - if (secondContinuationByte == maxContinuationByte) { - secondContinuationByte = minContinuationByte; - firstContinuationByte++; - } else { - secondContinuationByte++; - } - } - - SimdJsonParser parser = new SimdJsonParser(); - for (int i = 0; i < inputBytes.length; i += VECTOR_SPECIES.length()) { - byte[] vectorChunk = Arrays.copyOfRange(inputBytes, i, i + VECTOR_SPECIES.vectorByteSize()); - - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(vectorChunk, vectorChunk.length)) - .withMessage("Invalid UTF8"); - } - } - - /* code points in the range of U+D800 - U+DFFF (inclusive) are the surrogates for UTF-16. - These 2048 code points that are reserved for UTF-16 are disallowed in UTF-8 - 1101 1000 0000 0000 -> 1101 1111 1111 1111 */ - @Test - void validate_surrogateCodePoints_invalid() { - final byte leaderByte = (byte) 0b1101_1110; - final byte minContinuationByte = (byte) 0b10_000000; - final byte maxContinuationByte = (byte) 0b10_111111; - final byte minFirstContinuationByte = (byte) 0b10_100000; - - byte firstContinuationByte = minFirstContinuationByte; - byte secondContinuationByte = minContinuationByte; - - // 2048 invalid code points * 3 bytes = 6144 bytes - byte[] inputBytes = new byte[6144]; - int index = 0; - - while (firstContinuationByte <= maxContinuationByte) { - inputBytes[index++] = leaderByte; - inputBytes[index++] = firstContinuationByte; - inputBytes[index++] = secondContinuationByte; - - if (secondContinuationByte == maxContinuationByte) { - firstContinuationByte++; - secondContinuationByte = minContinuationByte; - } else { - secondContinuationByte++; - } - } - - SimdJsonParser parser = new SimdJsonParser(); - for (int i = 0; i < inputBytes.length; i += VECTOR_SPECIES.vectorByteSize()) { - byte[] vectorChunk = Arrays.copyOfRange(inputBytes, i, i + VECTOR_SPECIES.vectorByteSize()); - - 
assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(vectorChunk, vectorChunk.length)) - .withMessage("Invalid UTF8"); - } - } - - - /* 4 BYTE / Supplementary TESTS */ - - /* Overlong Test, the decoded character must be above U+FFFF / 11110_000 10_001111 10_111111 10_111111 */ - @Test - void validate_overlong_4Byte_allInvalid() { - byte leaderByte = (byte) 0b11110_000; - byte minContinuationByte = (byte) 0b10_000000; - byte maxContinuationByte = (byte) 0b10_111111; - byte maxFirstContinuationByte = (byte) 0b10_001111; - - // 2 to the power of 16 = 65536 valid code points * 4 bytes = 262144 bytes - byte[] inputBytes = new byte[262144]; - int index = 0; - - byte firstContinuationByte = minContinuationByte; - byte secondContinuationByte = minContinuationByte; - byte thirdContinuationByte = minContinuationByte; - while (firstContinuationByte <= maxFirstContinuationByte) { - inputBytes[index++] = leaderByte; - inputBytes[index++] = firstContinuationByte; - inputBytes[index++] = secondContinuationByte; - inputBytes[index++] = thirdContinuationByte; - - if (thirdContinuationByte == maxContinuationByte) { - if (secondContinuationByte == maxContinuationByte) { - firstContinuationByte++; - secondContinuationByte = minContinuationByte; - } else { - secondContinuationByte++; - } - thirdContinuationByte = minContinuationByte; - } else { - thirdContinuationByte++; - } - } - - SimdJsonParser parser = new SimdJsonParser(); - for (int i = 0; i < inputBytes.length; i += VECTOR_SPECIES.vectorByteSize()) { - byte[] vectorChunk = Arrays.copyOfRange(inputBytes, i, i + VECTOR_SPECIES.vectorByteSize()); - - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(vectorChunk, vectorChunk.length)) - .withMessage("Invalid UTF8"); - } - } - - /* last valid four byte character: 11110_100 10_001111 10_111111 10_111111 - Any code point greater than U+10FFFF will result in a TOO_LARGE error */ - @Test - void 
validate_tooLarge_4Byte_allInvalid() { - byte minLeaderByte = (byte) 0b11110_100; - byte maxLeaderByte = (byte) 0b11111_111; - byte minContinuationByte = (byte) 0b10_000000; - byte maxContinuationByte = (byte) 0b10_111111; - byte minFirstContinuationByte = (byte) 0b10_010000; - - - byte leaderByte = minLeaderByte; - byte firstContinuationByte = minFirstContinuationByte; - byte secondContinuationByte = minContinuationByte; - byte thirdContinuationByte = minContinuationByte; - - int codePoints = 0x3FFFFF - 0x110000 + 1; - byte[] inputBytes = new byte[codePoints * 4]; - int index = 0; - - while (leaderByte <= maxLeaderByte) { - inputBytes[index++] = leaderByte; - inputBytes[index++] = firstContinuationByte; - inputBytes[index++] = secondContinuationByte; - inputBytes[index++] = thirdContinuationByte; - - if (thirdContinuationByte == maxContinuationByte) { - if (secondContinuationByte == maxContinuationByte) { - if (firstContinuationByte == maxContinuationByte) { - leaderByte++; - firstContinuationByte = minContinuationByte; - } else { - firstContinuationByte++; - } - secondContinuationByte = minContinuationByte; - } else { - secondContinuationByte++; - } - thirdContinuationByte = minContinuationByte; - } else { - thirdContinuationByte++; - } - } - - SimdJsonParser parser = new SimdJsonParser(); - for (int i = 0; i < inputBytes.length; i += VECTOR_SPECIES.vectorByteSize()) { - byte[] vectorChunk = Arrays.copyOfRange(inputBytes, i, i + VECTOR_SPECIES.vectorByteSize()); - - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(vectorChunk, vectorChunk.length)) - .withMessage("Invalid UTF8"); - } - } - - /* check that the data stream does not terminate with an incomplete code point - We just have to check that the last byte in the last vector is strictly smaller than 0xC0 (using an unsigned comparison) - that the second last byte is strictly smaller than 0xE0 - the third last byte is strictly smaller than 0xF0 */ - @Test - void 
validate_continuationOneByteTooShort_2Byte_eof_invalid() { - int vectorBytes = VECTOR_SPECIES.vectorByteSize(); - byte[] inputBytes = new byte[vectorBytes]; - inputBytes[vectorBytes - 1] = (byte) 0b110_00010; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationOneByteTooShort_3Byte_eof_invalid() { - int vectorBytes = VECTOR_SPECIES.vectorByteSize(); - byte[] inputBytes = new byte[vectorBytes]; - inputBytes[vectorBytes - 2] = (byte) 0b1110_0000; - inputBytes[vectorBytes - 1] = (byte) 0b10_100000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationTwoBytesTooShort_3Byte_eof_invalid() { - int vectorBytes = VECTOR_SPECIES.vectorByteSize(); - byte[] inputBytes = new byte[vectorBytes]; - inputBytes[vectorBytes - 1] = (byte) 0b1110_0000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationOneByteTooShort_4Byte_eof_invalid() { - int vectorBytes = VECTOR_SPECIES.vectorByteSize(); - byte[] inputBytes = new byte[vectorBytes]; - inputBytes[vectorBytes - 3] = (byte) 0b11110_000; - inputBytes[vectorBytes - 2] = (byte) 0b10_010000; - inputBytes[vectorBytes - 1] = (byte) 0b10_000000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationTwoBytesTooShort_4Byte_eof_invalid() { - int vectorBytes = 
VECTOR_SPECIES.vectorByteSize(); - byte[] inputBytes = new byte[vectorBytes]; - inputBytes[vectorBytes - 2] = (byte) 0b11110_000; - inputBytes[vectorBytes - 1] = (byte) 0b10_010000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - @Test - void validate_continuationThreeBytesTooShort_4Byte_eof_invalid() { - int vectorBytes = VECTOR_SPECIES.vectorByteSize(); - byte[] inputBytes = new byte[vectorBytes]; - inputBytes[vectorBytes - 1] = (byte) 0b11110_000; - - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } - - - /* file tests */ - - @ParameterizedTest - @ValueSource(strings = {"/twitter.json", "/nhkworld.json"}) - void validate_utf8InputFiles_valid(String inputFilePath) throws IOException { - byte[] inputBytes = TestUtils.loadTestFile(inputFilePath); - SimdJsonParser parser = new SimdJsonParser(); - assertThatCode(() -> parser.parse(inputBytes, inputBytes.length)).doesNotThrowAnyException(); - } - - @Test - void validate_utf8InputFile_invalid() throws IOException { - byte[] inputBytes = TestUtils.loadTestFile("/malformed.txt"); - SimdJsonParser parser = new SimdJsonParser(); - assertThatExceptionOfType(JsonParsingException.class) - .isThrownBy(() -> parser.parse(inputBytes, inputBytes.length)) - .withMessage("Invalid UTF8"); - } -} \ No newline at end of file diff --git a/src/test/java/org/simdjson/schemas/ClassWithIntegerField.java b/src/test/java/org/simdjson/schemas/ClassWithIntegerField.java new file mode 100644 index 0000000..1b5d626 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithIntegerField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithIntegerField { + + 
private final Integer field; + + public ClassWithIntegerField(@JsonFieldName("field") Integer field) { + this.field = field; + } + + public Integer getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveBooleanField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveBooleanField.java new file mode 100644 index 0000000..16e0bc0 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveBooleanField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveBooleanField { + + private final boolean field; + + public ClassWithPrimitiveBooleanField(@JsonFieldName("field") boolean field) { + this.field = field; + } + + public boolean getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveByteField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveByteField.java new file mode 100644 index 0000000..6d6c5e4 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveByteField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveByteField { + + private final byte field; + + public ClassWithPrimitiveByteField(@JsonFieldName("field") byte field) { + this.field = field; + } + + public byte getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveCharacterField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveCharacterField.java new file mode 100644 index 0000000..369c5dc --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveCharacterField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveCharacterField { + + private final char field; + + public ClassWithPrimitiveCharacterField(@JsonFieldName("field") char field) { + 
this.field = field; + } + + public char getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveDoubleField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveDoubleField.java new file mode 100644 index 0000000..36e7695 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveDoubleField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveDoubleField { + + private final double field; + + public ClassWithPrimitiveDoubleField(@JsonFieldName("field") double field) { + this.field = field; + } + + public double getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveFloatField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveFloatField.java new file mode 100644 index 0000000..fb2bb99 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveFloatField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveFloatField { + + private final float field; + + public ClassWithPrimitiveFloatField(@JsonFieldName("field") float field) { + this.field = field; + } + + public float getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveIntegerField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveIntegerField.java new file mode 100644 index 0000000..793dc77 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveIntegerField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveIntegerField { + + private final int field; + + public ClassWithPrimitiveIntegerField(@JsonFieldName("field") int field) { + this.field = field; + } + + public int getField() { + return field; + } +} diff --git 
a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveLongField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveLongField.java new file mode 100644 index 0000000..17a9d2e --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveLongField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveLongField { + + private final long field; + + public ClassWithPrimitiveLongField(@JsonFieldName("field") long field) { + this.field = field; + } + + public long getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithPrimitiveShortField.java b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveShortField.java new file mode 100644 index 0000000..35ac7fe --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithPrimitiveShortField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithPrimitiveShortField { + + private final short field; + + public ClassWithPrimitiveShortField(@JsonFieldName("field") short field) { + this.field = field; + } + + public short getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/ClassWithStringField.java b/src/test/java/org/simdjson/schemas/ClassWithStringField.java new file mode 100644 index 0000000..2c8f3ee --- /dev/null +++ b/src/test/java/org/simdjson/schemas/ClassWithStringField.java @@ -0,0 +1,16 @@ +package org.simdjson.schemas; + +import org.simdjson.annotations.JsonFieldName; + +public class ClassWithStringField { + + private final String field; + + public ClassWithStringField(@JsonFieldName("field") String field) { + this.field = field; + } + + public String getField() { + return field; + } +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithBooleanArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithBooleanArrayField.java new file mode 100644 index 0000000..351bd23 
--- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithBooleanArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithBooleanArrayField(Boolean[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithBooleanField.java b/src/test/java/org/simdjson/schemas/RecordWithBooleanField.java new file mode 100644 index 0000000..5f8f3cf --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithBooleanField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithBooleanField(Boolean field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithBooleanListField.java b/src/test/java/org/simdjson/schemas/RecordWithBooleanListField.java new file mode 100644 index 0000000..3f3517c --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithBooleanListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithBooleanListField(List<Boolean> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithByteArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithByteArrayField.java new file mode 100644 index 0000000..cf3eecb --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithByteArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithByteArrayField(Byte[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithByteField.java b/src/test/java/org/simdjson/schemas/RecordWithByteField.java new file mode 100644 index 0000000..7297453 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithByteField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithByteField(Byte field) { + +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithByteListField.java b/src/test/java/org/simdjson/schemas/RecordWithByteListField.java new file mode 100644 index 0000000..d15732a --- /dev/null +++ 
b/src/test/java/org/simdjson/schemas/RecordWithByteListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithByteListField(List<Byte> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithCharacterArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithCharacterArrayField.java new file mode 100644 index 0000000..532cf55 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithCharacterArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithCharacterArrayField(Character[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithCharacterField.java b/src/test/java/org/simdjson/schemas/RecordWithCharacterField.java new file mode 100644 index 0000000..bd1d21a --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithCharacterField.java @@ -0,0 +1,5 @@ +package org.simdjson.schemas; + +public record RecordWithCharacterField(Character field) { + +} \ No newline at end of file diff --git a/src/test/java/org/simdjson/schemas/RecordWithCharacterListField.java b/src/test/java/org/simdjson/schemas/RecordWithCharacterListField.java new file mode 100644 index 0000000..7bff7b1 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithCharacterListField.java @@ -0,0 +1,6 @@ +package org.simdjson.schemas; + +import java.util.List; + +public record RecordWithCharacterListField(List<Character> field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithDoubleArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithDoubleArrayField.java new file mode 100644 index 0000000..d65c4d2 --- /dev/null +++ b/src/test/java/org/simdjson/schemas/RecordWithDoubleArrayField.java @@ -0,0 +1,4 @@ +package org.simdjson.schemas; + +public record RecordWithDoubleArrayField(Double[] field) { +} diff --git a/src/test/java/org/simdjson/schemas/RecordWithDoubleField.java b/src/test/java/org/simdjson/schemas/RecordWithDoubleField.java new file mode 100644 index 
0000000..1f3aeb3
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithDoubleField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithDoubleField(Double field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithDoubleListField.java b/src/test/java/org/simdjson/schemas/RecordWithDoubleListField.java
new file mode 100644
index 0000000..cdcdc03
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithDoubleListField.java
@@ -0,0 +1,6 @@
+package org.simdjson.schemas;
+
+import java.util.List;
+
+public record RecordWithDoubleListField(List<Double> field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithFloatArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithFloatArrayField.java
new file mode 100644
index 0000000..611483b
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithFloatArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithFloatArrayField(Float[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithFloatField.java b/src/test/java/org/simdjson/schemas/RecordWithFloatField.java
new file mode 100644
index 0000000..aeb95d7
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithFloatField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithFloatField(Float field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithFloatListField.java b/src/test/java/org/simdjson/schemas/RecordWithFloatListField.java
new file mode 100644
index 0000000..ce75e91
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithFloatListField.java
@@ -0,0 +1,6 @@
+package org.simdjson.schemas;
+
+import java.util.List;
+
+public record RecordWithFloatListField(List<Float> field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithIntegerArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithIntegerArrayField.java
new file mode 100644
index 0000000..d442af2
--- /dev/null
+++
b/src/test/java/org/simdjson/schemas/RecordWithIntegerArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithIntegerArrayField(Integer[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithIntegerField.java b/src/test/java/org/simdjson/schemas/RecordWithIntegerField.java
new file mode 100644
index 0000000..5aafd3e
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithIntegerField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithIntegerField(Integer field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithIntegerListField.java b/src/test/java/org/simdjson/schemas/RecordWithIntegerListField.java
new file mode 100644
index 0000000..6c34fd2
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithIntegerListField.java
@@ -0,0 +1,6 @@
+package org.simdjson.schemas;
+
+import java.util.List;
+
+public record RecordWithIntegerListField(List<Integer> field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithLongArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithLongArrayField.java
new file mode 100644
index 0000000..2829062
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithLongArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithLongArrayField(Long[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithLongField.java b/src/test/java/org/simdjson/schemas/RecordWithLongField.java
new file mode 100644
index 0000000..698db5c
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithLongField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithLongField(Long field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithLongListField.java b/src/test/java/org/simdjson/schemas/RecordWithLongListField.java
new file mode 100644
index 0000000..a0ed295
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithLongListField.java
@@ -0,0
+1,6 @@
+package org.simdjson.schemas;
+
+import java.util.List;
+
+public record RecordWithLongListField(List<Long> field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanArrayField.java
new file mode 100644
index 0000000..d3c0663
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveBooleanArrayField(boolean[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanField.java
new file mode 100644
index 0000000..c67eae3
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveBooleanField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveBooleanField(boolean field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteArrayField.java
new file mode 100644
index 0000000..3127a2f
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveByteArrayField(byte[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteField.java
new file mode 100644
index 0000000..64e3534
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveByteField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveByteField(byte field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterArrayField.java
new file mode 100644
index
0000000..003d4d4
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveCharacterArrayField(char[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterField.java
new file mode 100644
index 0000000..9ff7386
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveCharacterField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveCharacterField(char field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleArrayField.java
new file mode 100644
index 0000000..29f6f2f
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveDoubleArrayField(double[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleField.java
new file mode 100644
index 0000000..6325017
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveDoubleField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveDoubleField(double field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatArrayField.java
new file mode 100644
index 0000000..6dbc9a5
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveFloatArrayField(float[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatField.java
b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatField.java
new file mode 100644
index 0000000..87801be
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveFloatField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveFloatField(float field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerArrayField.java
new file mode 100644
index 0000000..412b594
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveIntegerArrayField(int[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerField.java
new file mode 100644
index 0000000..9d7b47d
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveIntegerField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveIntegerField(int field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongArrayField.java
new file mode 100644
index 0000000..d0afa42
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveLongArrayField(long[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongField.java
new file mode 100644
index 0000000..dfb5608
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveLongField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveLongField(long field) {
+
+}
diff --git
a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortArrayField.java
new file mode 100644
index 0000000..95ac8fc
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortArrayField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveShortArrayField(short[] field) {
+}
+
diff --git a/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortField.java b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortField.java
new file mode 100644
index 0000000..129c7ca
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithPrimitiveShortField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithPrimitiveShortField(short field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithShortArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithShortArrayField.java
new file mode 100644
index 0000000..e819871
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithShortArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithShortArrayField(Short[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithShortField.java b/src/test/java/org/simdjson/schemas/RecordWithShortField.java
new file mode 100644
index 0000000..046447e
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithShortField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithShortField(Short field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithShortListField.java b/src/test/java/org/simdjson/schemas/RecordWithShortListField.java
new file mode 100644
index 0000000..f4c5d20
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithShortListField.java
@@ -0,0 +1,6 @@
+package org.simdjson.schemas;
+
+import java.util.List;
+
+public record RecordWithShortListField(List<Short> field) {
+}
diff --git
a/src/test/java/org/simdjson/schemas/RecordWithStringArrayField.java b/src/test/java/org/simdjson/schemas/RecordWithStringArrayField.java
new file mode 100644
index 0000000..08ce42f
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithStringArrayField.java
@@ -0,0 +1,4 @@
+package org.simdjson.schemas;
+
+public record RecordWithStringArrayField(String[] field) {
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithStringField.java b/src/test/java/org/simdjson/schemas/RecordWithStringField.java
new file mode 100644
index 0000000..099d4e6
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithStringField.java
@@ -0,0 +1,5 @@
+package org.simdjson.schemas;
+
+public record RecordWithStringField(String field) {
+
+}
diff --git a/src/test/java/org/simdjson/schemas/RecordWithStringListField.java b/src/test/java/org/simdjson/schemas/RecordWithStringListField.java
new file mode 100644
index 0000000..5c4cba9
--- /dev/null
+++ b/src/test/java/org/simdjson/schemas/RecordWithStringListField.java
@@ -0,0 +1,6 @@
+package org.simdjson.schemas;
+
+import java.util.List;
+
+public record RecordWithStringListField(List<String> field) {
+}
diff --git a/src/test/java/org/simdjson/testutils/CartesianTestCsv.java b/src/test/java/org/simdjson/testutils/CartesianTestCsv.java
new file mode 100644
index 0000000..d4c601e
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/CartesianTestCsv.java
@@ -0,0 +1,16 @@
+package org.simdjson.testutils;
+
+import org.junitpioneer.jupiter.cartesian.CartesianArgumentsSource;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@Target(ElementType.PARAMETER)
+@Retention(RetentionPolicy.RUNTIME)
+@CartesianArgumentsSource(CartesianTestCsvArgumentsProvider.class)
+public @interface CartesianTestCsv {
+
+    String[] value() default {};
+}
diff --git
a/src/test/java/org/simdjson/testutils/CartesianTestCsvArgumentsProvider.java b/src/test/java/org/simdjson/testutils/CartesianTestCsvArgumentsProvider.java
new file mode 100644
index 0000000..35eaea0
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/CartesianTestCsvArgumentsProvider.java
@@ -0,0 +1,37 @@
+package org.simdjson.testutils;
+
+import org.junit.jupiter.api.extension.ExtensionContext;
+import org.junitpioneer.jupiter.cartesian.CartesianParameterArgumentsProvider;
+
+import java.lang.reflect.Parameter;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.stream.Stream;
+
+/**
+ * Supplies one {@link CartesianTestCsvRow} per string listed in a {@link CartesianTestCsv} annotation.
+ */
+class CartesianTestCsvArgumentsProvider implements CartesianParameterArgumentsProvider<CartesianTestCsvRow> {
+
+    /**
+     * Splits every annotation value on commas and trims each resulting cell.
+     *
+     * @throws NullPointerException if the parameter is not annotated with {@link CartesianTestCsv}
+     */
+    @Override
+    public Stream<CartesianTestCsvRow> provideArguments(ExtensionContext context, Parameter parameter) {
+        CartesianTestCsv source = Objects.requireNonNull(parameter.getAnnotation(CartesianTestCsv.class));
+        return Arrays.stream(source.value())
+                .map(CartesianTestCsvArgumentsProvider::splitAndTrim)
+                .map(CartesianTestCsvRow::new);
+    }
+
+    // Trimming is done in a mapping step rather than in peek(), which is intended for debugging only.
+    private static String[] splitAndTrim(String row) {
+        String[] cells = row.split(",");
+        for (int i = 0; i < cells.length; i++) {
+            cells[i] = cells[i].trim();
+        }
+        return cells;
+    }
+}
diff --git a/src/test/java/org/simdjson/testutils/CartesianTestCsvRow.java b/src/test/java/org/simdjson/testutils/CartesianTestCsvRow.java
new file mode 100644
index 0000000..4785cfb
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/CartesianTestCsvRow.java
@@ -0,0 +1,47 @@
+package org.simdjson.testutils;
+
+import java.util.Arrays;
+
+/**
+ * A single row of cells with typed accessors addressed by zero-based column index.
+ */
+public class CartesianTestCsvRow {
+
+    private final String[] cells;
+
+    CartesianTestCsvRow(String[] cells) {
+        this.cells = cells;
+    }
+
+    public String getValueAsString(int column) {
+        return cells[column];
+    }
+
+    public double getValueAsDouble(int column) {
+        return Double.parseDouble(cells[column]);
+    }
+
+    public float getValueAsFloat(int column) {
+        return Float.parseFloat(cells[column]);
+    }
+
+    /**
+     * Parses the cell as the requested floating point type.
+     *
+     * @throws UnsupportedOperationException if the type is neither float/Float nor double/Double
+     */
+    public Object getValue(int column, Class<?> expectedTye) {
+        if (expectedTye == Float.class || expectedTye == float.class) {
+            return getValueAsFloat(column);
+        }
+        if (expectedTye == Double.class || expectedTye == double.class) {
+            return getValueAsDouble(column);
+        }
+        throw new UnsupportedOperationException("Unsupported type: " + expectedTye.getName());
+    }
+
+    @Override
+    public String toString() {
+        return Arrays.toString(cells);
+    }
+}
diff --git a/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFile.java b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFile.java
new file mode 100644
index 0000000..fefaf36
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFile.java
@@ -0,0 +1,112 @@
+package org.simdjson.testutils;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * A file with floating point number test cases, one case per line.
+ */
+public class FloatingPointNumberTestFile {
+
+    private final File file;
+
+    FloatingPointNumberTestFile(File file) {
+        this.file = file;
+    }
+
+    /**
+     * Opens the file and returns an iterator over its test cases. The iterator holds an open reader
+     * and has to be closed by the caller.
+     */
+    public FloatingPointNumberTestCasesIterator iterator() throws IOException {
+        return new FloatingPointNumberTestCasesIterator(file);
+    }
+
+    @Override
+    public String toString() {
+        return file.toString();
+    }
+
+    /**
+     * A single test case; {@code line} is the 1-based number of the line it was read from.
+     */
+    public record FloatingPointNumberTestCase(int line, String input, float expectedFloat, double expectedDouble) {
+
+    }
+
+    public static class FloatingPointNumberTestCasesIterator implements Iterator<FloatingPointNumberTestCase>, AutoCloseable {
+
+        private final BufferedReader br;
+
+        // 1-based number of the line currently buffered in nextLine.
+        private int nextLineNo = 0;
+        private String nextLine;
+
+        private FloatingPointNumberTestCasesIterator(File file) throws IOException {
+            br = new BufferedReader(new FileReader(file));
+            moveToNextLine();
+        }
+
+        @Override
+        public boolean hasNext() {
+            return nextLine != null;
+        }
+
+        /**
+         * Parses the buffered line and reads the following one. Cells are separated by a single space:
+         * cell 1 and cell 2 are the hexadecimal bit patterns of the expected float and double,
+         * cell 3 is the textual input.
+         *
+         * @throws NoSuchElementException if there are no more lines
+         */
+        @Override
+        public FloatingPointNumberTestCase next() {
+            if (nextLine == null) {
+                throw new NoSuchElementException();
+            }
+            // Capture the number of the line being parsed before moveToNextLine() increments it.
+            int line = nextLineNo;
+            String[] cells = nextLine.split(" ");
+            // Unsigned parsing accepts bit patterns with the sign bit set (negative numbers),
+            // which Integer.decode/Long.decode reject as out of range.
+            float expectedFloat = Float.intBitsToFloat(Integer.parseUnsignedInt(cells[1], 16));
+            double expectedDouble = Double.longBitsToDouble(Long.parseUnsignedLong(cells[2], 16));
+            String input = readInputNumber(cells[3]);
+            try {
+                moveToNextLine();
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+            return new FloatingPointNumberTestCase(line, input, expectedFloat, expectedDouble);
+        }
+
+        @Override
+        public void close() throws IOException {
+            br.close();
+        }
+
+        private void moveToNextLine() throws IOException {
+            nextLine = br.readLine();
+            nextLineNo++;
+        }
+
+        /**
+         * Normalizes the input: an integer literal gets ".0" appended, a leading "." gets a "0" prepended
+         * and a "." directly followed by an exponent marker is rewritten to ".0e".
+         */
+        private static String readInputNumber(String input) {
+            boolean isDouble = input.indexOf('e') >= 0 || input.indexOf('E') >= 0 || input.indexOf('.') >= 0;
+            if (isDouble) {
+                if (input.startsWith(".")) {
+                    input = "0" + input;
+                }
+                return input.replaceFirst("\\.[eE]", ".0e");
+            }
+            return input + ".0";
+        }
+    }
+}
diff --git a/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesProvider.java b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesProvider.java
new file mode 100644
index 0000000..bb9f152
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesProvider.java
@@ -0,0 +1,41 @@
+package org.simdjson.testutils;
+
+import org.junit.jupiter.api.extension.ExtensionContext;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.ArgumentsProvider;
+import org.junit.jupiter.params.support.AnnotationConsumer;
+
+import java.io.File;
+import java.nio.file.Path;
+import java.util.stream.Stream;
+
+/**
+ * Provides every regular file found in the floating point test data directory as a test argument.
+ */
+class FloatingPointNumberTestFilesProvider implements ArgumentsProvider, AnnotationConsumer<FloatingPointNumberTestFilesSource> {
+
+    @Override
+    public Stream<? extends Arguments> provideArguments(ExtensionContext context) {
+        return listTestFiles()
+                .map(FloatingPointNumberTestFile::new)
+                .map(Arguments::of);
+    }
+
+    @Override
+    public void accept(FloatingPointNumberTestFilesSource annotation) {
+    }
+
+    /**
+     * Lists the regular files in parse-number-fxx-test-data/data below the test data directory.
+     * Returns an empty stream when that directory cannot be listed.
+     */
+    private static Stream<File> listTestFiles() {
+        String testDataDir = System.getProperty("org.simdjson.testdata.dir", System.getProperty("user.dir") + "/testdata");
+        File[] testFiles = Path.of(testDataDir, "parse-number-fxx-test-data", "data").toFile().listFiles();
+        if (testFiles == null) {
+            return Stream.empty();
+        }
+        return Stream.of(testFiles)
+                .filter(File::isFile);
+    }
+}
diff --git a/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesSource.java b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesSource.java
new file mode 100644
index 0000000..3e2cd3b
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/FloatingPointNumberTestFilesSource.java
@@ -0,0 +1,26 @@
+package org.simdjson.testutils;
+
+import org.junit.jupiter.params.provider.ArgumentsSource;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Provides files with floating point number test cases.
+ * <p>
+ * The default location of the files is the directory /testdata within the project directory.
+ * It can be customized using the system property 'org.simdjson.testdata.dir'.
+ * <p>
+ * The files are expected to be formatted as described at:
+ * https://github.com/nigeltao/parse-number-fxx-test-data
+ */
+@Documented
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+@ArgumentsSource(FloatingPointNumberTestFilesProvider.class)
+public @interface FloatingPointNumberTestFilesSource {
+
+}
diff --git a/src/test/java/org/simdjson/JsonValueAssert.java b/src/test/java/org/simdjson/testutils/JsonValueAssert.java
similarity index 81%
rename from src/test/java/org/simdjson/JsonValueAssert.java
rename to src/test/java/org/simdjson/testutils/JsonValueAssert.java
index 6c8bf66..d3350c0 100644
--- a/src/test/java/org/simdjson/JsonValueAssert.java
+++ b/src/test/java/org/simdjson/testutils/JsonValueAssert.java
@@ -1,19 +1,16 @@
-package org.simdjson;
+package org.simdjson.testutils;
 
 import org.assertj.core.api.AbstractAssert;
 import org.assertj.core.api.Assertions;
+import org.simdjson.JsonValue;
 
-class JsonValueAssert extends AbstractAssert<JsonValueAssert, JsonValue> {
+public class JsonValueAssert extends AbstractAssert<JsonValueAssert, JsonValue> {
 
     JsonValueAssert(JsonValue actual) {
         super(actual, JsonValueAssert.class);
     }
 
-    static JsonValueAssert assertThat(JsonValue actual) {
-        return new JsonValueAssert(actual);
-    }
-
-    JsonValueAssert isEqualTo(long expected) {
+    public JsonValueAssert isEqualTo(long expected) {
         Assertions.assertThat(actual.isLong())
                 .withFailMessage("Expecting value to be long but was " + getActualType())
                 .isTrue();
@@ -21,7 +18,7 @@ JsonValueAssert isEqualTo(long expected) {
         return this;
     }
 
-    JsonValueAssert isEqualTo(Double expected) {
+    public JsonValueAssert isEqualTo(Double expected) {
         Assertions.assertThat(actual.isDouble())
                 .withFailMessage("Expecting value to be double but was " + getActualType())
                 .isTrue();
@@ -29,7 +26,7 @@ JsonValueAssert isEqualTo(Double expected) {
         return this;
     }
 
-    JsonValueAssert isEqualTo(String expected) {
+    public JsonValueAssert isEqualTo(String expected) {
         Assertions.assertThat(actual.isString())
                 .withFailMessage("Expecting value to be string but was " + getActualType())
                 .isTrue();
@@ -37,7 +34,7 @@ JsonValueAssert isEqualTo(String expected) {
         return this;
     }
 
-    JsonValueAssert isEqualTo(boolean expected) {
+    public JsonValueAssert isEqualTo(boolean expected) {
         Assertions.assertThat(actual.isBoolean())
                 .withFailMessage("Expecting value to be boolean but was " + getActualType())
                 .isTrue();
diff --git a/src/test/java/org/simdjson/testutils/MapEntry.java b/src/test/java/org/simdjson/testutils/MapEntry.java
new file mode 100644
index 0000000..e821958
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/MapEntry.java
@@ -0,0 +1,14 @@
+package org.simdjson.testutils;
+
+/**
+ * One key/value pair of a {@link MapSource}. The key is the single element of {@link #stringKey()} or,
+ * when that array is empty, of {@link #classKey()}.
+ */
+public @interface MapEntry {
+
+    String[] stringKey() default {};
+
+    Class<?>[] classKey() default {};
+
+    String value();
+}
diff --git a/src/test/java/org/simdjson/testutils/MapSource.java b/src/test/java/org/simdjson/testutils/MapSource.java
new file mode 100644
index 0000000..4271b74
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/MapSource.java
@@ -0,0 +1,21 @@
+package org.simdjson.testutils;
+
+import org.junit.jupiter.params.provider.ArgumentsSource;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Supplies a parameterized test with one (key, value) argument pair per {@link MapEntry}.
+ */
+@Documented
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+@ArgumentsSource(MapSourceProvider.class)
+public @interface MapSource {
+
+    MapEntry[] value();
+}
diff --git a/src/test/java/org/simdjson/testutils/MapSourceProvider.java b/src/test/java/org/simdjson/testutils/MapSourceProvider.java
new file mode 100644
index 0000000..1e38928
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/MapSourceProvider.java
@@ -0,0 +1,42 @@
+package org.simdjson.testutils;
+
+import org.junit.jupiter.api.extension.ExtensionContext;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.ArgumentsProvider;
+import org.junit.jupiter.params.support.AnnotationConsumer;
+
+import java.util.Arrays;
+import java.util.stream.Stream;
+
+/**
+ * Turns every {@link MapEntry} of a {@link MapSource} into the arguments (key, value).
+ */
+class MapSourceProvider implements ArgumentsProvider, AnnotationConsumer<MapSource> {
+
+    private MapEntry[] entries;
+
+    @Override
+    public void accept(MapSource mapSource) {
+        entries = mapSource.value();
+    }
+
+    @Override
+    public Stream<? extends Arguments> provideArguments(ExtensionContext context) {
+        return Arrays.stream(entries)
+                .map(entry -> {
+                    Object[] key = null;
+                    if (entry.stringKey().length != 0) {
+                        key = entry.stringKey();
+                    } else if (entry.classKey().length != 0) {
+                        key = entry.classKey();
+                    }
+                    if (key == null) {
+                        throw new IllegalArgumentException("Missing key.");
+                    }
+                    if (key.length > 1) {
+                        throw new IllegalArgumentException("Expected one key, got " + key.length);
+                    }
+                    return Arguments.of(key[0], entry.value());
+                });
+    }
+}
diff --git a/src/test/java/org/simdjson/testutils/NumberTestData.java b/src/test/java/org/simdjson/testutils/NumberTestData.java
new file mode 100644
index 0000000..215c7f2
--- /dev/null
+++ b/src/test/java/org/simdjson/testutils/NumberTestData.java
@@ -0,0 +1,51 @@
+package org.simdjson.testutils;
+
+import java.util.Random;
+
+/**
+ * Generators of random numeric test values.
+ */
+class NumberTestData {
+
+    private static final Random RANDOM = new Random();
+
+    static byte randomByte() {
+        return (byte) RANDOM.nextInt();
+    }
+
+    static short randomShort() {
+        return (short) RANDOM.nextInt();
+    }
+
+    static int randomInt() {
+        return RANDOM.nextInt();
+    }
+
+    static long randomLong() {
+        return RANDOM.nextLong();
+    }
+
+    /**
+     * Returns a double built from random bits, retrying until the value is finite (neither NaN nor infinite).
+     */
+    static double randomDouble() {
+        while (true) {
+            double randomVal = Double.longBitsToDouble(RANDOM.nextLong());
+            if (Double.isFinite(randomVal)) {
+                return randomVal;
+            }
+        }
+    }
+
+    /**
+     * Returns a float built from random bits, retrying until the value is finite (neither NaN nor infinite).
+     */
+    static float randomFloat() {
+        while (true) {
+            float randomVal = Float.intBitsToFloat(RANDOM.nextInt());
+            if (Float.isFinite(randomVal)) {
+                return randomVal;
+            }
+        }
+    }
+}
diff --git
a/src/test/java/org/simdjson/testutils/RandomIntegralNumberProvider.java b/src/test/java/org/simdjson/testutils/RandomIntegralNumberProvider.java new file mode 100644 index 0000000..d39dc1c --- /dev/null +++ b/src/test/java/org/simdjson/testutils/RandomIntegralNumberProvider.java @@ -0,0 +1,125 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.api.Named; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.support.AnnotationConsumer; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Parameter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.function.Supplier; +import java.util.stream.Stream; + +class RandomIntegralNumberProvider implements ArgumentsProvider, AnnotationConsumer { + + private static final int SEQUENCE_SIZE = 10; + + private Class[] classes; + private boolean includeMinMax; + + @Override + public Stream provideArguments(ExtensionContext context) { + return Arrays.stream(classes) + .flatMap(expectedClass -> { + List numbers = generate(expectedClass); + + if (!numbers.isEmpty()) { + return numbers.stream() + .map(num -> createArguments(context, expectedClass, String.valueOf(num), num)); + } + + Constructor constructor = resolveConstructor(expectedClass); + Parameter[] parameters = constructor.getParameters(); + Parameter parameter = parameters[0]; + Class parameterType = parameter.getType(); + numbers = generate(parameterType); + + if (!numbers.isEmpty()) { + return numbers.stream() + .map(num -> { + Object expected = createInstance(constructor, num); + String json = "{\"" + parameter.getName() + "\": " + num + "}"; + return createArguments(context, expectedClass, json, expected); + }); + } + + throw new IllegalArgumentException("Unsupported 
class: " + expectedClass); + }); + } + + @Override + public void accept(RandomIntegralNumberSource numbersSource) { + classes = numbersSource.classes(); + includeMinMax = numbersSource.includeMinMax(); + } + + private Constructor resolveConstructor(Class expectedClass) { + Constructor[] constructors = expectedClass.getDeclaredConstructors(); + if (constructors.length == 1) { + Constructor constructor = constructors[0]; + Parameter[] parameters = constructor.getParameters(); + if (parameters.length == 1) { + return constructor; + } + } + throw new IllegalArgumentException("Unsupported class: " + expectedClass); + } + + private List generate(Class expectedClass) { + if (expectedClass == Byte.class || expectedClass == byte.class) { + return generateNumbers(NumberTestData::randomByte, Byte.MIN_VALUE, Byte.MAX_VALUE); + } + if (expectedClass == Short.class || expectedClass == short.class) { + return generateNumbers(NumberTestData::randomShort, Short.MIN_VALUE, Short.MAX_VALUE); + } + if (expectedClass == Integer.class || expectedClass == int.class) { + return generateNumbers(NumberTestData::randomInt, Integer.MIN_VALUE, Integer.MAX_VALUE); + } + if (expectedClass == Long.class || expectedClass == long.class) { + return generateNumbers(NumberTestData::randomLong, Long.MIN_VALUE, Long.MAX_VALUE); + } + return Collections.emptyList(); + } + + private List generateNumbers(Supplier generator, T min, T max) { + List numbers = new ArrayList<>(); + if (includeMinMax) { + numbers.add(min); + numbers.add(max); + } + int randomSequenceLen = SEQUENCE_SIZE - numbers.size(); + for (int i = 0; i < randomSequenceLen; i++) { + numbers.add(generator.get()); + } + return numbers; + } + + private static Object createInstance(Constructor constructor, Object arg) { + try { + return constructor.newInstance(arg); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + + private static Arguments 
createArguments(ExtensionContext context, Class schema, String json, Object expected) { + Class[] parameterTypes = context.getRequiredTestMethod().getParameterTypes(); + Object[] args = new Object[parameterTypes.length]; + for (int i = 0; i < args.length; i++) { + if (parameterTypes[i] == Class.class) { + args[i] = Named.named(schema.getName(), schema); + } else if (parameterTypes[i] == String.class) { + args[i] = json; + } else { + args[i] = expected; + } + } + return () -> args; + } +} diff --git a/src/test/java/org/simdjson/testutils/RandomIntegralNumberSource.java b/src/test/java/org/simdjson/testutils/RandomIntegralNumberSource.java new file mode 100644 index 0000000..d2938f5 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/RandomIntegralNumberSource.java @@ -0,0 +1,23 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.params.provider.ArgumentsSource; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Documented +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@ArgumentsSource(RandomIntegralNumberProvider.class) +public @interface RandomIntegralNumberSource { + + Class[] classes(); + + /** + * If set to true generated test arguments will include the min and max values for a given numeric type. 
+ */ + boolean includeMinMax(); +} diff --git a/src/test/java/org/simdjson/testutils/RandomStringProvider.java b/src/test/java/org/simdjson/testutils/RandomStringProvider.java new file mode 100644 index 0000000..30d9840 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/RandomStringProvider.java @@ -0,0 +1,58 @@ +package org.simdjson.testutils; + +import org.apache.commons.text.StringEscapeUtils; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.support.AnnotationConsumer; + +import java.util.stream.IntStream; +import java.util.stream.Stream; + +/** Supplies (input string, expected value) argument pairs built from random JSON-escaped strings. */ class RandomStringProvider implements ArgumentsProvider, AnnotationConsumer { + + private int count; + private int minChars; + private int maxChars; + + /** Reads count, minChars and maxChars from the annotation and rejects invalid combinations with IllegalArgumentException. */ @Override + public void accept(RandomStringSource randomStringSource) { + count = randomStringSource.count(); + if (count <= 0) { + throw new IllegalArgumentException("count has to be greater than zero"); + } + minChars = randomStringSource.minChars(); + if (minChars <= 0) { + throw new IllegalArgumentException("minChars has to be greater than zero"); + } + maxChars = randomStringSource.maxChars(); + /* Integer.MAX_VALUE is rejected because StringTestData.randomString computes maxChars + 1, which would overflow. */ if (maxChars <= 0 || maxChars == Integer.MAX_VALUE) { + throw new IllegalArgumentException("maxChars has to be within the range of [1, Integer.MAX_VALUE - 1]"); + } + if (maxChars < minChars) { + throw new IllegalArgumentException("maxChars has to be greater or equal to minChars"); + } + } + + /** Requires a test method with exactly two parameters: a String input followed by a String, Character or char expectation. */ @Override + public Stream provideArguments(ExtensionContext context) { + Class[] parameterTypes = context.getRequiredTestMethod().getParameterTypes(); + if (parameterTypes.length != 2) { + throw new IllegalArgumentException("Test method should have two arguments: an input string and an expected value."); + } + if (parameterTypes[0] != String.class) { + throw new IllegalArgumentException("The first argument must be a String."); + 
} + if (parameterTypes[1] != String.class && parameterTypes[1] != Character.class && parameterTypes[1] != char.class) { + throw new IllegalArgumentException("The second argument must be either a String, Character, or char."); + } + return IntStream.range(0, count) + .mapToObj(i -> { + /* jsonStr is the JSON-escaped input; the expectation is derived by unescaping it. */ String jsonStr = StringTestData.randomString(minChars, maxChars); + if (parameterTypes[1] == String.class) { + return Arguments.of(jsonStr, StringEscapeUtils.unescapeJson(jsonStr)); + } + /* Character/char expectations receive only the first char of the unescaped string. */ return Arguments.of(jsonStr, StringEscapeUtils.unescapeJson(jsonStr).charAt(0)); + }); + } +} diff --git a/src/test/java/org/simdjson/testutils/RandomStringSource.java b/src/test/java/org/simdjson/testutils/RandomStringSource.java new file mode 100644 index 0000000..a7aecb3 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/RandomStringSource.java @@ -0,0 +1,22 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.params.provider.ArgumentsSource; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** Method-level annotation that registers RandomStringProvider as the JUnit ArgumentsSource. */ @Documented +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@ArgumentsSource(RandomStringProvider.class) +public @interface RandomStringSource { + + /** Number of argument sets to generate; RandomStringProvider requires it to be greater than zero. */ int count() default 10; + + /** Lower length bound handed to StringTestData.randomString; RandomStringProvider requires it to be greater than zero. */ int minChars() default 1; + + /** Upper length bound handed to StringTestData.randomString; must lie in [1, Integer.MAX_VALUE - 1] and not be below minChars. */ int maxChars() default 100; +} diff --git a/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueProvider.java b/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueProvider.java new file mode 100644 index 0000000..2076379 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueProvider.java @@ -0,0 +1,232 @@ +package org.simdjson.testutils; + +import org.apache.commons.lang3.RandomUtils; +import org.apache.commons.text.StringEscapeUtils; +import org.junit.jupiter.api.Named; +import org.junit.jupiter.api.extension.ExtensionContext; +import 
org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.support.AnnotationConsumer; + +import java.lang.reflect.Array; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Parameter; +import java.lang.reflect.ParameterizedType; +import java.lang.reflect.Type; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Stream; + +/** Generates, for each schema class named by SchemaBasedRandomValueSource, a random value together with its JSON text. */ class SchemaBasedRandomValueProvider implements ArgumentsProvider, AnnotationConsumer { + + /** Leaf types that generatePrimitive turns into a value directly. */ private static final Set> SUPPORTED_PRIMITIVE_TYPES = Set.of( + Boolean.class, + boolean.class, + String.class, + Character.class, + char.class, + Byte.class, + byte.class, + Short.class, + short.class, + Integer.class, + int.class, + Long.class, + long.class, + Float.class, + float.class, + Double.class, + double.class + ); + /** A null Java value paired with the JSON literal null. */ private static final GeneratedElement NULL_ELEMENT = new GeneratedElement(null, "null"); + /** Smallest length of a generated array or list. */ private static final int MIN_ARRAY_ELEMENT = 1; + /** Largest length of a generated array or list (MAX_ARRAY_ELEMENT + 1 is passed as the upper bound to RandomUtils.nextInt). */ private static final int MAX_ARRAY_ELEMENT = 50; + + private Class[] schemas; + private boolean nulls; + + /** Copies the schemas and nulls settings from the annotation into this provider. */ @Override + public void accept(SchemaBasedRandomValueSource schemaBasedRandomValueSource) { + schemas = schemaBasedRandomValueSource.schemas(); + nulls = schemaBasedRandomValueSource.nulls(); + } + + /** Emits one argument set per schema: Class parameters get the schema (wrapped in Named), String parameters the JSON text, all others the generated value. */ @Override + public Stream provideArguments(ExtensionContext context) { + Class[] parameterTypes = context.getRequiredTestMethod().getParameterTypes(); + return Arrays.stream(schemas) + .map(schema -> { + GeneratedElement expected = generate(schema, schema); + Object[] args = new Object[parameterTypes.length]; + for (int i = 0; i < args.length; i++) { + if (parameterTypes[i] == Class.class) { + args[i] = Named.named(schema.getName(), schema); + } else if (parameterTypes[i] == String.class) { + args[i] = expected.string(); + } else { + args[i] = expected.value(); + } + } + return 
() -> args; + }); + } + + /** Generates a value of class c (generic information taken from type) plus its JSON text: primitives, arrays and Lists are delegated, any other class is built through its single constructor as a JSON object keyed by parameter name. */ private GeneratedElement generate(Type type, Class c) { + if (SUPPORTED_PRIMITIVE_TYPES.contains(c)) { + return generatePrimitive(type); + } else if (c.isArray()) { + return generateArray(c); + } else if (c == List.class) { + return generateList((ParameterizedType) type); + } else { + Constructor constructor = resolveConstructor(c); + Parameter[] parameters = constructor.getParameters(); + Object[] args = new Object[parameters.length]; + StringBuilder jsonBuilder = new StringBuilder(); + jsonBuilder.append('{'); + for (int i = 0; i < args.length; i++) { + Parameter parameter = parameters[i]; + GeneratedElement generatedElement = generate(parameter.getAnnotatedType().getType(), parameter.getType()); + args[i] = generatedElement.value(); + /* Every field after the first is preceded by a comma so that multi-field schemas produce valid JSON. */ jsonBuilder.append(i == 0 ? "\"" : ", \""); + jsonBuilder.append(parameters[i].getName()); + jsonBuilder.append("\": "); + jsonBuilder.append(generatedElement.string()); + } + jsonBuilder.append('}'); + try { + Object o = constructor.newInstance(args); + return new GeneratedElement(o, jsonBuilder.toString()); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + } + + /** Builds an array of random length (bounded by MIN_ARRAY_ELEMENT and MAX_ARRAY_ELEMENT) together with its JSON text; nested arrays recurse. */ private GeneratedElement generateArray(Class type) { + StringBuilder jsonStringBuilder = new StringBuilder(); + Class elementType = extractElementType(type); + int len = RandomUtils.nextInt(MIN_ARRAY_ELEMENT, MAX_ARRAY_ELEMENT + 1); + Object array = Array.newInstance(elementType, len); + jsonStringBuilder.append('['); + boolean arrayHasNullElement = false; + for (int i = 0; i < len; i++) { + /* With nulls enabled an element is null on a coin flip, and the last element is forced to null if none occurred before it. */ boolean nullElement = nulls && ((!arrayHasNullElement && i == len - 1) || RandomUtils.nextBoolean()); + GeneratedElement element; + if (nullElement) { + element = NULL_ELEMENT; + } else if (elementType.isArray()) { + element = generateArray(elementType); + } else { + element = generateArrayElement(elementType); + } + Array.set(array, i, element.value()); + 
jsonStringBuilder.append(element.string()); + arrayHasNullElement |= nullElement; + if (i != len - 1) { + jsonStringBuilder.append(','); + } + } + jsonStringBuilder.append(']'); + return new GeneratedElement(array, jsonStringBuilder.toString()); + } + + /** Builds a List of random length (bounded by MIN_ARRAY_ELEMENT and MAX_ARRAY_ELEMENT) whose elements have the list's first type argument, together with its JSON text. */ private GeneratedElement generateList(ParameterizedType type) { + StringBuilder jsonStringBuilder = new StringBuilder(); + Type elementType = type.getActualTypeArguments()[0]; + int len = RandomUtils.nextInt(MIN_ARRAY_ELEMENT, MAX_ARRAY_ELEMENT + 1); + List list = new ArrayList<>(); + jsonStringBuilder.append('['); + boolean arrayHasNullElement = false; + for (int i = 0; i < len; i++) { + /* With nulls enabled an element is null on a coin flip, and the last element is forced to null if none occurred before it. */ boolean nullElement = nulls && ((!arrayHasNullElement && i == len - 1) || RandomUtils.nextBoolean()); + GeneratedElement element; + if (nullElement) { + element = NULL_ELEMENT; + } else if (elementType instanceof ParameterizedType parameterizedType) { + /* A parameterized element type is generated from its raw class while keeping the full type for nested generics. */ element = generate(elementType, (Class) parameterizedType.getRawType()); + } else { + element = generate(elementType, (Class) elementType); + } + list.add(element.value()); + jsonStringBuilder.append(element.string()); + arrayHasNullElement |= nullElement; + if (i != len - 1) { + jsonStringBuilder.append(','); + } + } + jsonStringBuilder.append(']'); + return new GeneratedElement(list, jsonStringBuilder.toString()); + } + + /** Returns the component type of c, or c itself when componentType() is null. */ private static Class extractElementType(Class c) { + Class elementType = c.componentType(); + if (elementType == null) { + return c; + } + return elementType; + } + + /** Generates one non-array element: supported primitives via generatePrimitive, everything else via generate. */ private GeneratedElement generateArrayElement(Class elementType) { + if (SUPPORTED_PRIMITIVE_TYPES.contains(elementType)) { + return generatePrimitive(elementType); + } + return generate(elementType, elementType); + } + + /** Returns the single declared constructor of expectedClass after making it accessible; throws IllegalArgumentException when the class declares any other number of constructors. */ private Constructor resolveConstructor(Class expectedClass) { + Constructor[] constructors = expectedClass.getDeclaredConstructors(); + if (constructors.length == 1) { + Constructor constructor = constructors[0]; + constructor.setAccessible(true); + return constructor; + } + throw 
new IllegalArgumentException("Unsupported class: " + expectedClass + ". It should has only one constructor."); + } + + /** Creates a random value of a supported boolean, String, char or numeric type (primitive or boxed) together with its JSON literal; throws UnsupportedOperationException for any other type. */ private GeneratedElement generatePrimitive(Type elementType) { + if (elementType == Boolean.class || elementType == boolean.class) { + boolean element = RandomUtils.nextBoolean(); + return new GeneratedElement(element, Boolean.toString(element)); + } + if (elementType == String.class) { + /* randomString returns JSON-escaped text: the expected value is its unescaped form, the JSON literal is the escaped form in quotes. */ String element = StringTestData.randomString(1, 50); + return new GeneratedElement(StringEscapeUtils.unescapeJson(element), "\"" + element + "\""); + } + if (elementType == Character.class || elementType == char.class) { + /* The expected char is the first char of the unescaped string. */ String element = StringTestData.randomString(1, 1); + return new GeneratedElement(StringEscapeUtils.unescapeJson(element).charAt(0), "\"" + element + "\""); + } + if (elementType == Byte.class || elementType == byte.class) { + byte element = NumberTestData.randomByte(); + return new GeneratedElement(element, String.valueOf(element)); + } + if (elementType == Short.class || elementType == short.class) { + short element = NumberTestData.randomShort(); + return new GeneratedElement(element, String.valueOf(element)); + } + if (elementType == Integer.class || elementType == int.class) { + int element = NumberTestData.randomInt(); + return new GeneratedElement(element, String.valueOf(element)); + } + if (elementType == Long.class || elementType == long.class) { + long element = NumberTestData.randomLong(); + return new GeneratedElement(element, String.valueOf(element)); + } + if (elementType == Float.class || elementType == float.class) { + float element = NumberTestData.randomFloat(); + return new GeneratedElement(element, String.valueOf(element)); + } + if (elementType == Double.class || elementType == double.class) { + double element = NumberTestData.randomDouble(); + return new GeneratedElement(element, String.valueOf(element)); + } + throw new UnsupportedOperationException("Unsupported type: " + elementType + ". 
The following classes are supported: " + SUPPORTED_PRIMITIVE_TYPES); + } + + /** Pairs a generated Java value with the JSON text that represents it. */ private record GeneratedElement(Object value, String string) { + } +} diff --git a/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueSource.java b/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueSource.java new file mode 100644 index 0000000..fc797e9 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/SchemaBasedRandomValueSource.java @@ -0,0 +1,23 @@ +package org.simdjson.testutils; + +import org.junit.jupiter.params.provider.ArgumentsSource; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** Method-level annotation that registers SchemaBasedRandomValueProvider as the JUnit ArgumentsSource. */ @Documented +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) +@ArgumentsSource(SchemaBasedRandomValueProvider.class) +public @interface SchemaBasedRandomValueSource { + + /** Root classes to generate values for; the provider emits one argument set per entry. */ Class[] schemas(); + + /** + * If set to true at least one null will appear in every generated array or list. 
+ */ + boolean nulls(); +} diff --git a/src/test/java/org/simdjson/testutils/SimdJsonAssertions.java b/src/test/java/org/simdjson/testutils/SimdJsonAssertions.java new file mode 100644 index 0000000..e0f114b --- /dev/null +++ b/src/test/java/org/simdjson/testutils/SimdJsonAssertions.java @@ -0,0 +1,11 @@ +package org.simdjson.testutils; + +import org.assertj.core.api.Assertions; +import org.simdjson.JsonValue; + +/** Extends the AssertJ Assertions entry point with an assertThat overload for JsonValue. */ public class SimdJsonAssertions extends Assertions { + + /** Wraps actual in a JsonValueAssert. */ public static JsonValueAssert assertThat(JsonValue actual) { + return new JsonValueAssert(actual); + } +} diff --git a/src/test/java/org/simdjson/testutils/StringTestData.java b/src/test/java/org/simdjson/testutils/StringTestData.java new file mode 100644 index 0000000..b20baca --- /dev/null +++ b/src/test/java/org/simdjson/testutils/StringTestData.java @@ -0,0 +1,112 @@ +package org.simdjson.testutils; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.RandomUtils; +import org.apache.commons.text.StringEscapeUtils; +import org.apache.commons.text.translate.AggregateTranslator; +import org.apache.commons.text.translate.CharSequenceTranslator; +import org.apache.commons.text.translate.JavaUnicodeEscaper; +import org.apache.commons.text.translate.LookupTranslator; + +import java.util.List; +import java.util.Map; + +import static java.lang.Character.MAX_CODE_POINT; +import static java.lang.Character.isBmpCodePoint; +import static java.lang.Character.lowSurrogate; +import static java.util.stream.IntStream.rangeClosed; + +public class StringTestData { + + /** Backslash-escapes double quotes and backslashes and applies JavaUnicodeEscaper.below(0x20) to the rest. */ public static final CharSequenceTranslator ESCAPE_JSON = new AggregateTranslator( + new LookupTranslator(Map.of("\"", "\\\"", "\\", "\\\\")), + JavaUnicodeEscaper.below(0x20) + ); + + /** Draws a random string whose raw length is between minChars and maxChars (maxChars + 1 is the upper bound passed to RandomUtils.nextInt), then returns it translated with ESCAPE_JSON. */ public static String randomString(int minChars, int maxChars) { + int stringLen = RandomUtils.nextInt(minChars, maxChars + 1); + var rawString = RandomStringUtils.random(stringLen); + var jsonString = ESCAPE_JSON.translate(rawString); + 
System.out.println("Generated string: " + jsonString + " [" + StringEscapeUtils.escapeJava(jsonString) + "]"); + return jsonString; + } + + /** + * Returns all usable characters that don't need to be escaped. + * It means that all control characters, '"', and '\' are not returned. + */ + public static List usableSingleCodeUnitCharacters() { + return rangeClosed(0, MAX_CODE_POINT) + .filter(Character::isBmpCodePoint) + .filter(codePoint -> !isReservedCodePoint(codePoint)) + .filter(codePoint -> !Character.isISOControl(codePoint)) + .filter(codePoint -> (char) codePoint != '"') + .filter(codePoint -> (char) codePoint != '\\') + .mapToObj(codePoint -> (char) codePoint) + .map(String::valueOf) + .toList(); + } + + /** Returns the unicode escape of every BMP code point outside the surrogate range 0xD800-0xDFFF. */ public static List usableEscapedSingleCodeUnitCharacters() { + return rangeClosed(0, MAX_CODE_POINT) + .filter(Character::isBmpCodePoint) + .filter(codePoint -> !isReservedCodePoint(codePoint)) + .mapToObj(StringTestData::toUnicodeEscape) + .toList(); + } + + /** Returns the unicode escape of every code point in the surrogate range 0xD800-0xDFFF. */ public static List reservedEscapedSingleCodeUnitCharacters() { + return rangeClosed(0, MAX_CODE_POINT) + .filter(Character::isBmpCodePoint) + .filter(StringTestData::isReservedCodePoint) + .mapToObj(StringTestData::toUnicodeEscape) + .toList(); + } + + /** Returns the unicode escape of every value in 0xDC00-0xDFFF. */ public static List escapedLowSurrogates() { + return rangeClosed(0xDC00, 0xDFFF) + .mapToObj(StringTestData::toUnicodeEscape) + .toList(); + } + + /** Returns every code point above the BMP as a raw (unescaped) string. */ public static List usableTwoCodeUnitsCharacters() { + return rangeClosed(0, MAX_CODE_POINT) + .filter(codePoint -> !Character.isBmpCodePoint(codePoint)) + .mapToObj(Character::toString) + .toList(); + } + + /** Returns the unicode escape of every code point from 0 to MAX_CODE_POINT except 0xD800-0xDFFF. */ public static List usableEscapedUnicodeCharacters() { + return rangeClosed(0, MAX_CODE_POINT) + .filter(codePoint -> !isReservedCodePoint(codePoint)) + .mapToObj(StringTestData::toUnicodeEscape) + .toList(); + } + + /** Returns an escaped D800 followed by an escaped code unit from 0x0000-0xFFFF that lies outside 0xDC00-0xDFFF. */ public static List escapedUnicodeCharactersWithInvalidLowSurrogate() { + return rangeClosed(0x0000, 0xFFFF) + .filter(lowSurrogate -> lowSurrogate < 0xDC00 || lowSurrogate > 0xDFFF) + 
.mapToObj(lowSurrogate -> String.format("\\uD800\\u%04X", lowSurrogate)) + .toList(); + } + + /** Returns raw one-character strings for the code points 0x00 through 0x1F. */ public static List unescapedControlCharacters() { + return rangeClosed(0, 0x001F) + .mapToObj(codePoint -> (char) codePoint) + .map(String::valueOf) + .toList(); + } + + /** Formats a BMP code point as one four-hex-digit unicode escape, any other code point as the escapes of its high and low surrogates. */ private static String toUnicodeEscape(int codePoint) { + if (isBmpCodePoint(codePoint)) { + return String.format("\\u%04X", codePoint); + } else { + return String.format("\\u%04X\\u%04X", + (int) Character.highSurrogate(codePoint), (int) lowSurrogate(codePoint)); + } + } + + /** True for code points in the surrogate range 0xD800-0xDFFF. */ private static boolean isReservedCodePoint(int codePoint) { + return codePoint >= 0xD800 && codePoint <= 0xDFFF; + } +} diff --git a/src/test/java/org/simdjson/testutils/TestUtils.java b/src/test/java/org/simdjson/testutils/TestUtils.java new file mode 100644 index 0000000..aba84c2 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/TestUtils.java @@ -0,0 +1,40 @@ +package org.simdjson.testutils; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; + +import static java.nio.charset.StandardCharsets.UTF_8; + +public class TestUtils { + + /** Returns the UTF-8 bytes of str followed by 64 space bytes. */ public static byte[] toUtf8PaddedWithSpaces(String str) { + byte[] strBytes = toUtf8(str); + byte[] padded = new byte[strBytes.length + 64]; + Arrays.fill(padded, (byte) ' '); + System.arraycopy(strBytes, 0, padded, 0, strBytes.length); + return padded; + } + + /** Returns the UTF-8 encoding of str. */ public static byte[] toUtf8(String str) { + return str.getBytes(UTF_8); + } + + /** Reads the resource found by TestUtils.class.getResourceAsStream(name) fully into memory; a missing resource fails with a NullPointerException that names it. */ public static byte[] loadTestFile(String name) throws IOException { + try (InputStream is = TestUtils.class.getResourceAsStream(name)) { + /* getResourceAsStream returns null when the resource does not exist. */ return java.util.Objects.requireNonNull(is, "Test resource not found: " + name).readAllBytes(); + } + } + + /** Formats array as space-separated two-digit uppercase hex values inside square brackets. */ public static String toHexString(byte[] array) { + var sb = new StringBuilder("["); + for (int i = 0; i < array.length; i++) { + sb.append(String.format("%02X", array[i])); + if (i < array.length - 1) { + sb.append(" "); + } + } + sb.append("]"); + return sb.toString(); + } +} diff --git a/src/test/java/org/simdjson/testutils/Utf8TestData.java 
b/src/test/java/org/simdjson/testutils/Utf8TestData.java new file mode 100644 index 0000000..5d9d348 --- /dev/null +++ b/src/test/java/org/simdjson/testutils/Utf8TestData.java @@ -0,0 +1,62 @@ +package org.simdjson.testutils; + +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.RandomUtils; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +/** Builders for UTF-8 byte sequences used as test input. */ public class Utf8TestData { + + /** + * Generates UTF-8 sequences from the provided range. Each sequence is of the given length. + * Note that when the length is greater than necessary for a given code point, this function + * produces sequences that are invalid UTF-8. This is a useful property when one wants to + * generate overlong encodings for testing purposes. + */ + public static List utf8Sequences(int from, int to, int length) { + List result = new ArrayList<>(); + for (int i = from; i <= to; i++) { + byte[] bytes = new byte[length]; + int current = i; + // continuation bytes + /* Filled from the last byte backwards, six payload bits per byte. */ for (int byteIdx = length - 1; byteIdx >= 1; byteIdx--) { + bytes[byteIdx] = (byte) (0b1000_0000 | (current & 0b0011_1111)); + current = current >>> 6; + } + // leading byte + /* The signed shift sign-extends, so the low byte starts with `length` one bits; 0b0011_111 equals 0x1F, i.e. the five low bits of the remaining value are OR-ed in. */ bytes[0] = (byte) ((0x80000000 >> (24 + length - 1)) | (current & 0b0011_111)); + result.add(bytes); + } + return result; + } + + /** Returns the UTF-8 bytes of a random string built via randomUtf8ByteArray(1, 1000). */ public static byte[] randomUtf8ByteArray() { + return randomUtf8ByteArray(1, 1000); + } + + /** Returns sequence placed between a random prefix and a random suffix, each produced by randomUtf8ByteArray(0, 500). */ public static byte[] randomUtf8ByteArrayIncluding(byte... sequence) { + byte[] prefix = randomUtf8ByteArray(0, 500); + byte[] suffix = randomUtf8ByteArray(0, 500); + byte[] result = new byte[prefix.length + sequence.length + suffix.length]; + System.arraycopy(prefix, 0, result, 0, prefix.length); + System.arraycopy(sequence, 0, result, prefix.length, sequence.length); + System.arraycopy(suffix, 0, result, prefix.length + sequence.length, suffix.length); + return result; + } + + /** Returns a random byte array produced by randomUtf8ByteArray(0, 1000) with sequence appended. */ public static byte[] randomUtf8ByteArrayEndedWith(byte... 
sequence) { + byte[] array = randomUtf8ByteArray(0, 1000); + byte[] result = new byte[array.length + sequence.length]; + /* Random bytes first, then the caller's sequence at the very end. */ System.arraycopy(array, 0, result, 0, array.length); + System.arraycopy(sequence, 0, result, array.length, sequence.length); + return result; + } + + /** Encodes as UTF-8 a random string whose length is drawn with RandomUtils.nextInt(minChars, maxChars + 1). */ private static byte[] randomUtf8ByteArray(int minChars, int maxChars) { + int stringLen = RandomUtils.nextInt(minChars, maxChars + 1); + var string = RandomStringUtils.random(stringLen); + return string.getBytes(StandardCharsets.UTF_8); + } +}