10000 Fixed detection of unsigned add overflow by piotrrzysko · Pull Request #27 · simdjson/simdjson-java · GitHub
[go: up one dir, main page]

Skip to content

Fixed detection of unsigned add overflow #27

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Fixed detection of unsigned add overflow
  • Loading branch information
piotrrzysko committed Sep 23, 2023
commit c33d2fad4f8503c94c3c4397161062275bd335cf
6 changes: 4 additions & 2 deletions src/main/java/org/simdjson/JsonStringScanner.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.simdjson;

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorSpecies;

class JsonStringScanner {

Expand Down Expand Up @@ -59,7 +58,10 @@ private long findEscaped(long backslash) {
long oddSequenceStarts = backslash & ODD_BITS_MASK & ~followsEscape;

long sequencesStartingOnEvenBits = oddSequenceStarts + backslash;
prevEscaped = ((oddSequenceStarts ^ sequencesStartingOnEvenBits) & (backslash ^ sequencesStartingOnEvenBits)) >>> 63;
// Here, we check if the unsigned addition above caused an overflow. If that's the case, we store 1 in prevEscaped.
// The formula used to detect overflow was taken from 'Hacker's Delight, Second Edition' by Henry S. Warren, Jr.,
// Chapter 2-13.
prevEscaped = ((oddSequenceStarts >>> 1) + (backslash >>> 1) + ((oddSequenceStarts & backslash) & 1)) >>> 63;

long invertMask = sequencesStartingOnEvenBits << 1;
return (EVEN_BITS_MASK ^ invertMask) & followsEscape;
Expand Down
14 changes: 4 additions & 10 deletions src/test/java/org/simdjson/BenchmarkCorrectnessTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
import org.junit.jupiter.params.provider.CsvSource;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import static org.assertj.core.api.Assertions.assertThat;
import static org.simdjson.StringUtils.padWithSpaces;
import static org.simdjson.StringUtils.toUtf8;
import static org.simdjson.TestUtils.loadTestFile;
import static org.simdjson.TestUtils.padWithSpaces;
import static org.simdjson.TestUtils.toUtf8;

public class BenchmarkCorrectnessTest {

Expand All @@ -21,7 +21,7 @@ public void countUniqueTwitterUsersWithDefaultProfile() throws IOException {
// given
SimdJsonParser parser = new SimdJsonParser();
Set<String> defaultUsers = new HashSet<>();
byte[] json = loadTwitterJson();
byte[] json = loadTestFile("/twitter.json");

// when
JsonValue simdJsonValue = parser.parse(json, json.length);
Expand Down Expand Up @@ -55,10 +55,4 @@ public void numberParserTest(String input, Double expected) {
// then
assertThat(tape.getDouble(0)).isEqualTo(expected);
}

private static byte[] loadTwitterJson() throws IOException {
try (InputStream is = BenchmarkCorrectnessTest.class.getResourceAsStream("/twitter.json")) {
return is.readAllBytes();
}
}
}
2 changes: 1 addition & 1 deletion src/test/java/org/simdjson/CharactersClassifierTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;
import static org.simdjson.StringUtils.chunk;
import static org.simdjson.TestUtils.chunk;

public class CharactersClassifierTest {

Expand Down
4 changes: 2 additions & 2 deletions src/test/java/org/simdjson/JsonStringScannerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import org.junit.jupiter.params.provider.ValueSource;

import static org.assertj.core.api.Assertions.assertThat;
import static org.simdjson.StringUtils.chunk;
import static org.simdjson.StringUtils.padWithSpaces;
import static org.simdjson.TestUtils.chunk;
import static org.simdjson.TestUtils.padWithSpaces;

public class JsonStringScannerTest {

Expand Down
2 changes: 1 addition & 1 deletion src/test/java/org/simdjson/NumberParsingTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.simdjson.JsonValueAssert.assertThat;
import static org.simdjson.StringUtils.toUtf8;
import static org.simdjson.TestUtils.toUtf8;

public class NumberParsingTest {

Expand Down
2 changes: 1 addition & 1 deletion src/test/java/org/simdjson/ObjectParsingTest.java
6D47
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import static org.assertj.core.api.Assertions.assertThat;
import static org.simdjson.JsonValueAssert.assertThat;
import static org.simdjson.StringUtils.toUtf8;
import static org.simdjson.TestUtils.toUtf8;

public class ObjectParsingTest {

Expand Down
31 changes: 30 additions & 1 deletion src/test/java/org/simdjson/SimdJsonParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

import java.io.IOException;
import java.util.Iterator;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.fail;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.simdjson.JsonValueAssert.assertThat;
import static org.simdjson.StringUtils.toUtf8;
import static org.simdjson.TestUtils.loadTestFile;
import static org.simdjson.TestUtils.toUtf8;

public class SimdJsonParserTest {

Expand Down Expand Up @@ -291,4 +293,31 @@ public void testLargeArraySize() {
assertThat(jsonValue.isArray()).isTrue();
assertThat(jsonValue.getSize()).isEqualTo(0xFFFFFF);
}

@Test
public void issue26DeepBench() throws IOException {
// given
SimdJsonParser parser = new SimdJsonParser();
byte[] json = loadTestFile("/deep_bench.json");

// when
JsonValue jsonValue = parser.parse(json, json.length);

// then
assertThat(jsonValue.isObject()).isTrue();
}

@ParameterizedTest
@ValueSource(strings = {"/wide_bench.json", "/deep_bench.json"})
public void issue26(String file) throws IOException {
// given
SimdJsonParser parser = new SimdJsonParser();
byte[] json = loadTestFile(file);

// when
JsonValue jsonValue = parser.parse(json, json.length);

// then
assertThat(jsonValue.isObject()).isTrue();
}
}
2 changes: 1 addition & 1 deletion src/test/java/org/simdjson/StringParsingTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.simdjson.JsonValueAssert.assertThat;
import static org.simdjson.StringUtils.toUtf8;
import static org.simdjson.TestUtils.toUtf8;

public class StringParsingTest {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import jdk.incubator.vector.ByteVector;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

import static java.nio.charset.StandardCharsets.UTF_8;

class StringUtils {
class TestUtils {

static String padWithSpaces(String str) {
byte[] strBytes = toUtf8(str);
Expand All @@ -23,4 +25,10 @@ static ByteVector chunk(String str, int n) {
static byte[] toUtf8(String str) {
return str.getBytes(UTF_8);
}

static byte[] loadTestFile(String name) throws IOException {
try (InputStream is = TestUtils.class.getResourceAsStream(name)) {
return is.readAllBytes();
}
}
}
Loading
0