first version · simdjson/simdjson-java@ebcb106 · GitHub
[go: up one dir, main page]

Skip to content

Commit ebcb106

Browse files
committed
first version
1 parent bc88d27 commit ebcb106

33 files changed

+57696
-9
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
.idea
22
.gradle
3+
build
4+
profilers

README.md

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,67 @@
1-
# simdjson-java
1+
# simdjson-java
2+
3+
A Java version of [simdjson](https://github.com/simdjson/simdjson) - a JSON parser using SIMD instructions,
4+
based on the paper [Parsing Gigabytes of JSON per Second](https://arxiv.org/abs/1902.08318)
5+
by Geoff Langdale and Daniel Lemire.
6+
7+
This implementation is still missing several features available in simdjson. For example:
8+
9+
* Support for Unicode characters
10+
* UTF-8 validation
11+
* Full support for parsing floats
12+
* Support for 512-bit vectors
13+
14+
## Code Sample
15+
16+
```java
17+
byte[] json = loadTwitterJson();
18+
19+
SimdJsonParser parser = new SimdJsonParser();
20+
JsonValue jsonValue = parser.parse(json, json.length);
21+
Iterator<JsonValue> tweets = jsonValue.get("statuses").arrayIterator();
22+
while (tweets.hasNext()) {
23+
JsonValue tweet = tweets.next();
24+
JsonValue user = tweet.get("user");
25+
if (user.get("default_profile").asBoolean()) {
26+
System.out.println(user.get("screen_name").asString());
27+
}
28+
}
29+
```
30+
31+
## Benchmarks
32+
33+
To run the JMH benchmarks, execute the following command:
34+
35+
```./gradlew jmh```
36+
37+
## Tests
38+
39+
To run the tests, execute the following command:
40+
41+
```./gradlew test```
42+
43+
## Performance
44+
45+
This section presents a performance comparison of different JSON parsers available as Java libraries. The benchmark used
46+
the [twitter.json](src/jmh/resources/twitter.json) dataset, and its goal was to measure the throughput (ops/s) of parsing
47+
and finding all unique users with a default profile.
48+
49+
**Note that simdjson-java is still missing several features (mentioned in the introduction), so the following results
50+
may not reflect its real performance.**
51+
52+
Environment:
53+
* CPU: Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz
54+
* OS: Ubuntu 23.04, kernel 6.2.0-23-generic
55+
* Java: OpenJDK 64-Bit Server VM Temurin-20.0.1+9
56+
57+
Library | Version | Throughput (ops/s)
58+
---------------------------------------------------|---------|--------------------
59+
simdjson-java | - | 1450.951
60+
simdjson-java (padded) | - | 1505.227
61+
[jackson](https://github.com/FasterXML/jackson) | 2.15.2 | 504.562
62+
[fastjson2](https://github.com/alibaba/fastjson2) | 2.0.35 | 590.743
63+
[jsoniter](https://github.com/json-iterator/java) | 0.9.23 | 384.664
64+
65+
To reproduce the benchmark results, execute the following command:
66+
67+
```./gradlew jmh -Pjmh.includes='.*ParseAndSelectBenchmark.*'```

build.gradle

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import me.champeau.jmh.JmhBytecodeGeneratorTask
2+
import org.gradle.internal.os.OperatingSystem
3+
14
plugins {
25
id 'java'
36
id 'me.champeau.jmh' version '0.7.1'
47
}
58

69
group = 'com.github.piotrrzysko'
7-
version = '1.0-SNAPSHOT'
10+
version = '0.0.1-SNAPSHOT'
811

912
repositories {
1013
mavenCentral()
@@ -21,6 +24,10 @@ ext {
2124
}
2225

2326
dependencies {
27+
jmhImplementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.15.2'
28+
jmhImplementation group: 'com.alibaba.fastjson2', name: 'fastjson2', version: '2.0.35'
29+
jmhImplementation group: 'com.jsoniter', name: 'jsoniter', version: '0.9.23'
30+
2431
testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.24.2'
2532
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion
2633
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: junitVersion
@@ -29,4 +36,55 @@ dependencies {
2936

3037
test {
3138
useJUnitPlatform()
39+
jvmArgs += [
40+
'--add-modules', 'jdk.incubator.vector',
41+
'-Xmx2g'
42+
]
43+
}
44+
45+
tasks.withType(JmhBytecodeGeneratorTask).configureEach {
46+
jvmArgs.set(["--add-modules=jdk.incubator.vector"])
47+
}
48+
49+
tasks.withType(JavaCompile).configureEach {
50+
options.compilerArgs.add("--add-modules=jdk.incubator.vector")
51+
}
52+
53+
compileTestJava {
54+
options.compilerArgs += [
55+
'--add-modules', 'jdk.incubator.vector'
56+
]
57+
}
58+
59+
jmh {
60+
fork = 1
61+
warmupIterations = 3
62+
iterations = 5
63+
jvmArgsPrepend = [
64+
'--add-modules=jdk.incubator.vector'
65+
]
66+
if (getBooleanProperty('jmh.profilersEnabled', false)) {
67+
if (OperatingSystem.current().isLinux()) {
68+
profilers = [
69+
'perf',
70+
'perfasm:intelSyntax=true',
71+
'async:verbose=true;output=flamegraph;event=cpu;dir=./profilers/async;libPath=' + getAsyncProfilerLibPath('LD_LIBRARY_PATH')
72+
]
73+
} else if (OperatingSystem.current().isMacOsX()) {
74+
profilers = [
75+
'async:verbose=true;output=flamegraph;event=cpu;dir=./profilers/async;libPath=' + getAsyncProfilerLibPath('DYLD_LIBRARY_PATH')
76+
]
77+
}
78+
}
79+
if (project.hasProperty('jmh.includes')) {
80+
includes = [project.findProperty('jmh.includes')]
81+
}
82+
}
83+
84+
def getBooleanProperty(String name, boolean defaultValue) {
85+
Boolean.valueOf((project.findProperty(name) ?: defaultValue) as String)
86+
}
87+
88+
static def getAsyncProfilerLibPath(String envVarName) {
89+
System.getenv(envVarName) ?: System.getProperty('java.library.path')
3290
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
package com.github.piotrrzysko.simdjson;
2+
3+
import com.alibaba.fastjson2.JSON;
4+
import com.alibaba.fastjson2.JSONObject;
5+
import com.fasterxml.jackson.databind.JsonNode;
6+
import com.fasterxml.jackson.databind.ObjectMapper;
7+
import com.jsoniter.JsonIterator;
8+
import com.jsoniter.any.Any;
9+
import org.openjdk.jmh.annotations.Benchmark;
10+
import org.openjdk.jmh.annotations.BenchmarkMode;
11+
import org.openjdk.jmh.annotations.Level;
12+
import org.openjdk.jmh.annotations.Mode;
13+
import org.openjdk.jmh.annotations.OutputTimeUnit;
14+
import org.openjdk.jmh.annotations.Scope;
15+
import org.openjdk.jmh.annotations.Setup;
16+
import org.openjdk.jmh.annotations.State;
17+
18+
import java.io.IOException;
19+
import java.io.InputStream;
20+
import java.util.HashSet;
21+
import java.util.Iterator;
22+
import java.util.Set;
23+
import java.util.concurrent.TimeUnit;
24+
25+
import static com.github.piotrrzysko.simdjson.SimdJsonPaddingUtil.padded;
26+
27+
@State(Scope.Benchmark)
28+
@BenchmarkMode(Mode.Throughput)
29+
@OutputTimeUnit(TimeUnit.SECONDS)
30+
public class ParseAndSelectBenchmark {
31+
32+
private final SimdJsonParser simdJsonParser = new SimdJsonParser();
33+
private final ObjectMapper objectMapper = new ObjectMapper();
34+
35+
private byte[] buffer;
36+
private byte[] bufferPadded;
37+
38+
@Setup(Level.Trial)
39+
public void setup() throws IOException {
40+
try (InputStream is = ParseBenchmark.class.getResourceAsStream("/twitter.json")) {
41+
buffer = is.readAllBytes();
42+
bufferPadded = padded(buffer);
43+
}
44+
}
45+
46+
@Benchmark
47+
public int countUniqueUsersWithDefaultProfile_jackson() throws IOException {
48+
JsonNode jacksonJsonNode = objectMapper.readTree(buffer);
49+
Set<String> defaultUsers = new HashSet<>();
50+
Iterator<JsonNode> tweets = jacksonJsonNode.get("statuses").elements();
51+
while (tweets.hasNext()) {
52+
JsonNode tweet = tweets.next();
53+
JsonNode user = tweet.get("user");
54+
if (user.get("default_profile").asBoolean()) {
55+
defaultUsers.add(user.get("screen_name").textValue());
56+
}
57+
}
58+
return defaultUsers.size();
59+
}
60+
61+
@Benchmark
62+
public int countUniqueUsersWithDefaultProfile_fastjson() {
63+
JSONObject jsonObject = (JSONObject) JSON.parse(buffer);
64+
Set<String> defaultUsers = new HashSet<>();
65+
Iterator<Object> tweets = jsonObject.getJSONArray("statuses").iterator();
66+
while (tweets.hasNext()) {
67+
JSONObject tweet = (JSONObject) tweets.next();
68+
JSONObject user = (JSONObject) tweet.get("user");
69+
if (user.getBoolean("default_profile")) {
70+
defaultUsers.add(user.getString("screen_name"));
71+
}
72+
}
73+
return defaultUsers.size();
74+
}
75+
76+
@Benchmark
77+
public int countUniqueUsersWithDefaultProfile_jsoniter() {
78+
Any json = JsonIterator.deserialize(buffer);
79+
Set<String> defaultUsers = new HashSet<>();
80+
for (Any tweet : json.get("statuses")) {
81+
Any user = tweet.get("user");
82+
if (user.get("default_profile").toBoolean()) {
83+
defaultUsers.add(user.get("screen_name").toString());
84+
}
85+
}
86+
return defaultUsers.size();
87+
}
88+
89+
@Benchmark
90+
public int countUniqueUsersWithDefaultProfile_simdjson() {
91+
JsonValue simdJsonValue = simdJsonParser.parse(buffer, buffer.length);
92+
Set<String> defaultUsers = new HashSet<>();
93+
Iterator<JsonValue> tweets = simdJsonValue.get("statuses").arrayIterator();
94+
while (tweets.hasNext()) {
95+
JsonValue tweet = tweets.next();
96+
JsonValue user = tweet.get("user");
97+
if (user.get("default_profile").asBoolean()) {
98+
defaultUsers.add(user.get("screen_name").asString());
99+
}
100+
}
101+
return defaultUsers.size();
102+
}
103+
104+
@Benchmark
105+
public int countUniqueUsersWithDefaultProfile_simdjsonPadded() {
106+
JsonValue simdJsonValue = simdJsonParser.parse(bufferPadded, buffer.length);
107+
Set<String> defaultUsers = new HashSet<>();
108+
Iterator<JsonValue> tweets = simdJsonValue.get("statuses").arrayIterator();
109+
while (tweets.hasNext()) {
110+
JsonValue tweet = tweets.next();
111+
JsonValue user = tweet.get("user");
112+
if (user.get("default_profile").asBoolean()) {
113+
defaultUsers.add(user.get("screen_name").asString());
114+
}
115+
}
116+
return defaultUsers.size();
117+
}
118+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package com.github.piotrrzysko.simdjson;
2+
3+
import org.openjdk.jmh.annotations.Benchmark;
4+
import org.openjdk.jmh.annotations.BenchmarkMode;
5+
import org.openjdk.jmh.annotations.Level;
6+
import org.openjdk.jmh.annotations.Mode;
7+
import org.openjdk.jmh.annotations.OutputTimeUnit;
8+
import org.openjdk.jmh.annotations.Param;
9+
import org.openjdk.jmh.annotations.Scope;
10+
import org.openjdk.jmh.annotations.Setup;
11+
import org.openjdk.jmh.annotations.State;
12+
13+
import java.io.IOException;
14+
import java.io.InputStream;
15+
import java.util.concurrent.TimeUnit;
16+
17+
import static com.github.piotrrzysko.simdjson.SimdJsonPaddingUtil.padded;
18+
19+
@State(Scope.Benchmark)
20+
@BenchmarkMode(Mode.Throughput)
21+
@OutputTimeUnit(TimeUnit.SECONDS)
22+
public class ParseBenchmark {
23+
24+
@Param({"/twitter.json" /*, "/gsoc-2018.json - unicode is not supported yet"*/, "/github_events.json"})
25+
String fileName;
26+
27+
private final SimdJsonParser simdJsonParser = new SimdJsonParser();
28+
29+
private byte[] buffer;
30+
private byte[] bufferPadded;
31+
32+
@Setup(Level.Trial)
33+
public void setup() throws IOException {
34+
try (InputStream is = ParseBenchmark.class.getResourceAsStream(fileName)) {
35+
buffer = is.readAllBytes();
36+
bufferPadded = padded(buffer);
37+
}
38+
}
39+
40+
@Benchmark
41+
public JsonValue simdjson() {
42+
return simdJsonParser.parse(buffer, buffer.length);
43+
}
44+
45+
@Benchmark
46+
public JsonValue simdjsonPadded() {
47+
return simdJsonParser.parse(bufferPadded, buffer.length);
48+
}
49+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package com.github.piotrrzysko.simdjson;
2+
3+
class SimdJsonPaddingUtil {
4+
5+
static byte[] padded(byte[] src) {
6+
byte[] bufferPadded = new byte[src.length + 64];
7+
System.arraycopy(src, 0, bufferPadded, 0, src.length);
8+
return bufferPadded;
9+
}
10+
}

0 commit comments

Comments
 (0)
0