Number parsing (#14) · huyu-tom/simdjson-java@cd94004 · GitHub
[go: up one dir, main page]

Skip to content

Commit cd94004

Browse files
authored
Number parsing (simdjson#14)
1 parent 784b720 commit cd94004

14 files changed

+2045
-182
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
.gradle
33
build
44
profilers
5+
testdata

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ This implementation is still missing several features available in simdjson. For
88

99
* Support for Unicode characters
1010
* UTF-8 validation
11-
* Full support for parsing floats
1211
* Support for 512-bit vectors
1312

1413
## Code Sample

build.gradle

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import me.champeau.jmh.JmhBytecodeGeneratorTask
22
import org.gradle.internal.os.OperatingSystem
3+
import org.ajoberstar.grgit.Grgit
34

45
plugins {
56
id 'java'
67
id 'scala'
78
id 'me.champeau.jmh' version '0.7.1'
9+
id 'org.ajoberstar.grgit' version '5.2.0'
810
}
911

1012
group = 'org.simdjson'
@@ -37,7 +39,22 @@ dependencies {
3739
testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion
3840
}
3941

42+
// Clones the number-parsing test corpus into <projectDir>/testdata the first
// time it is needed; an existing checkout directory is left untouched.
tasks.register('downloadTestData') {
    doFirst {
        File dataRoot = new File(project.projectDir.absolutePath, 'testdata')
        if (!dataRoot.exists()) {
            dataRoot.mkdir()
        }

        File numbersRepoDir = new File(dataRoot, 'parse-number-fxx-test-data')
        // Presence of the directory is the only "already downloaded" check.
        if (numbersRepoDir.exists()) {
            return
        }

        // The clone handle is only needed for the checkout itself, so release it right away.
        Grgit.clone(
                dir: numbersRepoDir,
                uri: 'https://github.com/nigeltao/parse-number-fxx-test-data.git'
        ).close()
    }
}
55+
4056
test {
57+
dependsOn downloadTestData
4158
useJUnitPlatform()
4259
jvmArgs += [
4360
'--add-modules', 'jdk.incubator.vector',
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package org.simdjson;
2+
3+
import org.openjdk.jmh.annotations.Benchmark;
4+
import org.openjdk.jmh.annotations.BenchmarkMode;
5+
import org.openjdk.jmh.annotations.Level;
6+
import org.openjdk.jmh.annotations.Mode;
7+
import org.openjdk.jmh.annotations.OutputTimeUnit;
8+
import org.openjdk.jmh.annotations.Param;
9+
import org.openjdk.jmh.annotations.Scope;
10+
import org.openjdk.jmh.annotations.Setup;
11+
import org.openjdk.jmh.annotations.State;
12+
13+
import java.io.IOException;
14+
import java.util.concurrent.TimeUnit;
15+
16+
import static org.simdjson.SimdJsonPaddingUtil.padWithSpaces;
17+
18+
@State(Scope.Benchmark)
19+
@BenchmarkMode(Mode.Throughput)
20+
@OutputTimeUnit(TimeUnit.SECONDS)
21+
public class NumberParserBenchmark {
22+
23+
private final Tape tape = new Tape(100);
24+
private final NumberParser numberParser = new NumberParser(tape);
25+
26+
@Param({
27+
"2.2250738585072013e-308", // fast path
28+
"1.00000000000000188558920870223463870174566020691753515394643550663070558368373221972569761144603605635692374830246134201063722058e-309" // slow path
29+
})
30+
String number;
31+
byte[] numberUtf8Bytes;
32+
33+
@Setup(Level.Trial)
34+
public void setup() throws IOException {
35+
numberUtf8Bytes = padWithSpaces(number);
36+
}
37+
38+
@Benchmark
39+
public double baseline() {
40+
return Double.parseDouble(number);
41+
}
42+
43+
@Benchmark
44+
public double simdjson() {
45+
tape.reset();
46+
numberParser.parseNumber(numberUtf8Bytes, 0);
47+
return tape.getDouble(0);
48+
}
49+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,22 @@
11
package org.simdjson;
22

3+
import java.util.Arrays;
4+
5+
import static java.nio.charset.StandardCharsets.UTF_8;
6+
37
class SimdJsonPaddingUtil {
48

59
static byte[] padded(byte[] src) {
610
byte[] bufferPadded = new byte[src.length + 64];
711
System.arraycopy(src, 0, bufferPadded, 0, src.length);
812
return bufferPadded;
913
}
14+
15+
static byte[] padWithSpaces(String str) {
16+
byte[] strBytes = str.getBytes(UTF_8);
17+
byte[] padded = new byte[strBytes.length + 64];
18+
Arrays.fill(padded, (byte) ' ');
19+
System.arraycopy(strBytes, 0, padded, 0, strBytes.length);
20+
return padded;
21+
}
1022
}

src/main/java/org/simdjson/JsonValue.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ public String toString() {
112112
case INT64 -> {
113113
return String.valueOf(asLong());
114114
}
115+
case DOUBLE -> {
116+
return String.valueOf(asDouble());
117+
}
115118
case TRUE_VALUE, FALSE_VALUE -> {
116119
return String.valueOf(asBoolean());
117120
}

0 commit comments

Comments
 (0)
0