sse2 stable impl · ijl/orjson@d18f26c · GitHub
[go: up one dir, main page]

Skip to content

Commit d18f26c

Browse files
committed
sse2 stable impl
1 parent bd9dca5 commit d18f26c

File tree

4 files changed

+162
-9
lines changed

4 files changed

+162
-9
lines changed

src/serialize/writer/json.rs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ where
604604
unsafe {
605605
reserve_str!(writer, value);
606606

607-
let written = format_escaped_str_impl_generic_128(
607+
let written = format_escaped_str_impl_sse2_128(
608608
writer.as_mut_buffer_ptr(),
609609
value.as_bytes().as_ptr(),
610610
value.len(),
@@ -631,7 +631,7 @@ where
631631
);
632632
writer.set_written(written);
633633
} else {
634-
let written = format_escaped_str_impl_generic_128(
634+
let written = format_escaped_str_impl_sse2_128(
635635
writer.as_mut_buffer_ptr(),
636636
value.as_bytes().as_ptr(),
637637
value.len(),
@@ -641,7 +641,7 @@ where
641641
}
642642
}
643643

644-
#[cfg(not(feature = "unstable-simd"))]
644+
#[cfg(all(not(feature = "unstable-simd"), not(target_arch = "x86_64")))]
645645
#[inline(always)]
646646
fn format_escaped_str<W>(writer: &mut W, value: &str)
647647
where
@@ -659,6 +659,25 @@ where
659659
}
660660
}
661661

662+
#[cfg(all(not(feature = "unstable-simd"), target_arch = "x86_64"))]
663+
#[inline(always)]
664+
fn format_escaped_str<W>(writer: &mut W, value: &str)
665+
where
666+
W: ?Sized + io::Write + WriteExt,
667+
{
668+
unsafe {
669+
reserve_str!(writer, value);
670+
671+
let written = format_escaped_str_impl_sse2_128(
672+
writer.as_mut_buffer_ptr(),
673+
value.as_bytes().as_ptr(),
674+
value.len(),
675+
);
676+
677+
writer.set_written(written);
678+
}
679+
}
680+
662681
#[inline]
663682
pub fn to_writer<W, T>(writer: W, value: &T) -> Result<()>
664683
where

src/serialize/writer/str/mod.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,25 @@ mod escape;
55
#[macro_use]
66
mod scalar;
77

8+
#[cfg(target_arch = "x86_64")]
9+
mod sse2;
10+
811
#[cfg(all(feature = "unstable-simd", target_arch = "x86_64", feature = "avx512"))]
912
mod avx512;
1013

1114
#[cfg(feature = "unstable-simd")]
1215
mod generic;
1316

14-
#[cfg(all(feature = "unstable-simd", target_arch = "x86_64", feature = "avx512"))]
15-
pub use avx512::format_escaped_str_impl_512vl;
17+
#[cfg(all(not(feature = "unstable-simd"), not(target_arch = "x86_64")))]
18+
pub use scalar::format_escaped_str_scalar;
1619

1720
#[allow(unused_imports)]
1821
#[cfg(feature = "unstable-simd")]
1922
pub use generic::format_escaped_str_impl_generic_128;
2023

21-
#[cfg(not(feature = "unstable-simd"))]
22-
pub use scalar::format_escaped_str_scalar;
24+
#[cfg(all(feature = "unstable-simd", target_arch = "x86_64", feature = "avx512"))]
25+
pub use avx512::format_escaped_str_impl_512vl;
26+
27+
#[allow(unused_imports)]
28+
#[cfg(target_arch = "x86_64")]
29+
pub use sse2::format_escaped_str_impl_sse2_128;

src/serialize/writer/str/scalar.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
22

3-
#[cfg(not(feature = "unstable-simd"))]
3+
#[cfg(all(not(feature = "unstable-simd"), not(target_arch = "x86_64")))]
44
use super::escape::{NEED_ESCAPED, QUOTE_TAB};
55

66
macro_rules! impl_format_scalar {
@@ -20,7 +20,7 @@ macro_rules! impl_format_scalar {
2020
};
2121
}
2222

23-
#[cfg(not(feature = "unstable-simd"))]
23+
#[cfg(all(not(feature = "unstable-simd"), not(target_arch = "x86_64")))]
2424
pub unsafe fn format_escaped_str_scalar(
2525
odst: *mut u8,
2626
value_ptr: *const u8,

src/serialize/writer/str/sse2.rs

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
3+
use super::escape::{NEED_ESCAPED, QUOTE_TAB};
4+
5+
use core::mem::transmute;
6+
7+
use core::arch::x86_64::{
8+
__m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8,
9+
_mm_setzero_si128, _mm_storeu_si128, _mm_subs_epu8,
10+
};
11+
12+
macro_rules! splat_mm128 {
13+
($val:expr) => {
14+
_mm_set1_epi8(transmute::<u8, i8>($val))
15+
};
16+
}
17+
18+
macro_rules! impl_format_simd_sse2_128 {
19+
($dst:expr, $src:expr, $value_len:expr) => {
20+
let last_stride_src = $src.add($value_len).sub(STRIDE);
21+
let mut nb: usize = $value_len;
22+
23+
assume!($value_len >= STRIDE);
24+
25+
let blash = splat_mm128!(b'\\');
26+
let quote = splat_mm128!(b'"');
27+
let x20 = splat_mm128!(31);
28+
let v0 = _mm_setzero_si128();
29+
30+
unsafe {
31+
while nb >= STRIDE {
32+
let str_vec = _mm_loadu_si128($src as *const __m128i);
33+
34+
let mask = _mm_movemask_epi8(_mm_or_si128(
35+
_mm_or_si128(
36+
_mm_cmpeq_epi8(str_vec, blash),
37+
_mm_cmpeq_epi8(str_vec, quote),
38+
),
39+
_mm_cmpeq_epi8(_mm_subs_epu8(str_vec, x20), v0),
40+
)) as u32;
41+
42+
_mm_storeu_si128($dst as *mut __m128i, str_vec);
43+
44+
if unlikely!(mask > 0) {
45+
let cn = trailing_zeros!(mask) as usize;
46+
nb -= cn;
47+
$dst = $dst.add(cn);
48+
$src = $src.add(cn);
49+
nb -= 1;
50+
let escape = QUOTE_TAB[*($src) as usize];
51+
write_escape!(escape, $dst);
52+
$dst = $dst.add(escape.1 as usize);
53+
$src = $src.add(1);
54+
} else {
55+
nb -= STRIDE;
56+
$dst = $dst.add(STRIDE);
57+
$src = $src.add(STRIDE);
58+
}
59+
}
60+
61+
if nb > 0 {
62+
let mut scratch: [u8; 32] = [b'a'; 32];
63+
let mut str_vec = _mm_loadu_si128(last_stride_src as *const __m128i);
64+
_mm_storeu_si128(scratch.as_mut_ptr() as *mut __m128i, str_vec);
65+
66+
let mut scratch_ptr = scratch.as_mut_ptr().add(16 - nb);
67+
str_vec = _mm_loadu_si128(scratch_ptr as *const __m128i);
68+
69+
let mut mask = _mm_movemask_epi8(_mm_or_si128(
70+
_mm_or_si128(
71+
_mm_cmpeq_epi8(str_vec, blash),
72+
_mm_cmpeq_epi8(str_vec, quote),
73+
),
74+
_mm_cmpeq_epi8(_mm_subs_epu8(str_vec, x20), v0),
75+
)) as u32;
76+
77+
while nb > 0 {
78+
_mm_storeu_si128($dst as *mut __m128i, str_vec);
79+
80+
if unlikely!(mask > 0) {
81+
let cn = trailing_zeros!(mask) as usize;
82+
nb -= cn;
83+
$dst = $dst.add(cn);
84+
scratch_ptr = scratch_ptr.add(cn);
85+
nb -= 1;
86+
mask >>= cn + 1;
87+
let escape = QUOTE_TAB[*(scratch_ptr) as usize];
88+
write_escape!(escape, $dst);
89+
$dst = $dst.add(escape.1 as usize);
90+
scratch_ptr = scratch_ptr.add(1);
91+
str_vec = _mm_loadu_si128(scratch_ptr as *const __m128i);
92+
} else {
93+
$dst = $dst.add(nb);
94+
break;
95+
}
96+
}
97+
}
98+
}
99+
};
100+
}
101+
102+
#[allow(dead_code)]
103+
#[inline(never)]
104+
pub unsafe fn format_escaped_str_impl_sse2_128(
105+
odst: *mut u8,
106+
value_ptr: *const u8,
107+
value_len: usize,
108+
) -> usize {
109+
const STRIDE: usize = 16;
110+
111+
let mut dst = odst;
112+
let mut src = value_ptr;
113+
114+
core::ptr::write(dst, b'"');
115+
dst = dst.add(1);
116+
117+
if value_len < STRIDE {
118+
impl_format_scalar!(dst, src, value_len)
119+
} else {
120+
impl_format_simd_sse2_128!(dst, src, value_len);
121+
}
122+
123+
core::ptr::write(dst, b'"');
124+
dst = dst.add(1);
125+
126+
dst as usize - odst as usize
127+
}

0 commit comments

Comments
 (0)
0