Use bstr for wtf8 pattern methods · RustPython/RustPython@e27db2e · GitHub
[go: up one dir, main page]

Skip to content

Commit e27db2e

Browse files
committed
Use bstr for wtf8 pattern methods
1 parent d175479 commit e27db2e

File tree

3 files changed

+22
-34
lines changed

3 files changed

+22
-34
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ rustpython-format = { workspace = true }
1616

1717
ascii = { workspace = true }
1818
bitflags = { workspace = true }
19+
bstr = { workspace = true }
1920
cfg-if = { workspace = true }
2021
itertools = { workspace = true }
2122
libc = { workspace = true }

common/src/wtf8/mod.rs

Lines changed: 20 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ use std::collections::TryReserveError;
4949
use std::string::String;
5050
use std::vec::Vec;
5151

52+
use bstr::ByteSlice;
53+
5254
mod core_char;
5355
mod core_str;
5456

@@ -864,49 +866,27 @@ impl Wtf8 {
864866
}
865867

866868
pub fn split(&self, pat: &Wtf8) -> impl Iterator<Item = &Self> {
867-
self.splitn(usize::MAX, pat)
869+
self.as_bytes()
870+
.split_str(pat)
871+
.map(|w| unsafe { Wtf8::from_bytes_unchecked(w) })
868872
}
869873

870874
pub fn splitn(&self, n: usize, pat: &Wtf8) -> impl Iterator<Item = &Self> {
871-
let (haystack, needle) = (self.as_bytes(), pat.as_bytes());
872-
if n == 0 {
873-
return None.into_iter().flatten();
874-
}
875-
let mut prev_idx = Some(0);
876-
let mut iter = memchr::memmem::find_iter(haystack, needle).take(n - 1);
877-
Some(std::iter::from_fn(move || {
878-
prev_idx.map(|prev| {
879-
let idx = iter.next();
880-
let chunk = &haystack[prev..idx.unwrap_or(haystack.len())];
881-
prev_idx = idx.map(|i| i + needle.len());
882-
unsafe { Wtf8::from_bytes_unchecked(chunk) }
883-
})
884-
}))
885-
.into_iter()
886-
.flatten()
875+
self.as_bytes()
876+
.splitn_str(n, pat)
877+
.map(|w| unsafe { Wtf8::from_bytes_unchecked(w) })
887878
}
888879

889880
pub fn rsplit(&self, pat: &Wtf8) -> impl Iterator<Item = &Self> {
890-
self.rsplitn(usize::MAX, pat)
881+
self.as_bytes()
882+
.rsplit_str(pat)
883+
.map(|w| unsafe { Wtf8::from_bytes_unchecked(w) })
891884
}
892885

893886
pub fn rsplitn(&self, n: usize, pat: &Wtf8) -> impl Iterator<Item = &Self> {
894-
let (haystack, needle) = (self.as_bytes(), pat.as_bytes());
895-
if n == 0 {
896-
return None.into_iter().flatten();
897-
}
898-
let mut prev_idx = Some(haystack.len());
899-
let mut iter = memchr::memmem::rfind_iter(haystack, needle).take(n - 1);
900-
Some(std::iter::from_fn(move || {
901-
prev_idx.map(|prev| {
902-
let idx = iter.next();
903-
let chunk = &haystack[idx.map_or(0, |i| i + needle.len())..prev];
904-
prev_idx = idx;
905-
unsafe { Wtf8::from_bytes_unchecked(chunk) }
906-
})
907-
}))
908-
.into_iter()
909-
.flatten()
887+
self.as_bytes()
888+
.rsplitn_str(n, pat)
889+
.map(|w| unsafe { Wtf8::from_bytes_unchecked(w) })
910890
}
911891

912892
pub fn trim_start_matches(&self, f: impl Fn(CodePoint) -> bool) -> &Self {
@@ -980,6 +960,12 @@ impl AsRef<Wtf8> for str {
980960
}
981961
}
982962

963+
impl AsRef<[u8]> for Wtf8 {
964+
fn as_ref(&self) -> &[u8] {
965+
self.as_bytes()
966+
}
967+
}
968+
983969
/// Returns a slice of the given string for the byte range \[`begin`..`end`).
984970
///
985971
/// # Panics

0 commit comments

Comments
 (0)
0