[JRuby] Optimize `scan()`: Remove duplicate `if (restLen() < patterns… · ruby/strscan@843e931 · GitHub
[go: up one dir, main page]

Skip to content

Commit 843e931

Browse files
authored
[JRuby] Optimize scan(): Remove duplicate if (restLen() < patternsize()) return context.nil; checks in !headonly. (#110)
- before: #109

## Why?

https://github.com/ruby/strscan/blob/d31274f41b7c1e28f23d58cf7bfea03baa818cb7/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java#L371-L373

This means the following:

`if (str.size() - curr < pattern.size()) return context.nil;`

A similar check is made within `StringSupport#index()` within `!headonly`.

https://github.com/jruby/jruby/blob/be7815ec02356a58891c8727bb448f0c6a826d96/core/src/main/java/org/jruby/util/StringSupport.java#L1706-L1720

```Java
public static int index(ByteList source, ByteList other, int offset, Encoding enc) {
    int sourceLen = source.realSize();
    int sourceBegin = source.begin();
    int otherLen = other.realSize();

    if (otherLen == 0) return offset;
    if (sourceLen - offset < otherLen) return -1;
```

- source = `strBL`
- other = `patternBL`
- offset = `strBeg + curr`

This means the following:

`if (strBL.realSize() - (strBeg + curr) < patternBL.realSize()) return -1;`

Both checks are the same.

## Benchmark

It shows String as a pattern is 2.40x faster than Regexp as a pattern.

```
$ benchmark-driver benchmark/check_until.yaml
Warming up --------------------------------------
regexp 7.613M i/s - 7.593M times in 0.997350s (131.35ns/i)
regexp_var 7.793M i/s - 7.772M times in 0.997364s (128.32ns/i)
string 13.222M i/s - 13.199M times in 0.998297s (75.63ns/i)
string_var 15.283M i/s - 15.216M times in 0.995667s (65.43ns/i)
Calculating -------------------------------------
regexp 10.003M i/s - 22.840M times in 2.283361s (99.97ns/i)
regexp_var 9.991M i/s - 23.378M times in 2.340019s (100.09ns/i)
string 23.454M i/s - 39.666M times in 1.691221s (42.64ns/i)
string_var 23.998M i/s - 45.848M times in 1.910447s (41.67ns/i)

Comparison:
string_var: 23998466.3 i/s
string: 23453777.5 i/s - 1.02x slower
regexp: 10002809.4 i/s - 2.40x slower
regexp_var: 9990580.1 i/s - 2.40x slower
```
1 parent e73a154 commit 843e931

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,8 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp
265265
check(context);
266266
clearMatched();
267267

268-
if (restLen() < 0) {
268+
int restLen = restLen();
269+
if (restLen < 0) {
269270
return context.nil;
270271
}
271272

@@ -275,7 +276,7 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp
275276
if (regex instanceof RubyRegexp) {
276277
pattern = ((RubyRegexp) regex).preparePattern(str);
277278

278-
int range = currPtr + restLen();
279+
int range = currPtr + restLen;
279280

280281
Matcher matcher = pattern.matcher(strBL.getUnsafeBytes(), matchTarget(), range);
281282
final int ret;
@@ -298,17 +299,14 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp
298299
if (ret < 0) return context.nil;
299300
} else {
300301
RubyString pattern = regex.convertToString();
301-
302302
Encoding patternEnc = str.checkEncoding(pattern);
303-
304-
if (restLen() < pattern.size()) {
305-
return context.nil;
306-
}
307-
308303
ByteList patternBL = pattern.getByteList();
309304
int patternSize = patternBL.realSize();
310305

311306
if (headonly) {
307+
if (restLen < pattern.size()) {
308+
return context.nil;
309+
}
312310
if (ByteList.memcmp(strBL.unsafeBytes(), currPtr, patternBL.unsafeBytes(), patternBL.begin(), patternSize) != 0) {
313311
return context.nil;
314312
}

test/strscan/test_stringscanner.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,9 @@ def test_scan_string
325325
s = create_string_scanner(str, false)
326326
matched = s.scan('str')
327327
assert_equal 'str', matched
328+
329+
s = create_string_scanner("str")
330+
assert_equal nil, s.scan("str\0\0")
328331
end
329332

330333
def test_skip
@@ -710,6 +713,9 @@ def test_scan_until_string
710713
assert_equal(nil, s.skip_until("Qux"))
711714
assert_equal("\u0000Baz", s.scan_until("Baz"))
712715
assert_equal(11, s.pos)
716+
717+
s = create_string_scanner("str")
718+
assert_equal nil, s.scan_until("str\0\0")
713719
end
714720

715721
def test_skip_until

0 commit comments

Comments
 (0)
0