pack.c: add an offset argument to unpack and unpack1 · ruby/ruby@e5319dc · GitHub
[go: up one dir, main page]

Skip to content

Commit e5319dc

Browse files
committed
pack.c: add an offset argument to unpack and unpack1
[Feature #18254] This is useful to avoid repeatedly copying strings when parsing binary formats.
1 parent 717ab0b commit e5319dc

File tree

5 files changed

+103
-13
lines changed

5 files changed

+103
-13
lines changed

pack.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -944,7 +944,7 @@ hex2num(char c)
944944
#define UNPACK_1 2
945945

946946
static VALUE
947-
pack_unpack_internal(VALUE str, VALUE fmt, int mode)
947+
pack_unpack_internal(VALUE str, VALUE fmt, int mode, long offset)
948948
{
949949
#define hexdigits ruby_hexdigits
950950
char *s, *send;
@@ -973,8 +973,15 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
973973

974974
StringValue(str);
975975
StringValue(fmt);
976+
977+
if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
978+
len = RSTRING_LEN(str);
979+
if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
980+
976981
s = RSTRING_PTR(str);
977-
send = s + RSTRING_LEN(str);
982+
send = s + len;
983+
s += offset;
984+
978985
p = RSTRING_PTR(fmt);
979986
pend = p + RSTRING_LEN(fmt);
980987

@@ -1614,16 +1621,16 @@ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
16141621
}
16151622

16161623
static VALUE
1617-
pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt)
1624+
pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
16181625
{
16191626
int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1620-
return pack_unpack_internal(str, fmt, mode);
1627+
return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
16211628
}
16221629

16231630
static VALUE
1624-
pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt)
1631+
pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
16251632
{
1626-
return pack_unpack_internal(str, fmt, UNPACK_1);
1633+
return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
16271634
}
16281635

16291636
int

pack.rb

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,11 @@ def pack(fmt, buffer: nil)
148148
class String
149149
# call-seq:
150150
# str.unpack(format) -> anArray
151+
# str.unpack(format, offset: anInteger) -> anArray
151152
#
152153
# Decodes <i>str</i> (which may contain binary data) according to the
153-
# format string, returning an array of each value extracted. The
154-
# format string consists of a sequence of single-character directives,
154+
# format string, returning an array of each value extracted.
155+
# The format string consists of a sequence of single-character directives,
155156
# summarized in the table at the end of this entry.
156157
# Each directive may be followed
157158
# by a number, indicating the number of times to repeat with this
@@ -161,7 +162,15 @@ class String
161162
# exclamation mark (``<code>!</code>'') to use the underlying
162163
# platform's native size for the specified type; otherwise, it uses a
163164
# platform-independent consistent size. Spaces are ignored in the
164-
# format string. See also String#unpack1, Array#pack.
165+
# format string.
166+
#
167+
# The keyword <i>offset</i> can be given to start the decoding after skipping
168+
# the specified number of bytes:
169+
# "abc".unpack("C*") # => [97, 98, 99]
170+
# "abc".unpack("C*", offset: 2) # => [99]
171+
# "abc".unpack("C*", offset: 4) # => offset outside of string (ArgumentError)
172+
#
173+
# See also String#unpack1, Array#pack.
165174
#
166175
# "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
167176
# "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
@@ -263,15 +272,23 @@ class String
263272
# * J, J! j, and j! are available since Ruby 2.3.
264273
# * Q_, Q!, q_, and q! are available since Ruby 2.1.
265274
# * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
266-
def unpack(fmt)
267-
Primitive.pack_unpack(fmt)
275+
def unpack(fmt, offset: 0)
276+
Primitive.pack_unpack(fmt, offset)
268277
end
269278

270279
# call-seq:
271280
# str.unpack1(format) -> obj
281+
# str.unpack1(format, offset: anInteger) -> obj
272282
#
273283
# Decodes <i>str</i> (which may contain binary data) according to the
274284
# format string, returning the first value extracted.
285+
#
286+
# The keyword <i>offset</i> can be given to start the decoding after skipping
287+
# the specified number of bytes:
288+
# "abc".unpack1("C*") # => 97
289+
# "abc".unpack1("C*", offset: 2) # => 99
290+
# "abc".unpack1("C*", offset: 4) # => offset outside of string (ArgumentError)
291+
#
275292
# See also String#unpack, Array#pack.
276293
#
277294
# Contrast with String#unpack:
@@ -287,7 +304,7 @@ def unpack(fmt)
287304
#
288305
# Thus unpack1 is convenient, makes clear the intention and signals
289306
# the expected return value to those reading the code.
290-
def unpack1(fmt)
291-
Primitive.pack_unpack1(fmt)
307+
def unpack1(fmt, offset: 0)
308+
Primitive.pack_unpack1(fmt, offset)
292309
end
293310
end

spec/ruby/core/string/unpack/shared/basic.rb

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
it "raises a TypeError when passed an Integer" do
1717
-> { "abc".unpack(1) }.should raise_error(TypeError)
1818
end
19+
20+
ruby_version_is "3.1" do
21+
it "starts unpacking from the given offset" do
22+
"abc".unpack("CC", offset: 1).should == [98, 99]
23+
end
24+
end
1925
end
2026

2127
describe :string_unpack_no_platform, shared: true do
@@ -26,4 +32,18 @@
2632
it "raises an ArgumentError when the format modifier is '!'" do
2733
-> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError)
2834
end
35+
36+
ruby_version_is "3.1" do
37+
it "raises an ArgumentError when the offset is negative" do
38+
-> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError)
39+
end
40+
41+
it "returns nil if the offset is at the end of the string" do
42+
"a".unpack("C", offset: 1).should == [nil]
43+
end
44+
45+
it "raises an ArgumentError when the offset is larget than the string" do
46+
-> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError)
47+
end
48+
end
2949
end

spec/ruby/core/string/unpack1_spec.rb

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,24 @@
77
"aG9nZWZ1Z2E=".unpack1("m").should == "hogefuga"
88
"A".unpack1("B*").should == "01000001"
99
end
10+
11+
ruby_version_is "3.1" do
12+
it "starts unpacking from the given offset" do
13+
"ZZABCD".unpack1('x3C', offset: 2).should == "ABCD".unpack('x3C')[0]
14+
"ZZZZaG9nZWZ1Z2E=".unpack1("m", offset: 4).should == "hogefuga"
15+
"ZA".unpack1("B*", offset: 1).should == "01000001"
16+
end
17+
18+
it "raises an ArgumentError when the offset is negative" do
19+
-> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError)
20+
end
21+
22+
it "returns nil if the offset is at the end of the string" do
23+
"a".unpack1("C", offset: 1).should == nil
24+
end
25+
26+
it "raises an ArgumentError when the offset is larget than the string" do
27+
-> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError)
28+
end
29+
end
1030
end

test/ruby/test_pack.rb

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -869,4 +869,30 @@ def test_unpack1
869869
assert_equal "hogefuga", "aG9nZWZ1Z2E=".unpack1("m")
870870
assert_equal "01000001", "A".unpack1("B*")
871871
end
872+
873+
def test_unpack1_offset
874+
assert_equal 65, "ZA".unpack1("C", offset: 1)
875+
assert_equal "01000001", "YZA".unpack1("B*", offset: 2)
876+
assert_nil "abc".unpack1("C", offset: 3)
877+
assert_raise_with_message(ArgumentError, /offset can't be negative/) {
878+
"a".unpack1("C", offset: -1)
879+
}
880+
assert_raise_with_message(ArgumentError, /offset outside of string/) {
881+
"a".unpack1("C", offset: 2)
882+
}
883+
assert_nil "a".unpack1("C", offset: 1)
884+
end
885+
886+
def test_unpack_offset
887+
assert_equal [65], "ZA".unpack("C", offset: 1)
888+
assert_equal ["01000001"], "YZA".unpack("B*", offset: 2)
889+
assert_equal [nil, nil, nil], "abc".unpack("CCC", offset: 3)
890+
assert_raise_with_message(ArgumentError, /offset can't be negative/) {
891+
"a".unpack("C", offset: -1)
892+
}
893+
assert_raise_with_message(ArgumentError, /offset outside of string/) {
894+
"a".unpack("C", offset: 2)
895+
}
896+
assert_equal [nil], "a".unpack("C", offset: 1)
897+
end
872898
end

0 commit comments

Comments
 (0)
0