@@ -3622,50 +3622,79 @@ chunk_i(RB_BLOCK_CALL_FUNC_ARGLIST(yielder, enumerator))
3622
3622
3623
3623
/*
3624
3624
* call-seq:
3625
- * enum. chunk { |elt | ... } -> an_enumerator
3625
+ * chunk {|element| ... } -> enumerator
3626
3626
*
3627
- * Enumerates over the items, chunking them together based on the return
3628
- * value of the block.
3627
+ * Returns an Enumerator;
3628
+ * each element in the enumerator is a 2-element array consisting of:
3629
3629
*
3630
- * Consecutive elements which return the same block value are chunked together.
3630
+ * - A value returned by the block.
3631
+ * - An array ("chunk") containing the element for which that value was returned,
3632
+ * and all following elements for which the block returned the same value:
3631
3633
*
3632
- * For example, consecutive even numbers and odd numbers can be
3633
- * chunked as follows.
3634
+ * So that:
3634
3635
*
3635
- * [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5].chunk { |n|
3636
- * n.even?
3637
- * }.each { |even, ary|
3638
- * p [even, ary]
3639
- * }
3640
- * #=> [false, [3, 1]]
3641
- * # [true, [4]]
3642
- * # [false, [1, 5, 9]]
3643
- * # [true, [2, 6]]
3644
- * # [false, [5, 3, 5]]
3636
+ * - Each block return value that is different from its predecessor
3637
+ * begins a new chunk.
3638
+ * - Each block return value that is the same as its predecessor
3639
+ * continues the same chunk.
3640
+ *
3641
+ * Example:
3642
+ *
3643
+ * e = (0..10).chunk {|i| (i / 3).floor } # => #<Enumerator: ...>
3644
+ * # The enumerator elements.
3645
+ * e.next # => [0, [0, 1, 2]]
3646
+ * e.next # => [1, [3, 4, 5]]
3647
+ * e.next # => [2, [6, 7, 8]]
3648
+ * e.next # => [3, [9, 10]]
3649
+ *
3650
+ * \Method +chunk+ is especially useful for an enumerable that is already sorted.
3651
+ * This example counts words for each initial letter in a large array of words:
3652
+ *
3653
+ * # Get sorted words from a web page (URI::open requires 'open-uri').
3654
+ * url = 'https://raw.githubusercontent.com/eneko/data-repository/master/data/words.txt'
3655
+ * words = URI::open(url).readlines
3656
+ * # Make chunks, one for each letter.
3657
+ * e = words.chunk {|word| word.upcase[0] } # => #<Enumerator: ...>
3658
+ * # Display 'A' through 'F'.
3659
+ * e.each {|c, words| p [c, words.length]; break if c == 'F' }
3660
+ *
3661
+ * Output:
3662
+ *
3663
+ * ["A", 17096]
3664
+ * ["B", 11070]
3665
+ * ["C", 19901]
3666
+ * ["D", 10896]
3667
+ * ["E", 8736]
3668
+ * ["F", 6860]
3669
+ *
3670
+ * You can use the special symbol <tt>:_alone</tt> to force an element
3671
+ * into its own separate chunk:
3645
3672
*
3646
- * This method is especially useful for sorted series of elements.
3647
- * The following example counts words for each initial letter.
3673
+ * a = [0, 0, 1, 1]
3674
+ * e = a.chunk{|i| i.even? ? :_alone : true }
3675
+ * e.to_a # => [[:_alone, [0]], [:_alone, [0]], [true, [1, 1]]]
3648
3676
*
3649
- * open("/usr/share/dict/words", "r:iso-8859-1") { |f|
3650
- * f.chunk { |line| line.upcase.ord }.each { |ch, lines| p [ch.chr, lines.length] }
3677
+ * For example, you can put each line that contains a URL into its own chunk:
3678
+ *
3679
+ * pattern = /http/
3680
+ * open(filename) { |f|
3681
+ * f.chunk { |line| line =~ pattern ? :_alone : true }.each { |key, lines|
3682
+ * pp lines
3683
+ * }
3651
3684
* }
3652
- * #=> ["\n", 1]
3653
- * # ["A", 1327]
3654
- * # ["B", 1372]
3655
- * # ["C", 1507]
3656
- * # ["D", 791]
3657
- * # ...
3658
3685
*
3659
- * The following key values have special meaning:
3660
- * - +nil+ and +:_separator+ specifies that the elements should be dropped.
3661
- * - +:_alone+ specifies that the element should be chunked by itself.
3686
+ * You can use the special symbol <tt>:_separator</tt> or +nil+
3687
+ * to force an element to be ignored (not included in any chunk):
3662
3688
*
3663
- * Any other symbols that begin with an underscore will raise an error:
3689
+ * a = [0, 0, -1, 1, 1]
3690
+ * e = a.chunk{|i| i < 0 ? :_separator : true }
3691
+ * e.to_a # => [[true, [0, 0]], [true, [1, 1]]]
3664
3692
*
3665
- * items.chunk { |item| :_underscore }
3666
- * #=> RuntimeError: symbols beginning with an underscore are reserved
3693
+ * Note that the separator does end the chunk:
3667
3694
*
3668
- * +nil+ and +:_separator+ can be used to ignore some elements.
3695
+ * a = [0, 0, -1, 1, -1, 1]
3696
+ * e = a.chunk{|i| i < 0 ? :_separator : true }
3697
+ * e.to_a # => [[true, [0, 0]], [true, [1]], [true, [1]]]
3669
3698
*
3670
3699
* For example, the sequence of hyphens in svn log can be eliminated as follows:
3671
3700
*
@@ -3695,18 +3724,6 @@ chunk_i(RB_BLOCK_CALL_FUNC_ARGLIST(yielder, enumerator))
3695
3724
* pp lines
3696
3725
* }
3697
3726
*
3698
- * +:_alone+ can be used to force items into their own chunk.
3699
- * For example, you can put lines that contain a URL by themselves,
3700
- * and chunk the rest of the lines together, like this:
3701
- *
3702
- * pattern = /http/
3703
- * open(filename) { |f|
3704
- * f.chunk { |line| line =~ pattern ? :_alone : true }.each { |key, lines|
3705
- * pp lines
3706
- * }
3707
- * }
3708
- *
3709
- * If no block is given, an enumerator to `chunk` is returned instead.
3710
3727
*/
3711
3728
static VALUE
3712
3729
enum_chunk (VALUE enumerable )
0 commit comments