From 013a3c4bdf4cb843a127df8875d6181f9ab46b13 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Sun, 8 Apr 2012 11:21:50 +0300 Subject: [PATCH 01/17] Enumerator::Lazy changed to use direct procs chaining for #map and #select --- enumerator.c | 185 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 154 insertions(+), 31 deletions(-) diff --git a/enumerator.c b/enumerator.c index f01ddd51e6a071..e69d76ee359a3c 100644 --- a/enumerator.c +++ b/enumerator.c @@ -119,12 +119,14 @@ struct enumerator { VALUE lookahead; VALUE feedvalue; VALUE stop_exc; + VALUE procs; }; static VALUE rb_cGenerator, rb_cYielder; struct generator { VALUE proc; + VALUE obj; }; struct yielder { @@ -148,6 +150,7 @@ enumerator_mark(void *p) rb_gc_mark(ptr->lookahead); rb_gc_mark(ptr->feedvalue); rb_gc_mark(ptr->stop_exc); + rb_gc_mark(ptr->procs); } #define enumerator_free RUBY_TYPED_DEFAULT_FREE @@ -392,6 +395,7 @@ enumerator_init_copy(VALUE obj, VALUE orig) ptr1->fib = 0; ptr1->lookahead = Qundef; ptr1->feedvalue = Qundef; + ptr1->procs = ptr0->procs; return obj; } @@ -1054,6 +1058,7 @@ generator_mark(void *p) { struct generator *ptr = p; rb_gc_mark(ptr->proc); + rb_gc_mark(ptr->obj); } #define generator_free RUBY_TYPED_DEFAULT_FREE @@ -1158,6 +1163,7 @@ generator_init_copy(VALUE obj, VALUE orig) } ptr1->proc = ptr0->proc; + ptr1->obj = ptr0->obj; return obj; } @@ -1178,22 +1184,65 @@ generator_each(int argc, VALUE *argv, VALUE obj) } /* Lazy Enumerator methods */ + +struct proc_entry { + VALUE proc; + VALUE type; +}; + +enum proc_entry_type { + T_PROC_MAP = 0, + T_PROC_SELECT = 1 +}; + +static VALUE +process_element(VALUE procs_array, VALUE yielder, VALUE result, VALUE *move_next) +{ + struct proc_entry *entry; + VALUE *procs = RARRAY_PTR(procs_array); + long i = 0; + + *move_next = Qtrue; + for (i = 0; i < RARRAY_LEN(procs_array); i++) { + Data_Get_Struct(procs[i], struct proc_entry, entry); + if (RTEST(*move_next)) { + switch ((enum proc_entry_type) entry->type) { + case T_PROC_MAP: + result = rb_funcall(entry->proc, rb_intern("call"), + 1, result); + break; + case T_PROC_SELECT: + *move_next = rb_funcall(entry->proc, rb_intern("call"), + 1, result); + break; + } + } + } + + return result; +} static VALUE lazy_init_iterator(VALUE val, VALUE m, int argc, VALUE *argv) { VALUE result; + VALUE yielder = RARRAY_PTR(m)[0]; + VALUE procs = RARRAY_PTR(m)[1]; + VALUE move_next = Qtrue; + if (argc == 1) { VALUE args[2]; - args[0] = m; - args[1] = val; - result = rb_yield_values2(2, args); + args[0] = yielder; + result = args[1] = process_element(procs, yielder, val, &move_next); + if (RTEST(move_next)) { + result = rb_yield_values2(2, args); + } } else { VALUE args; int len = rb_long2int((long)argc + 1); args = rb_ary_tmp_new(len); - rb_ary_push(args, m); + rb_ary_push(args, yielder); if (argc > 0) { rb_ary_cat(args, argv, argc); } @@ -1208,30 +1257,85 @@ static VALUE lazy_init_yielder(VALUE val, VALUE m, int argc, VALUE *argv) { VALUE result; - result = rb_funcall2(m, id_yield, argc, argv); - if (result == Qundef) rb_iter_break(); + VALUE yielder = RARRAY_PTR(m)[0]; + VALUE procs = RARRAY_PTR(m)[1]; + VALUE move_next = Qtrue; + + argv[0] = process_element(procs, yielder, argv[0], &move_next); + if (RTEST(move_next)) { + result = rb_funcall2(yielder, id_yield, argc, argv); + if (result == Qundef) rb_iter_break(); + } return Qnil; } static VALUE lazy_init_block_i(VALUE val, VALUE m, int argc, VALUE *argv) { - rb_block_call(m, id_each, argc-1, argv+1, lazy_init_iterator, val); + rb_block_call(rb_ary_entry(m, 0), id_each, argc-1, argv+1, + lazy_init_iterator, rb_ary_new3(2, val, rb_ary_entry(m, 1))); return Qnil; } static VALUE lazy_init_block(VALUE val, VALUE m, int argc, VALUE *argv) { - rb_block_call(m, id_each, argc-1, argv+1, lazy_init_yielder, val); + rb_block_call(rb_ary_entry(m, 0), id_each, argc-1, argv+1, + lazy_init_yielder, rb_ary_new3(2, val, rb_ary_entry(m, 1))); return Qnil; } +static VALUE +lazy_generator_init(VALUE obj, VALUE procs) +{ + VALUE generator; + struct generator *gen_ptr; + + generator = generator_allocate(rb_cGenerator); + rb_block_call(generator, id_initialize, 0, 0, lazy_init_block, + rb_ary_new3(2, obj, procs)); + gen_ptr = generator_ptr(generator); + gen_ptr->obj = obj; + + return generator; +} + +static VALUE +create_proc_entry(enum proc_entry_type proc_type) +{ + struct proc_entry *entry; + VALUE entry_obj; + + entry_obj = Data_Make_Struct(rb_cObject, struct proc_entry, + 0, RUBY_DEFAULT_FREE, entry); + Data_Get_Struct(entry_obj, struct proc_entry, entry); + entry->proc = rb_block_proc(); + entry->type = proc_type; + + return entry_obj; +} + +static VALUE +lazy_add_proc(VALUE enum_obj, enum proc_entry_type proc_type) +{ + struct enumerator *ptr; + VALUE entry; + + entry = create_proc_entry(proc_type); + ptr = enumerator_ptr(enum_obj); + rb_ary_push(ptr->procs, entry); + + return enum_obj; +} + static VALUE lazy_initialize(int argc, VALUE *argv, VALUE self) { VALUE obj, meth; VALUE generator; + VALUE procs; + struct enumerator *ptr; + struct generator *gen_ptr; int offset; if (argc < 1) { @@ -1248,13 +1352,19 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) offset = 2; } } + procs = rb_ary_new(); + generator = generator_allocate(rb_cGenerator); rb_block_call(generator, id_initialize, 0, 0, (rb_block_given_p() ? lazy_init_block_i : lazy_init_block), - obj); + rb_ary_new3(2, obj, procs)); + gen_ptr = generator_ptr(generator); + gen_ptr->obj = obj; enumerator_init(self, generator, meth, argc - offset, argv + offset); - rb_ivar_set(self, id_receiver, obj); + ptr = enumerator_ptr(self); + ptr->procs = procs; + rb_ivar_set(self, id_receiver, obj); return self; } @@ -1311,29 +1421,50 @@ enumerable_lazy(VALUE obj) result = rb_class_new_instance(1, &obj, rb_cLazy); /* Qfalse indicates that the Enumerator::Lazy has no method name */ + rb_ivar_set(result, id_method, Qfalse); return result; } static VALUE -lazy_map_func(VALUE val, VALUE m, int argc, VALUE *argv) +lazy_copy(VALUE obj) { - VALUE result = rb_yield_values2(argc - 1, &argv[1]); + struct enumerator *e; + struct enumerator *new_e; + struct generator *g; + struct generator *new_g; + VALUE new_obj; + VALUE new_generator; + VALUE new_procs; - rb_funcall(argv[0], id_yield, 1, result); - return Qnil; + e = enumerator_ptr(obj); + new_obj = enumerator_init_copy(enumerator_allocate(rb_cLazy), obj); + new_e = enumerator_ptr(new_obj); + g = generator_ptr(e->obj); + new_procs = rb_ary_new4(RARRAY_LEN(e->procs), RARRAY_PTR(e->procs)); + + new_generator = lazy_generator_init(g->obj, new_procs); + new_g = generator_ptr(new_generator); + new_g->obj = g->obj; + new_e->procs = new_procs; + new_e->obj = new_generator; + + return new_obj; } static VALUE lazy_map(VALUE obj) { + VALUE new_enum; + if (!rb_block_given_p()) { rb_raise(rb_eArgError, "tried to call lazy map without a block"); } - return lazy_set_method(rb_block_call(rb_cLazy, id_new, 1, &obj, - lazy_map_func, 0), - Qnil); + new_enum = lazy_copy(obj); + lazy_add_proc(new_enum, T_PROC_MAP); + + return new_enum; } static VALUE @@ -1400,27 +1531,19 @@ lazy_flat_map(VALUE obj) Qnil); } -static VALUE -lazy_select_func(VALUE val, VALUE m, int argc, VALUE *argv) -{ - VALUE element = rb_enum_values_pack(argc - 1, argv + 1); - - if (RTEST(rb_yield(element))) { - return rb_funcall(argv[0], id_yield, 1, element); - } - return Qnil; -} - static VALUE lazy_select(VALUE obj) { + VALUE new_enum; + if (!rb_block_given_p()) { rb_raise(rb_eArgError, "tried to call lazy select without a block"); } - return lazy_set_method(rb_block_call(rb_cLazy, id_new, 1, &obj, - lazy_select_func, 0), - Qnil); + new_enum = lazy_copy(obj); + lazy_add_proc(new_enum, T_PROC_SELECT); + + return lazy_set_method(new_enum, Qnil); } static VALUE From 394ab2424f3326b32e187ac4c8e978f8ead8d519 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Tue, 17 Apr 2012 10:00:19 +0300 Subject: [PATCH 02/17] lazy_init_iterator cleanup --- enumerator.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/enumerator.c b/enumerator.c index e69d76ee359a3c..e89336175a2919 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1225,24 +1225,19 @@ static VALUE lazy_init_iterator(VALUE val, VALUE m, int argc, VALUE *argv) { VALUE result; - VALUE yielder = RARRAY_PTR(m)[0]; - VALUE procs = RARRAY_PTR(m)[1]; - VALUE move_next = Qtrue; if (argc == 1) { VALUE args[2]; - args[0] = yielder; - result = args[1] = process_element(procs, yielder, val, &move_next); - if (RTEST(move_next)) { - result = rb_yield_values2(2, args); - } + args[0] = m; + args[1] = val; + result = rb_yield_values2(2, args); } else { VALUE args; int len = rb_long2int((long)argc + 1); args = rb_ary_tmp_new(len); - rb_ary_push(args, yielder); + rb_ary_push(args, m); if (argc > 0) { rb_ary_cat(args, argv, argc); } @@ -1273,7 +1268,7 @@ static VALUE lazy_init_block_i(VALUE val, VALUE m, int argc, VALUE *argv) { rb_block_call(rb_ary_entry(m, 0), id_each, argc-1, argv+1, - lazy_init_iterator, rb_ary_new3(2, val, rb_ary_entry(m, 1))); + lazy_init_iterator, val); return Qnil; } From 463760c4c304af1b5bd3b4800c6e45055d7eda7c Mon Sep 17 00:00:00 2001 From: gregolsen Date: Tue, 17 Apr 2012 10:28:10 +0300 Subject: [PATCH 03/17] using generator as an object for procs chaining when unoptimized lazy method was called previously --- enumerator.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/enumerator.c b/enumerator.c index e89336175a2919..dee8571da4da8c 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1281,14 +1281,25 @@ lazy_init_block(VALUE val, VALUE m, int argc, VALUE *argv) } static VALUE -lazy_generator_init(VALUE obj, VALUE procs) +lazy_generator_init(VALUE old_generator, VALUE procs) { VALUE generator; + VALUE obj; struct generator *gen_ptr; + struct generator *old_gen_ptr; + + old_gen_ptr = generator_ptr(old_generator); + if (old_gen_ptr->obj) { + obj = old_gen_ptr->obj; + } else { + obj = old_generator; + } generator = generator_allocate(rb_cGenerator); + rb_block_call(generator, id_initialize, 0, 0, lazy_init_block, rb_ary_new3(2, obj, procs)); + gen_ptr = generator_ptr(generator); gen_ptr->obj = obj; @@ -1330,7 +1341,6 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) VALUE generator; VALUE procs; struct enumerator *ptr; - struct generator *gen_ptr; int offset; if (argc < 1) { @@ -1353,8 +1363,6 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) rb_block_call(generator, id_initialize, 0, 0, (rb_block_given_p() ? lazy_init_block_i : lazy_init_block), rb_ary_new3(2, obj, procs)); - gen_ptr = generator_ptr(generator); - gen_ptr->obj = obj; enumerator_init(self, generator, meth, argc - offset, argv + offset); ptr = enumerator_ptr(self); ptr->procs = procs; @@ -1426,8 +1434,6 @@ lazy_copy(VALUE obj) { struct enumerator *e; struct enumerator *new_e; - struct generator *g; - struct generator *new_g; VALUE new_obj; VALUE new_generator; VALUE new_procs; @@ -1435,14 +1441,11 @@ lazy_copy(VALUE obj) e = enumerator_ptr(obj); new_obj = enumerator_init_copy(enumerator_allocate(rb_cLazy), obj); new_e = enumerator_ptr(new_obj); - g = generator_ptr(e->obj); new_procs = rb_ary_new4(RARRAY_LEN(e->procs), RARRAY_PTR(e->procs)); - new_generator = lazy_generator_init(g->obj, new_procs); - new_g = generator_ptr(new_generator); - new_g->obj = g->obj; - new_e->procs = new_procs; + new_generator = lazy_generator_init(e->obj, new_procs); new_e->obj = new_generator; + new_e->procs = new_procs; return new_obj; } From dda7f1c468f13ff68cfc0832abdd960f346972ac Mon Sep 17 00:00:00 2001 From: gregolsen Date: Tue, 17 Apr 2012 10:45:12 +0300 Subject: [PATCH 04/17] move_next madness moved to process_element --- enumerator.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/enumerator.c b/enumerator.c index dee8571da4da8c..1f3d9cd473db53 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1196,29 +1196,35 @@ enum proc_entry_type { }; static VALUE -process_element(VALUE procs_array, VALUE yielder, VALUE result, VALUE *move_next) +process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) { + VALUE result = argv[0]; struct proc_entry *entry; VALUE *procs = RARRAY_PTR(procs_array); + VALUE move_next = Qtrue; long i = 0; - *move_next = Qtrue; for (i = 0; i < RARRAY_LEN(procs_array); i++) { Data_Get_Struct(procs[i], struct proc_entry, entry); - if (RTEST(*move_next)) { + if (RTEST(move_next)) { switch ((enum proc_entry_type) entry->type) { case T_PROC_MAP: result = rb_funcall(entry->proc, rb_intern("call"), 1, result); break; case T_PROC_SELECT: - *move_next = rb_funcall(entry->proc, rb_intern("call"), + move_next = rb_funcall(entry->proc, rb_intern("call"), 1, result); break; } } } + if (RTEST(move_next)) { + argv[0] = result; + rb_funcall2(yielder, id_yield, argc, argv); + if (result == Qundef) rb_iter_break(); + } return result; } static VALUE @@ -1251,16 +1257,10 @@ lazy_init_iterator(VALUE val, VALUE m, int argc, VALUE *argv) static VALUE lazy_init_yielder(VALUE val, VALUE m, int argc, VALUE *argv) { - VALUE result; VALUE yielder = RARRAY_PTR(m)[0]; VALUE procs = RARRAY_PTR(m)[1]; - VALUE move_next = Qtrue; - argv[0] = process_element(procs, yielder, argv[0], &move_next); - if (RTEST(move_next)) { - result = rb_funcall2(yielder, id_yield, argc, argv); - if (result == Qundef) rb_iter_break(); - } + process_element(procs, yielder, argc, argv); return Qnil; } From b35d33a1376c66901fff670f72f5e9bc481a469d Mon Sep 17 00:00:00 2001 From: gregolsen Date: Sat, 28 Apr 2012 11:08:42 +0300 Subject: [PATCH 05/17] lazy_take optimized --- enumerator.c | 84 ++++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/enumerator.c b/enumerator.c index 1f3d9cd473db53..375d68dee730f0 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1188,11 +1188,13 @@ generator_each(int argc, VALUE *argv, VALUE obj) struct proc_entry { VALUE proc; VALUE type; + VALUE memo; }; enum proc_entry_type { T_PROC_MAP = 0, - T_PROC_SELECT = 1 + T_PROC_SELECT = 1, + T_PROC_TAKE = 2 }; static VALUE @@ -1202,6 +1204,8 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) struct proc_entry *entry; VALUE *procs = RARRAY_PTR(procs_array); VALUE move_next = Qtrue; + VALUE break_point = Qtrue; + NODE *memo; long i = 0; for (i = 0; i < RARRAY_LEN(procs_array); i++) { @@ -1216,6 +1220,15 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) move_next = rb_funcall(entry->proc, rb_intern("call"), 1, result); break; + case T_PROC_TAKE: + memo = RNODE(entry->memo); + if (memo->u3.cnt == 0) { + move_next = Qfalse; + break_point = Qundef; + } else if (--memo->u3.cnt == 0) { + break_point = Qundef; + } + break; } } } @@ -1223,8 +1236,8 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) if (RTEST(move_next)) { argv[0] = result; rb_funcall2(yielder, id_yield, argc, argv); - if (result == Qundef) rb_iter_break(); } + if (break_point == Qundef) rb_iter_break(); return result; } static VALUE @@ -1307,7 +1320,7 @@ lazy_generator_init(VALUE old_generator, VALUE procs) } static VALUE -create_proc_entry(enum proc_entry_type proc_type) +create_proc_entry(enum proc_entry_type proc_type, VALUE memo) { struct proc_entry *entry; VALUE entry_obj; @@ -1315,19 +1328,22 @@ create_proc_entry(enum proc_entry_type proc_type) entry_obj = Data_Make_Struct(rb_cObject, struct proc_entry, 0, RUBY_DEFAULT_FREE, entry); Data_Get_Struct(entry_obj, struct proc_entry, entry); - entry->proc = rb_block_proc(); + if (proc_type != T_PROC_TAKE) { + entry->proc = rb_block_proc(); + } entry->type = proc_type; + entry->memo = memo; return entry_obj; } static VALUE -lazy_add_proc(VALUE enum_obj, enum proc_entry_type proc_type) +lazy_add_proc(VALUE enum_obj, enum proc_entry_type proc_type, VALUE memo) { struct enumerator *ptr; VALUE entry; - entry = create_proc_entry(proc_type); + entry = create_proc_entry(proc_type, memo); ptr = enumerator_ptr(enum_obj); rb_ary_push(ptr->procs, entry); @@ -1430,7 +1446,7 @@ enumerable_lazy(VALUE obj) } static VALUE -lazy_copy(VALUE obj) +lazy_copy(int argc, VALUE *argv, VALUE obj) { struct enumerator *e; struct enumerator *new_e; @@ -1439,14 +1455,19 @@ lazy_copy(VALUE obj) VALUE new_procs; e = enumerator_ptr(obj); - new_obj = enumerator_init_copy(enumerator_allocate(rb_cLazy), obj); - new_e = enumerator_ptr(new_obj); new_procs = rb_ary_new4(RARRAY_LEN(e->procs), RARRAY_PTR(e->procs)); - new_generator = lazy_generator_init(e->obj, new_procs); + + new_obj = enumerator_init_copy(enumerator_allocate(rb_cLazy), obj); + new_e = enumerator_ptr(new_obj); new_e->obj = new_generator; new_e->procs = new_procs; + if (argc > 0) { + new_e->meth = rb_to_id(*argv++); + new_e->args = rb_ary_new4(argc - 1, argv); + } + return new_obj; } @@ -1459,8 +1480,8 @@ lazy_map(VALUE obj) rb_raise(rb_eArgError, "tried to call lazy map without a block"); } - new_enum = lazy_copy(obj); - lazy_add_proc(new_enum, T_PROC_MAP); + new_enum = lazy_copy(0, 0, obj); + lazy_add_proc(new_enum, T_PROC_MAP, Qnil); return new_enum; } @@ -1538,8 +1559,8 @@ lazy_select(VALUE obj) rb_raise(rb_eArgError, "tried to call lazy select without a block"); } - new_enum = lazy_copy(obj); - lazy_add_proc(new_enum, T_PROC_SELECT); + new_enum = lazy_copy(0, 0, obj); + lazy_add_proc(new_enum, T_PROC_SELECT, Qnil); return lazy_set_method(new_enum, Qnil); } @@ -1650,42 +1671,29 @@ lazy_zip(int argc, VALUE *argv, VALUE obj) rb_ary_new4(argc, argv)); } -static VALUE -lazy_take_func(VALUE val, VALUE args, int argc, VALUE *argv) -{ - NODE *memo = RNODE(args); - - rb_funcall2(argv[0], id_yield, argc - 1, argv + 1); - if (--memo->u3.cnt == 0) { - memo->u3.cnt = memo->u2.argc; - return Qundef; - } - else { - return Qnil; - } -} - static VALUE lazy_take(VALUE obj, VALUE n) { NODE *memo; long len = NUM2LONG(n); - int argc = 1; - VALUE argv[3]; + VALUE new_enum; + int argc = 0; + VALUE argv[2]; if (len < 0) { rb_raise(rb_eArgError, "attempt to take negative size"); } - argv[0] = obj; + if (len == 0) { - argv[1] = sym_cycle; - argv[2] = INT2NUM(0); - argc = 3; + argv[0] = sym_cycle; + argv[1] = INT2NUM(0); + argc = 2; } memo = NEW_MEMO(0, len, len); - return lazy_set_method(rb_block_call(rb_cLazy, id_new, argc, argv, - lazy_take_func, (VALUE) memo), - rb_ary_new3(1, n)); + new_enum = lazy_copy(argc, argv, obj); + lazy_add_proc(new_enum, T_PROC_TAKE, (VALUE) memo); + + return new_enum; } static VALUE From 4c70750be2d45d4e970f840b022d4da0113c948b Mon Sep 17 00:00:00 2001 From: gregolsen Date: Sat, 28 Apr 2012 11:18:17 +0300 Subject: [PATCH 06/17] lazy_drop optimized --- enumerator.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/enumerator.c b/enumerator.c index 375d68dee730f0..c9b315cd609070 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1194,7 +1194,8 @@ struct proc_entry { enum proc_entry_type { T_PROC_MAP = 0, T_PROC_SELECT = 1, - T_PROC_TAKE = 2 + T_PROC_TAKE = 2, + T_PROC_DROP = 3 }; static VALUE @@ -1229,6 +1230,12 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) break_point = Qundef; } break; + case T_PROC_DROP: + memo = RNODE(entry->memo); + if (memo->u3.cnt-- > 0) { + move_next = Qfalse; + } + break; } } } @@ -1328,7 +1335,7 @@ create_proc_entry(enum proc_entry_type proc_type, VALUE memo) entry_obj = Data_Make_Struct(rb_cObject, struct proc_entry, 0, RUBY_DEFAULT_FREE, entry); Data_Get_Struct(entry_obj, struct proc_entry, entry); - if (proc_type != T_PROC_TAKE) { + if (proc_type != T_PROC_TAKE && proc_type != T_PROC_DROP) { entry->proc = rb_block_proc(); } entry->type = proc_type; @@ -1713,33 +1720,22 @@ lazy_take_while(VALUE obj) Qnil); } -static VALUE -lazy_drop_func(VALUE val, VALUE args, int argc, VALUE *argv) -{ - NODE *memo = RNODE(args); - - if (memo->u3.cnt == 0) { - rb_funcall2(argv[0], id_yield, argc - 1, argv + 1); - } - else { - memo->u3.cnt--; - } - return Qnil; -} - static VALUE lazy_drop(VALUE obj, VALUE n) { NODE *memo; long len = NUM2LONG(n); + VALUE new_enum; if (len < 0) { rb_raise(rb_eArgError, "attempt to drop negative size"); } + memo = NEW_MEMO(0, 0, len); - return lazy_set_method(rb_block_call(rb_cLazy, id_new, 1, &obj, - lazy_drop_func, (VALUE) memo), - rb_ary_new3(1, n)); + new_enum = lazy_copy(0, 0, obj); + lazy_add_proc(new_enum, T_PROC_DROP, (VALUE) memo); + + return new_enum; } static VALUE From a08728ff22b23e765d92611548382537cf7be869 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Mon, 30 Apr 2012 08:49:37 +0300 Subject: [PATCH 07/17] marking all generators created by unoptimized lazy methods as hybrid --- enumerator.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/enumerator.c b/enumerator.c index c9b315cd609070..3f28ae345fca5c 100644 --- a/enumerator.c +++ b/enumerator.c @@ -127,6 +127,7 @@ static VALUE rb_cGenerator, rb_cYielder; struct generator { VALUE proc; VALUE obj; + VALUE hybrid; }; struct yielder { @@ -1059,6 +1060,7 @@ generator_mark(void *p) struct generator *ptr = p; rb_gc_mark(ptr->proc); rb_gc_mark(ptr->obj); + rb_gc_mark(ptr->hybrid); } #define generator_free RUBY_TYPED_DEFAULT_FREE @@ -1115,6 +1117,7 @@ generator_init(VALUE obj, VALUE proc) } ptr->proc = proc; + ptr->hybrid = Qfalse; return obj; } @@ -1309,10 +1312,10 @@ lazy_generator_init(VALUE old_generator, VALUE procs) struct generator *old_gen_ptr; old_gen_ptr = generator_ptr(old_generator); - if (old_gen_ptr->obj) { - obj = old_gen_ptr->obj; - } else { + if (old_gen_ptr->hybrid) { obj = old_generator; + } else { + obj = old_gen_ptr->obj; } generator = generator_allocate(rb_cGenerator); @@ -1362,6 +1365,7 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) { VALUE obj, meth; VALUE generator; + struct generator *g_ptr; VALUE procs; struct enumerator *ptr; int offset; @@ -1386,6 +1390,8 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) rb_block_call(generator, id_initialize, 0, 0, (rb_block_given_p() ? lazy_init_block_i : lazy_init_block), rb_ary_new3(2, obj, procs)); + g_ptr = generator_ptr(generator); + g_ptr->obj = obj; enumerator_init(self, generator, meth, argc - offset, argv + offset); ptr = enumerator_ptr(self); ptr->procs = procs; @@ -1394,6 +1400,17 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) return self; } +static VALUE +lazy_mark_as_hybrid(VALUE obj) +{ + struct enumerator *e; + struct generator *g; + e = enumerator_ptr(obj); + g = generator_ptr(e->obj); + g->hybrid = Qtrue; + return obj; +} + static VALUE lazy_set_method(VALUE lazy, VALUE args) { @@ -1406,6 +1423,7 @@ lazy_set_method(VALUE lazy, VALUE args) else { rb_ivar_set(lazy, id_arguments, args); } + lazy_mark_as_hybrid(lazy); return lazy; } @@ -1569,7 +1587,7 @@ lazy_select(VALUE obj) new_enum = lazy_copy(0, 0, obj); lazy_add_proc(new_enum, T_PROC_SELECT, Qnil); - return lazy_set_method(new_enum, Qnil); + return new_enum; } static VALUE From eb11c1161f35772c937df19fa84ee2fcf163f772 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Thu, 10 May 2012 08:32:57 +0300 Subject: [PATCH 08/17] lazy #take_while optimized --- enumerator.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/enumerator.c b/enumerator.c index 3f28ae345fca5c..4ac556bd76ef85 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1195,10 +1195,11 @@ struct proc_entry { }; enum proc_entry_type { - T_PROC_MAP = 0, - T_PROC_SELECT = 1, - T_PROC_TAKE = 2, - T_PROC_DROP = 3 + T_PROC_MAP = 0, + T_PROC_SELECT = 1, + T_PROC_TAKE = 2, + T_PROC_DROP = 3, + T_PROC_TAKE_WHILE = 4 }; static VALUE @@ -1239,6 +1240,11 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) move_next = Qfalse; } break; + case T_PROC_TAKE_WHILE: + move_next = rb_funcall(entry->proc, rb_intern("call"), + 1, result); + if (!RTEST(move_next)) result = Qundef; + break; } } } @@ -1721,21 +1727,15 @@ lazy_take(VALUE obj, VALUE n) return new_enum; } -static VALUE -lazy_take_while_func(VALUE val, VALUE args, int argc, VALUE *argv) -{ - VALUE result = rb_yield_values2(argc - 1, &argv[1]); - if (!RTEST(result)) return Qundef; - rb_funcall2(argv[0], id_yield, argc - 1, argv + 1); - return Qnil; -} - static VALUE lazy_take_while(VALUE obj) { - return lazy_set_method(rb_block_call(rb_cLazy, id_new, 1, &obj, - lazy_take_while_func, 0), - Qnil); + VALUE new_enum; + + new_enum = lazy_copy(0, 0, obj); + lazy_add_proc(new_enum, T_PROC_TAKE_WHILE, Qnil); + + return lazy_set_method(new_enum, Qnil); } static VALUE From 540f7bda4fe868a2d0b1d83de09615c47d4df0f4 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Thu, 10 May 2012 09:07:07 +0300 Subject: [PATCH 09/17] lazy #reject & #drop_while optimized --- enumerator.c | 58 +++++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/enumerator.c b/enumerator.c index 4ac556bd76ef85..63cfb048b0b450 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1199,7 +1199,9 @@ enum proc_entry_type { T_PROC_SELECT = 1, T_PROC_TAKE = 2, T_PROC_DROP = 3, - T_PROC_TAKE_WHILE = 4 + T_PROC_TAKE_WHILE = 4, + T_PROC_DROP_WHILE = 5, + T_PROC_REJECT = 6 }; static VALUE @@ -1245,6 +1247,18 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) 1, result); if (!RTEST(move_next)) result = Qundef; break; + case T_PROC_DROP_WHILE: + memo = RNODE(entry->memo); + if (!memo->u3.state) { + move_next = !RTEST(rb_funcall(entry->proc, + rb_intern("call"), 1, result)); + if (move_next) memo->u3.state = TRUE; + } + break; + case T_PROC_REJECT: + move_next = !RTEST(rb_funcall(entry->proc, rb_intern("call"), + 1, result)); + break; } } } @@ -1596,27 +1610,19 @@ lazy_select(VALUE obj) return new_enum; } -static VALUE -lazy_reject_func(VALUE val, VALUE m, int argc, VALUE *argv) -{ - VALUE element = rb_enum_values_pack(argc - 1, argv + 1); - - if (!RTEST(rb_yield(element))) { - return rb_funcall(argv[0], id_yield, 1, element); - } - return Qnil; -} - static VALUE lazy_reject(VALUE obj) { + VALUE new_enum; + if (!rb_block_given_p()) { rb_raise(rb_eArgError, "tried to call lazy reject without a block"); } - return lazy_set_method(rb_block_call(rb_cLazy, id_new, 1, &obj, - lazy_reject_func, 0), - Qnil); + new_enum = lazy_copy(0, 0, obj); + lazy_add_proc(new_enum, T_PROC_REJECT, Qnil); + + return lazy_set_method(new_enum, Qnil); } static VALUE @@ -1756,29 +1762,17 @@ lazy_drop(VALUE obj, VALUE n) return new_enum; } -static VALUE -lazy_drop_while_func(VALUE val, VALUE args, int argc, VALUE *argv) -{ - NODE *memo = RNODE(args); - - if (!memo->u3.state && !RTEST(rb_yield_values2(argc - 1, &argv[1]))) { - memo->u3.state = TRUE; - } - if (memo->u3.state) { - rb_funcall2(argv[0], id_yield, argc - 1, argv + 1); - } - return Qnil; -} - static VALUE lazy_drop_while(VALUE obj) { NODE *memo; + VALUE new_enum; memo = NEW_MEMO(0, 0, FALSE); - return lazy_set_method(rb_block_call(rb_cLazy, id_new, 1, &obj, - lazy_drop_while_func, (VALUE) memo), - Qnil); + new_enum = lazy_copy(0, 0, obj); + lazy_add_proc(new_enum, T_PROC_DROP_WHILE, (VALUE) memo); + + return lazy_set_method(new_enum, Qnil); } static VALUE From 8c05fcc792055bfd8fa0c65aa2f4c36f5e06b04b Mon Sep 17 00:00:00 2001 From: gregolsen Date: Tue, 15 May 2012 21:45:56 +0300 Subject: [PATCH 10/17] #grep optimized --- enumerator.c | 57 +++++++++++++++++++--------------------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/enumerator.c b/enumerator.c index 63cfb048b0b450..26f4253b2c90eb 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1201,13 +1201,14 @@ enum proc_entry_type { T_PROC_DROP = 3, T_PROC_TAKE_WHILE = 4, T_PROC_DROP_WHILE = 5, - T_PROC_REJECT = 6 + T_PROC_REJECT = 6, + T_PROC_GREP = 7 }; static VALUE process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) { - VALUE result = argv[0]; + VALUE result = rb_enum_values_pack(argc, argv); struct proc_entry *entry; VALUE *procs = RARRAY_PTR(procs_array); VALUE move_next = Qtrue; @@ -1259,13 +1260,20 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) move_next = !RTEST(rb_funcall(entry->proc, rb_intern("call"), 1, result)); break; + case T_PROC_GREP: + move_next = rb_funcall(entry->memo, id_eqq, 1, result); + + if (RTEST(move_next) && entry->proc) { + result = rb_funcall(entry->proc, + rb_intern("call"), 1, result); + } + break; } } } if (RTEST(move_next)) { - argv[0] = result; - rb_funcall2(yielder, id_yield, argc, argv); + rb_funcall2(yielder, id_yield, 1, &result); } if (break_point == Qundef) rb_iter_break(); return result; @@ -1358,7 +1366,7 @@ create_proc_entry(enum proc_entry_type proc_type, VALUE memo) entry_obj = Data_Make_Struct(rb_cObject, struct proc_entry, 0, RUBY_DEFAULT_FREE, entry); Data_Get_Struct(entry_obj, struct proc_entry, entry); - if (proc_type != T_PROC_TAKE && proc_type != T_PROC_DROP) { + if (rb_block_given_p()) { entry->proc = rb_block_proc(); } entry->type = proc_type; @@ -1622,41 +1630,18 @@ lazy_reject(VALUE obj) new_enum = lazy_copy(0, 0, obj); lazy_add_proc(new_enum, T_PROC_REJECT, Qnil); - return lazy_set_method(new_enum, Qnil); -} - -static VALUE -lazy_grep_func(VALUE val, VALUE m, int argc, VALUE *argv) -{ - VALUE i = rb_enum_values_pack(argc - 1, argv + 1); - VALUE result = rb_funcall(m, id_eqq, 1, i); - - if (RTEST(result)) { - rb_funcall(argv[0], id_yield, 1, i); - } - return Qnil; + return new_enum; } static VALUE -lazy_grep_iter(VALUE val, VALUE m, int argc, VALUE *argv) +lazy_grep(VALUE obj, VALUE pattern) { - VALUE i = rb_enum_values_pack(argc - 1, argv + 1); - VALUE result = rb_funcall(m, id_eqq, 1, i); + VALUE new_enum; - if (RTEST(result)) { - rb_funcall(argv[0], id_yield, 1, rb_yield(i)); - } - return Qnil; -} + new_enum = lazy_copy(0, 0, obj); + lazy_add_proc(new_enum, T_PROC_GREP, pattern); -static VALUE -lazy_grep(VALUE obj, VALUE pattern) -{ - return lazy_set_method(rb_block_call(rb_cLazy, id_new, 1, &obj, - rb_block_given_p() ? - lazy_grep_iter : lazy_grep_func, - pattern), - rb_ary_new3(1, pattern)); + return new_enum; } static VALUE @@ -1741,7 +1726,7 @@ lazy_take_while(VALUE obj) new_enum = lazy_copy(0, 0, obj); lazy_add_proc(new_enum, T_PROC_TAKE_WHILE, Qnil); - return lazy_set_method(new_enum, Qnil); + return new_enum; } static VALUE @@ -1772,7 +1757,7 @@ lazy_drop_while(VALUE obj) new_enum = lazy_copy(0, 0, obj); lazy_add_proc(new_enum, T_PROC_DROP_WHILE, (VALUE) memo); - return lazy_set_method(new_enum, Qnil); + return new_enum; } static VALUE From 6a6284f15ec9fcfb006f5ab4c77038ea76e6c751 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Wed, 16 May 2012 10:29:23 +0300 Subject: [PATCH 11/17] instantiating proc_entry properly with marking for GC --- enumerator.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/enumerator.c b/enumerator.c index 26f4253b2c90eb..fd1745fc73a164 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1205,6 +1205,32 @@ enum proc_entry_type { T_PROC_GREP = 7 }; +static void +proc_entry_mark(void *p) +{ + struct proc_entry *ptr = p; + rb_gc_mark(ptr->proc); + rb_gc_mark(ptr->type); + rb_gc_mark(ptr->memo); +} + +#define proc_entry_free RUBY_TYPED_DEFAULT_FREE + +static size_t +proc_entry_memsize(const void *p) +{ + return p ? sizeof(struct proc_entry) : 0; +} + +static const rb_data_type_t proc_entry_data_type = { + "proc_entry", + { + proc_entry_mark, + proc_entry_free, + proc_entry_memsize, + }, +}; + static VALUE process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) { @@ -1217,7 +1243,7 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) long i = 0; for (i = 0; i < RARRAY_LEN(procs_array); i++) { - Data_Get_Struct(procs[i], struct proc_entry, entry); + TypedData_Get_Struct(procs[i], struct proc_entry, &proc_entry_data_type, entry); if (RTEST(move_next)) { switch ((enum proc_entry_type) entry->type) { case T_PROC_MAP: @@ -1363,9 +1389,8 @@ create_proc_entry(enum proc_entry_type proc_type, VALUE memo) struct proc_entry *entry; VALUE entry_obj; - entry_obj = Data_Make_Struct(rb_cObject, struct proc_entry, - 0, RUBY_DEFAULT_FREE, entry); - Data_Get_Struct(entry_obj, struct proc_entry, entry); + entry_obj = TypedData_Make_Struct(rb_cObject, struct proc_entry, + &proc_entry_data_type, entry); if (rb_block_given_p()) { entry->proc = rb_block_proc(); } From b8af5d6a77fd50ec8328f793063c0868b98a73bc Mon Sep 17 00:00:00 2001 From: gregolsen Date: Wed, 6 Jun 2012 19:22:07 +0300 Subject: [PATCH 12/17] adding test for hybrid chaining --- test/ruby/test_lazy_enumerator.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/ruby/test_lazy_enumerator.rb b/test/ruby/test_lazy_enumerator.rb index 106f8006f9a3ee..574257085c7304 100644 --- a/test/ruby/test_lazy_enumerator.rb +++ b/test/ruby/test_lazy_enumerator.rb @@ -296,6 +296,11 @@ def test_force assert_equal([1, 2, 3], (1..Float::INFINITY).lazy.take(3).force) end + def test_hybrid_chaining + assert_equal([2], [1,2,3].lazy.cycle(1).select { |x| x == 2 }.force) + assert_equal([2], [1,2,3].lazy.select { |x| x == 2 }.cycle(1).force) + end + def test_inspect assert_equal("#", Enumerator::Lazy.new(1..10).inspect) From e40e8ed2dfa63060a7e4a39be9235d45a04d10e6 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Thu, 7 Jun 2012 11:37:29 +0300 Subject: [PATCH 13/17] fixing multiple force call problem for #take --- enumerator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/enumerator.c b/enumerator.c index fd1745fc73a164..0d7fbbb963d5ac 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1260,6 +1260,7 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) move_next = Qfalse; break_point = Qundef; } else if (--memo->u3.cnt == 0) { + memo->u3.cnt = memo->u2.argc; break_point = Qundef; } break; @@ -1736,6 +1737,7 @@ lazy_take(VALUE obj, VALUE n) argv[1] = INT2NUM(0); argc = 2; } + memo = NEW_MEMO(0, len, len); new_enum = lazy_copy(argc, argv, obj); lazy_add_proc(new_enum, T_PROC_TAKE, (VALUE) memo); From b2437fc9a6ac969944b2973c476046f5011aff2e Mon Sep 17 00:00:00 2001 From: gregolsen Date: Thu, 7 Jun 2012 11:58:47 +0300 Subject: [PATCH 14/17] tiny cleanup --- enumerator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/enumerator.c b/enumerator.c index 0d7fbbb963d5ac..a7bf68fcd2f6f0 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1238,7 +1238,7 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) struct proc_entry *entry; VALUE *procs = RARRAY_PTR(procs_array); VALUE move_next = Qtrue; - VALUE break_point = Qtrue; + VALUE break_point = Qfalse; NODE *memo; long i = 0; @@ -1258,10 +1258,10 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) memo = RNODE(entry->memo); if (memo->u3.cnt == 0) { move_next = Qfalse; - break_point = Qundef; + break_point = Qtrue; } else if (--memo->u3.cnt == 0) { memo->u3.cnt = memo->u2.argc; - break_point = Qundef; + break_point = Qtrue; } break; case T_PROC_DROP: @@ -1302,7 +1302,7 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) if (RTEST(move_next)) { rb_funcall2(yielder, id_yield, 1, &result); } - if (break_point == Qundef) rb_iter_break(); + if (break_point) rb_iter_break(); return result; } static VALUE From 434ff9df4b258369d25e4ac83b900e574a05e1e6 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Sat, 14 Jul 2012 17:14:18 +0300 Subject: [PATCH 15/17] #inspect fixed to support procs chainged lazy enumerators --- enumerator.c | 286 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 176 insertions(+), 110 deletions(-) diff --git a/enumerator.c b/enumerator.c index a7bf68fcd2f6f0..1f0c26adde5ba8 100644 --- a/enumerator.c +++ b/enumerator.c @@ -859,93 +859,6 @@ enumerator_rewind(VALUE obj) return obj; } -static VALUE -inspect_enumerator(VALUE obj, VALUE dummy, int recur) -{ - struct enumerator *e; - const char *cname; - VALUE eobj, eargs, str, method; - int tainted, untrusted; - - TypedData_Get_Struct(obj, struct enumerator, &enumerator_data_type, e); - - cname = rb_obj_classname(obj); - - if (!e || e->obj == Qundef) { - return rb_sprintf("#<%s: uninitialized>", cname); - } - - if (recur) { - str = rb_sprintf("#<%s: ...>", cname); - OBJ_TAINT(str); - return str; - } - - eobj = rb_attr_get(obj, id_receiver); - if (NIL_P(eobj)) { - eobj = e->obj; - } - - tainted = OBJ_TAINTED(eobj); - untrusted = OBJ_UNTRUSTED(eobj); - - /* (1..100).each_cons(2) => "#" */ - str = rb_sprintf("#<%s: ", cname); - rb_str_concat(str, rb_inspect(eobj)); - method = rb_attr_get(obj, id_method); - if (NIL_P(method)) { - rb_str_buf_cat2(str, ":"); - rb_str_buf_cat2(str, rb_id2name(e->meth)); - } - else if (method != Qfalse) { - Check_Type(method, T_SYMBOL); - rb_str_buf_cat2(str, ":"); - rb_str_buf_cat2(str, rb_id2name(SYM2ID(method))); - } - - eargs = rb_attr_get(obj, id_arguments); - if (NIL_P(eargs)) { - eargs = e->args; - } - if (eargs != Qfalse) { - long argc = RARRAY_LEN(eargs); - VALUE *argv = RARRAY_PTR(eargs); - - if (argc > 0) { - rb_str_buf_cat2(str, "("); - - while (argc--) { - VALUE arg = *argv++; - - rb_str_concat(str, rb_inspect(arg)); - rb_str_buf_cat2(str, argc > 0 ? ", " : ")"); - - if (OBJ_TAINTED(arg)) tainted = TRUE; - if (OBJ_UNTRUSTED(arg)) untrusted = TRUE; - } - } - } - - rb_str_buf_cat2(str, ">"); - - if (tainted) OBJ_TAINT(str); - if (untrusted) OBJ_UNTRUST(str); - return str; -} - -/* - * call-seq: - * e.inspect -> string - * - * Creates a printable version of e. - */ - -static VALUE -enumerator_inspect(VALUE obj) -{ - return rb_exec_recursive(inspect_enumerator, obj, 0); -} - /* * Yielder */ @@ -1359,16 +1272,17 @@ lazy_init_block(VALUE val, VALUE m, int argc, VALUE *argv) } static VALUE -lazy_generator_init(VALUE old_generator, VALUE procs) +lazy_generator_init(VALUE enumerator, VALUE procs) { VALUE generator; VALUE obj; struct generator *gen_ptr; struct generator *old_gen_ptr; + struct enumerator *e = enumerator_ptr(enumerator); - old_gen_ptr = generator_ptr(old_generator); + old_gen_ptr = generator_ptr(e->obj); if (old_gen_ptr->hybrid) { - obj = old_generator; + obj = enumerator; } else { obj = old_gen_ptr->obj; } @@ -1411,7 +1325,7 @@ lazy_add_proc(VALUE enum_obj, enum proc_entry_type proc_type, VALUE memo) ptr = enumerator_ptr(enum_obj); rb_ary_push(ptr->procs, entry); - return enum_obj; + return entry; } static VALUE @@ -1466,17 +1380,23 @@ lazy_mark_as_hybrid(VALUE obj) } static VALUE -lazy_set_method(VALUE lazy, VALUE args) -{ +lazy_obj_set_method(VALUE obj, VALUE args) { ID id = rb_frame_this_func(); - rb_ivar_set(lazy, id_method, ID2SYM(id)); + rb_ivar_set(obj, id_method, ID2SYM(id)); if (NIL_P(args)) { /* Qfalse indicates that the arguments are empty */ - rb_ivar_set(lazy, id_arguments, Qfalse); + rb_ivar_set(obj, id_arguments, Qfalse); } else { - rb_ivar_set(lazy, id_arguments, args); + rb_ivar_set(obj, id_arguments, args); } + return obj; +} + +static VALUE +lazy_set_method(VALUE lazy, VALUE args) +{ + lazy_obj_set_method(lazy, args); lazy_mark_as_hybrid(lazy); return lazy; } @@ -1535,7 +1455,7 @@ lazy_copy(int argc, VALUE *argv, VALUE obj) e = enumerator_ptr(obj); new_procs = rb_ary_new4(RARRAY_LEN(e->procs), RARRAY_PTR(e->procs)); - new_generator = lazy_generator_init(e->obj, new_procs); + new_generator = lazy_generator_init(obj, new_procs); new_obj = enumerator_init_copy(enumerator_allocate(rb_cLazy), obj); new_e = enumerator_ptr(new_obj); @@ -1554,13 +1474,15 @@ static VALUE lazy_map(VALUE obj) { VALUE new_enum; + VALUE entry; if (!rb_block_given_p()) { rb_raise(rb_eArgError, "tried to call lazy map without a block"); } new_enum = lazy_copy(0, 0, obj); - lazy_add_proc(new_enum, T_PROC_MAP, Qnil); + entry = lazy_add_proc(new_enum, T_PROC_MAP, Qnil); + lazy_obj_set_method(entry, Qnil); return new_enum; } @@ -1633,13 +1555,15 @@ static VALUE lazy_select(VALUE obj) { VALUE new_enum; + VALUE entry; if (!rb_block_given_p()) { rb_raise(rb_eArgError, "tried to call lazy select without a block"); } new_enum = lazy_copy(0, 0, obj); - lazy_add_proc(new_enum, T_PROC_SELECT, Qnil); + entry = lazy_add_proc(new_enum, T_PROC_SELECT, Qnil); + lazy_obj_set_method(entry, Qnil); return new_enum; } @@ -1647,14 +1571,15 @@ lazy_select(VALUE obj) static VALUE lazy_reject(VALUE obj) { - VALUE new_enum; + VALUE new_enum, entry; if (!rb_block_given_p()) { rb_raise(rb_eArgError, "tried to call lazy reject without a block"); } new_enum = lazy_copy(0, 0, obj); - lazy_add_proc(new_enum, T_PROC_REJECT, Qnil); + entry = lazy_add_proc(new_enum, T_PROC_REJECT, Qnil); + lazy_obj_set_method(entry, Qnil); return new_enum; } @@ -1662,10 +1587,11 @@ lazy_reject(VALUE obj) static VALUE lazy_grep(VALUE obj, VALUE pattern) { - VALUE new_enum; + VALUE new_enum, entry; new_enum = lazy_copy(0, 0, obj); - lazy_add_proc(new_enum, T_PROC_GREP, pattern); + entry = lazy_add_proc(new_enum, T_PROC_GREP, pattern); + lazy_obj_set_method(entry, rb_ary_new3(1, pattern)); return new_enum; } @@ -1727,6 +1653,7 @@ lazy_take(VALUE obj, VALUE n) VALUE new_enum; int argc = 0; VALUE argv[2]; + VALUE entry; if (len < 0) { rb_raise(rb_eArgError, "attempt to take negative size"); @@ -1740,7 +1667,8 @@ lazy_take(VALUE obj, VALUE n) memo = NEW_MEMO(0, len, len); new_enum = lazy_copy(argc, argv, obj); - lazy_add_proc(new_enum, T_PROC_TAKE, (VALUE) memo); + entry = lazy_add_proc(new_enum, T_PROC_TAKE, (VALUE) memo); + lazy_obj_set_method(entry, rb_ary_new3(1, n)); return new_enum; } @@ -1748,10 +1676,11 @@ lazy_take(VALUE obj, VALUE n) static VALUE lazy_take_while(VALUE obj) { - VALUE new_enum; + VALUE new_enum, entry; new_enum = lazy_copy(0, 0, obj); - lazy_add_proc(new_enum, T_PROC_TAKE_WHILE, Qnil); + entry = lazy_add_proc(new_enum, T_PROC_TAKE_WHILE, Qnil); + lazy_obj_set_method(entry, Qnil); return new_enum; } @@ -1761,7 +1690,7 @@ lazy_drop(VALUE obj, VALUE n) { NODE *memo; long len = NUM2LONG(n); - VALUE new_enum; + VALUE new_enum, entry; if (len < 0) { rb_raise(rb_eArgError, "attempt to drop negative size"); @@ -1769,7 +1698,8 @@ lazy_drop(VALUE obj, VALUE n) memo = NEW_MEMO(0, 0, len); new_enum = lazy_copy(0, 0, obj); - lazy_add_proc(new_enum, T_PROC_DROP, (VALUE) memo); + entry = lazy_add_proc(new_enum, T_PROC_DROP, (VALUE) memo); + lazy_obj_set_method(entry, rb_ary_new3(1, n)); return new_enum; } @@ -1778,11 +1708,12 @@ static VALUE lazy_drop_while(VALUE obj) { NODE *memo; - VALUE new_enum; + VALUE new_enum, entry; memo = NEW_MEMO(0, 0, FALSE); new_enum = lazy_copy(0, 0, obj); - lazy_add_proc(new_enum, T_PROC_DROP_WHILE, (VALUE) memo); + entry = lazy_add_proc(new_enum, T_PROC_DROP_WHILE, (VALUE) memo); + lazy_obj_set_method(entry, Qnil); return new_enum; } @@ -1820,6 +1751,141 @@ lazy_lazy(VALUE obj) return obj; } +static VALUE +append_method(VALUE obj, VALUE str, ID default_method) +{ + VALUE method; + + method = rb_attr_get(obj, id_method); + if (NIL_P(method)) { + rb_str_buf_cat2(str, ":"); + rb_str_buf_cat2(str, rb_id2name(default_method)); + } + else if (method != Qfalse) { + Check_Type(method, T_SYMBOL); + rb_str_buf_cat2(str, ":"); + rb_str_buf_cat2(str, rb_id2name(SYM2ID(method))); + } + return str; +} + +static VALUE +append_args(VALUE obj, VALUE str, VALUE default_args) +{ + VALUE eargs; + int tainted, untrusted; + + eargs = rb_attr_get(obj, id_arguments); + if (NIL_P(eargs)) { + eargs = default_args; + } + if (eargs != Qfalse) { + long argc = RARRAY_LEN(eargs); + VALUE *argv = RARRAY_PTR(eargs); + + if (argc > 0) { + rb_str_buf_cat2(str, "("); + + while (argc--) { + VALUE arg = *argv++; + + rb_str_concat(str, rb_inspect(arg)); + rb_str_buf_cat2(str, argc > 0 ? ", " : ")"); + + if (OBJ_TAINTED(arg)) tainted = TRUE; + if (OBJ_UNTRUSTED(arg)) untrusted = TRUE; + } + } + } + + if (tainted) { OBJ_TAINT(str); } + if (untrusted) { OBJ_UNTRUST(str); } + return str; +} + +static VALUE +inspect_enumerator(VALUE obj, VALUE dummy, int recur) +{ + struct enumerator *e; + struct generator *g; + const char *cname; + VALUE eobj, str; + int tainted, untrusted; + VALUE *procs; + int i; + + TypedData_Get_Struct(obj, struct enumerator, &enumerator_data_type, e); + + cname = rb_obj_classname(obj); + + if (!e || e->obj == Qundef) { + return rb_sprintf("#<%s: uninitialized>", cname); + } + + if (recur) { + str = rb_sprintf("#<%s: ...>", cname); + OBJ_TAINT(str); + return str; + } + + if (e->procs && RARRAY_LEN(e->procs) > 0) { + g = generator_ptr(e->obj); + eobj = g->obj; + } else { + eobj = rb_attr_get(obj, id_receiver); + if (NIL_P(eobj)) { + eobj = e->obj; + } + } + + tainted = OBJ_TAINTED(eobj); + untrusted = OBJ_UNTRUSTED(eobj); + + /* (1..100).each_cons(2) => "#" + * In case procs chained enumerator traversing all proc entries manually + */ + if (e->procs && RARRAY_LEN(e->procs) > 0) { + if (strcmp(rb_obj_classname(eobj), cname) == 0) { + str = rb_inspect(eobj); + } else { + str = rb_sprintf("#<%s: ", cname); + rb_str_concat(str, rb_inspect(eobj)); + rb_str_buf_cat2(str, ">"); + } + procs = RARRAY_PTR(e->procs); + for (i = 0; i < RARRAY_LEN(e->procs); i++) { + str = rb_str_concat(rb_sprintf("#<%s: ", cname), str); + append_method(procs[i], str, e->meth); + append_args(procs[i], str, e->args); + rb_str_buf_cat2(str, ">"); + } + } else { + str = rb_sprintf("#<%s: ", cname); + rb_str_concat(str, rb_inspect(eobj)); + append_method(obj, str, e->meth); + append_args(obj, str, e->args); + + rb_str_buf_cat2(str, ">"); + } + + + if (tainted) OBJ_TAINT(str); + if (untrusted) OBJ_UNTRUST(str); + return str; +} + +/* + * call-seq: + * e.inspect -> string + * + * Creates a printable version of e. + */ + +static VALUE +enumerator_inspect(VALUE obj) +{ + return rb_exec_recursive(inspect_enumerator, obj, 0); +} /* * Document-class: StopIteration * From 68cc5b221bf875606ca63191f391aa4ea4453272 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Tue, 31 Jul 2012 12:56:44 +0300 Subject: [PATCH 16/17] cycle_chain test fixed --- enumerator.c | 58 +++++++++++++++++++++------------------------------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/enumerator.c b/enumerator.c index 1f0c26adde5ba8..f3b4c6692d0990 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1030,7 +1030,7 @@ generator_init(VALUE obj, VALUE proc) } ptr->proc = proc; - ptr->hybrid = Qfalse; + ptr->hybrid = Qtrue; return obj; } @@ -1215,7 +1215,9 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) if (RTEST(move_next)) { rb_funcall2(yielder, id_yield, 1, &result); } - if (break_point) rb_iter_break(); + if (RTEST(break_point)) { + rb_iter_break(); + } return result; } static VALUE @@ -1333,7 +1335,6 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) { VALUE obj, meth; VALUE generator; - struct generator *g_ptr; VALUE procs; struct enumerator *ptr; int offset; @@ -1358,8 +1359,6 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) rb_block_call(generator, id_initialize, 0, 0, (rb_block_given_p() ? lazy_init_block_i : lazy_init_block), rb_ary_new3(2, obj, procs)); - g_ptr = generator_ptr(generator); - g_ptr->obj = obj; enumerator_init(self, generator, meth, argc - offset, argv + offset); ptr = enumerator_ptr(self); ptr->procs = procs; @@ -1369,35 +1368,17 @@ lazy_initialize(int argc, VALUE *argv, VALUE self) } static VALUE -lazy_mark_as_hybrid(VALUE obj) +lazy_set_method(VALUE lazy, VALUE args) { - struct enumerator *e; - struct generator *g; - e = enumerator_ptr(obj); - g = generator_ptr(e->obj); - g->hybrid = Qtrue; - return obj; -} - -static VALUE -lazy_obj_set_method(VALUE obj, VALUE args) { ID id = rb_frame_this_func(); - rb_ivar_set(obj, id_method, ID2SYM(id)); + rb_ivar_set(lazy, id_method, ID2SYM(id)); if (NIL_P(args)) { /* Qfalse indicates that the arguments are empty */ - rb_ivar_set(obj, id_arguments, Qfalse); + rb_ivar_set(lazy, id_arguments, Qfalse); } else { - rb_ivar_set(obj, id_arguments, args); + rb_ivar_set(lazy, id_arguments, args); } - return obj; -} - -static VALUE -lazy_set_method(VALUE lazy, VALUE args) -{ - lazy_obj_set_method(lazy, args); - lazy_mark_as_hybrid(lazy); return lazy; } @@ -1449,6 +1430,7 @@ lazy_copy(int argc, VALUE *argv, VALUE obj) { struct enumerator *e; struct enumerator *new_e; + struct generator *g; VALUE new_obj; VALUE new_generator; VALUE new_procs; @@ -1456,15 +1438,21 @@ lazy_copy(int argc, VALUE *argv, VALUE obj) e = enumerator_ptr(obj); new_procs = rb_ary_new4(RARRAY_LEN(e->procs), RARRAY_PTR(e->procs)); new_generator = lazy_generator_init(obj, new_procs); + g = generator_ptr(new_generator); + g->hybrid = Qfalse; new_obj = enumerator_init_copy(enumerator_allocate(rb_cLazy), obj); new_e = enumerator_ptr(new_obj); new_e->obj = new_generator; new_e->procs = new_procs; + new_e->meth = rb_to_id(sym_each); if (argc > 0) { new_e->meth = rb_to_id(*argv++); new_e->args = rb_ary_new4(argc - 1, argv); + } else { + new_e->meth = id_each; + new_e->args = rb_ary_new4(argc, argv); } return new_obj; @@ -1482,7 +1470,7 @@ lazy_map(VALUE obj) new_enum = lazy_copy(0, 0, obj); entry = lazy_add_proc(new_enum, T_PROC_MAP, Qnil); - lazy_obj_set_method(entry, Qnil); + lazy_set_method(entry, Qnil); return new_enum; } @@ -1563,7 +1551,7 @@ lazy_select(VALUE obj) new_enum = lazy_copy(0, 0, obj); entry = lazy_add_proc(new_enum, T_PROC_SELECT, Qnil); - lazy_obj_set_method(entry, Qnil); + lazy_set_method(entry, Qnil); return new_enum; } @@ -1579,7 +1567,7 @@ lazy_reject(VALUE obj) new_enum = lazy_copy(0, 0, obj); entry = lazy_add_proc(new_enum, T_PROC_REJECT, Qnil); - lazy_obj_set_method(entry, Qnil); + lazy_set_method(entry, Qnil); return new_enum; } @@ -1591,7 +1579,7 @@ lazy_grep(VALUE obj, VALUE pattern) new_enum = lazy_copy(0, 0, obj); entry = lazy_add_proc(new_enum, T_PROC_GREP, pattern); - lazy_obj_set_method(entry, rb_ary_new3(1, pattern)); + lazy_set_method(entry, rb_ary_new3(1, pattern)); return new_enum; } @@ -1668,7 +1656,7 @@ lazy_take(VALUE obj, VALUE n) memo = NEW_MEMO(0, len, len); new_enum = lazy_copy(argc, argv, obj); entry = lazy_add_proc(new_enum, T_PROC_TAKE, (VALUE) memo); - lazy_obj_set_method(entry, rb_ary_new3(1, n)); + lazy_set_method(entry, rb_ary_new3(1, n)); return new_enum; } @@ -1680,7 +1668,7 @@ lazy_take_while(VALUE obj) new_enum = lazy_copy(0, 0, obj); entry = lazy_add_proc(new_enum, T_PROC_TAKE_WHILE, Qnil); - lazy_obj_set_method(entry, Qnil); + lazy_set_method(entry, Qnil); return new_enum; } @@ -1699,7 +1687,7 @@ lazy_drop(VALUE obj, VALUE n) memo = NEW_MEMO(0, 0, len); new_enum = lazy_copy(0, 0, obj); entry = lazy_add_proc(new_enum, T_PROC_DROP, (VALUE) memo); - lazy_obj_set_method(entry, rb_ary_new3(1, n)); + lazy_set_method(entry, rb_ary_new3(1, n)); return new_enum; } @@ -1713,7 +1701,7 @@ lazy_drop_while(VALUE obj) memo = NEW_MEMO(0, 0, FALSE); new_enum = lazy_copy(0, 0, obj); entry = lazy_add_proc(new_enum, T_PROC_DROP_WHILE, (VALUE) memo); - lazy_obj_set_method(entry, Qnil); + lazy_set_method(entry, Qnil); return new_enum; } From 30f11469511f6ae56e980d92b24c77b53996f079 Mon Sep 17 00:00:00 2001 From: gregolsen Date: Tue, 31 Jul 2012 14:30:19 +0300 Subject: [PATCH 17/17] rb_funcall changed to rb_proc_call_with_block for speed up --- enumerator.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/enumerator.c b/enumerator.c index f3b4c6692d0990..60ad9b2419c5c7 100644 --- a/enumerator.c +++ b/enumerator.c @@ -1160,12 +1160,10 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) if (RTEST(move_next)) { switch ((enum proc_entry_type) entry->type) { case T_PROC_MAP: - result = rb_funcall(entry->proc, rb_intern("call"), - 1, result); + result = rb_proc_call_with_block(entry->proc, 1, &result, Qnil); break; case T_PROC_SELECT: - move_next = rb_funcall(entry->proc, rb_intern("call"), - 1, result); + move_next = rb_proc_call_with_block(entry->proc, 1, &result, Qnil); break; case T_PROC_TAKE: memo = RNODE(entry->memo); @@ -1184,28 +1182,24 @@ process_element(VALUE procs_array, VALUE yielder, int argc, VALUE* argv) } break; case T_PROC_TAKE_WHILE: - move_next = rb_funcall(entry->proc, rb_intern("call"), - 1, result); + move_next = rb_proc_call_with_block(entry->proc, 1, &result, Qnil); if (!RTEST(move_next)) result = Qundef; break; case T_PROC_DROP_WHILE: memo = RNODE(entry->memo); if (!memo->u3.state) { - move_next = !RTEST(rb_funcall(entry->proc, - rb_intern("call"), 1, result)); + move_next = !RTEST(rb_proc_call_with_block(entry->proc, 1, &result, Qnil)); if (move_next) memo->u3.state = TRUE; } break; case T_PROC_REJECT: - move_next = !RTEST(rb_funcall(entry->proc, rb_intern("call"), - 1, result)); + move_next = !RTEST(rb_proc_call_with_block(entry->proc, 1, &result, Qnil)); break; case T_PROC_GREP: move_next = rb_funcall(entry->memo, id_eqq, 1, result); if (RTEST(move_next) && entry->proc) { - result = rb_funcall(entry->proc, - rb_intern("call"), 1, result); + result = rb_proc_call_with_block(entry->proc, 1, &result, Qnil); } break; }