Use `alloca` to improve performance of thread creation. by ioquatix · Pull Request #2227 · ruby/ruby · GitHub
[go: up one dir, main page]

Skip to content

Use alloca to improve performance of thread creation. #2227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ lcov*.info
/*.pc
/*.rc
/*_prelude.c
/build*
/COPYING.LIB
/ChangeLog
/Doxyfile
Expand Down
8 changes: 8 additions & 0 deletions benchmark/vm_thread_alive_check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
benchmark:
vm_thread_alive_check: |
t = Thread.new{}
while t.alive?
Thread.pass
end
loop_count: 50_000

6 changes: 0 additions & 6 deletions benchmark/vm_thread_alive_check1.rb

This file was deleted.

4 changes: 2 additions & 2 deletions benchmark/vm_thread_pass.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Plenty Thtread.pass
# A performance may depend on GVL implementation.

tmax = (ARGV.shift || 2).to_i
lmax = 200_000 / tmax
tmax = (ARGV.shift || 8).to_i
lmax = 400_000 / tmax

(1..tmax).map{
Thread.new{
Expand Down
4 changes: 2 additions & 2 deletions benchmark/vm_thread_pass_flood.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# n.b. this is a good test for GVL when pinned to a single CPU

1000.times{
5_000.times{
Thread.new{loop{Thread.pass}}
}

i = 0
while i<10000
while i<10_000
i += 1
end
2 changes: 1 addition & 1 deletion benchmark/vm_thread_queue.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
require 'thread'

n = 1_000_000
n = 10_000_000
q = Thread::Queue.new
consumer = Thread.new{
while q.pop
Expand Down
4 changes: 4 additions & 0 deletions benchmark/vm_thread_sleep.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
benchmark:
vm_thread_sleep: |
Thread.new { sleep }
loop_count: 10_000
21 changes: 17 additions & 4 deletions bootstraptest/runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,9 @@ def exec_test(pathes)
$stderr.puts unless @quiet and @tty and @error == error
end
$stderr.print(erase) if @quiet
@errbuf.each do |msg|
$stderr.puts msg
end
if @error == 0
if @count == 0
$stderr.puts "No tests, no problem"
Expand All @@ -216,9 +219,6 @@ def exec_test(pathes)
end
exit true
else
@errbuf.each do |msg|
$stderr.puts msg
end
$stderr.puts "#{@failed}FAIL#{@reset} #{@error}/#{@count} tests failed"
exit false
end
Expand All @@ -244,7 +244,7 @@ def show_progress(message = '')
else
$stderr.print "#{@failed}F"
$stderr.printf(" %.3f", t) if @verbose
$stderr.print "#{@reset}"
$stderr.print @reset
$stderr.puts if @verbose
error faildesc, message
unless errout.empty?
Expand All @@ -263,6 +263,19 @@ def show_progress(message = '')
error err.message, message
end

def show_limit(testsrc, opt = '', **argh)
result = get_result_string(testsrc, opt, **argh)
$stderr.print '.'
$stderr.print @reset
$stderr.puts if @verbose

if @tty
$stderr.puts "#{erase}#{result}"
else
@errbuf.push result
end
end

def assert_check(testsrc, message = '', opt = '', **argh)
show_progress(message) {
result = get_result_string(testsrc, opt, **argh)
Expand Down
29 changes: 27 additions & 2 deletions bootstraptest/test_thread.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
# Thread and Fiber

show_limit %q{
threads = []
begin
threads << Thread.new{sleep}

raise Exception, "skipping" if threads.count >= 10_000
rescue Exception => error
puts "Thread count: #{threads.count} (#{error})"
break
end while true
}
show_limit %q{
fibers = []
begin
fiber = Fiber.new{Fiber.yield}
fiber.resume
fibers << fiber

raise Exception, "skipping" if fibers.count >= 10_000
rescue Exception => error
puts "Fiber count: #{fibers.count} (#{error})"
break
end while true
}
assert_equal %q{ok}, %q{
Thread.new{
}.join
Expand All @@ -10,6 +32,9 @@
:ok
}.value
}
assert_equal %q{ok}, %q{
:ok if Thread.new{sleep}.backtrace == []
}
assert_equal %q{ok}, %q{
begin
v = 0
Expand Down
46 changes: 23 additions & 23 deletions cont.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ typedef struct rb_context_struct {
#endif
} machine;
rb_execution_context_t saved_ec;
int free_vm_stack;
rb_jmpbuf_t jmpbuf;
rb_ensure_entry_t *ensure_array;
/* Pointer to MJIT info about the continuation. */
Expand Down Expand Up @@ -407,7 +408,10 @@ cont_free(void *ptr)
rb_context_t *cont = ptr;

RUBY_FREE_ENTER("cont");
ruby_xfree(cont->saved_ec.vm_stack);

if (cont->free_vm_stack) {
ruby_xfree(cont->saved_ec.vm_stack);
}

#if FIBER_USE_NATIVE
if (cont->type == CONTINUATION_CONTEXT) {
Expand All @@ -420,10 +424,10 @@ cont_free(void *ptr)
rb_fiber_t *fib = (rb_fiber_t*)cont;
#if defined(FIBER_USE_COROUTINE)
coroutine_destroy(&fib->context);
if (fib->ss_sp != NULL) {
if (fiber_is_root_p(fib)) {
rb_bug("Illegal root fiber parameter");
}
if (fib->ss_sp != NULL) {
if (fiber_is_root_p(fib)) {
rb_bug("Illegal root fiber parameter");
}
#ifdef _WIN32
VirtualFree((void*)fib->ss_sp, 0, MEM_RELEASE);
#else
Expand Down Expand Up @@ -1525,19 +1529,8 @@ fiber_init(VALUE fibval, VALUE proc)
else {
vm_stack = ruby_xmalloc(fib_stack_bytes);
}
rb_ec_set_vm_stack(sec, vm_stack, fib_stack_bytes / sizeof(VALUE));
sec->cfp = (void *)(sec->vm_stack + sec->vm_stack_size);

rb_vm_push_frame(sec,
NULL,
VM_FRAME_MAGIC_DUMMY | VM_ENV_FLAG_LOCAL | VM_FRAME_FLAG_FINISH | VM_FRAME_FLAG_CFRAME,
Qnil, /* self */
VM_BLOCK_HANDLER_NONE,
0, /* specval */
NULL, /* pc */
sec->vm_stack, /* sp */
0, /* local_size */
0);
cont->free_vm_stack = 1;
rb_ec_initialize_vm_stack(sec, vm_stack, fib_stack_bytes / sizeof(VALUE));

sec->tag = NULL;
sec->local_storage = NULL;
Expand Down Expand Up @@ -1661,6 +1654,8 @@ rb_threadptr_root_fiber_setup(rb_thread_t *th)
fiber_status_set(fib, FIBER_RESUMED); /* skip CREATED */
th->ec = &fib->cont.saved_ec;

VM_ASSERT(fib->cont.free_vm_stack == 0);

/* NOTE: On WIN32, fib_handle is not allocated yet. */
}

Expand All @@ -1673,6 +1668,8 @@ rb_threadptr_root_fiber_release(rb_thread_t *th)
else {
VM_ASSERT(th->ec->fiber_ptr->cont.type == FIBER_CONTEXT);
VM_ASSERT(th->ec->fiber_ptr->cont.self == 0);

// th->ec->fiber_ptr->cont.saved_ec.vm_stack = NULL;
fiber_free(th->ec->fiber_ptr);

if (th->ec == ruby_current_execution_context_ptr) {
Expand Down Expand Up @@ -1878,12 +1875,15 @@ rb_fiber_close(rb_fiber_t *fib)
size_t stack_bytes = ec->vm_stack_size * sizeof(VALUE);

fiber_status_set(fib, FIBER_TERMINATED);
if (stack_bytes == rb_ec_vm_ptr(ec)->default_params.thread_vm_stack_size) {
rb_thread_recycle_stack_release(vm_stack);
}
else {
ruby_xfree(vm_stack);
if (fib->cont.free_vm_stack) {
if (stack_bytes == rb_ec_vm_ptr(ec)->default_params.thread_vm_stack_size) {
rb_thread_recycle_stack_release(vm_stack);
}
else {
ruby_xfree(vm_stack);
}
}

rb_ec_set_vm_stack(ec, NULL, 0);

#if !FIBER_USE_NATIVE
Expand Down
6 changes: 6 additions & 0 deletions gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -9398,6 +9398,12 @@ rb_memerror(void)
rb_objspace_t *objspace = rb_objspace_of(rb_ec_vm_ptr(ec));
VALUE exc;

if (0) {
// Print out pid, sleep, so you can attach debugger to see what went wrong:
fprintf(stderr, "rb_memerror pid=%d\n", getpid());
sleep(60);
}

if (during_gc) gc_exit(objspace, "rb_memerror");

exc = nomem_error;
Expand Down
30 changes: 27 additions & 3 deletions thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@
#include "mjit.h"
#include "hrtime.h"

#ifdef __linux__
// Normally, gcc(1) translates calls to alloca() with inlined code. This is not done when either the -ansi, -std=c89, -std=c99, or the -std=c11 option is given and the header <alloca.h> is not included.
#include <alloca.h>
#endif

#ifndef USE_NATIVE_THREAD_PRIORITY
#define USE_NATIVE_THREAD_PRIORITY 0
#define RUBY_THREAD_PRIORITY_MAX 3
Expand Down Expand Up @@ -695,21 +700,40 @@ thread_do_start(rb_thread_t *th)
}

void rb_ec_clear_current_thread_trace_func(const rb_execution_context_t *ec);
rb_control_frame_t *
rb_vm_push_frame(rb_execution_context_t *sec,
const rb_iseq_t *iseq,
VALUE type,
VALUE self,
VALUE specval,
VALUE cref_or_me,
const VALUE *pc,
VALUE *sp,
int local_size,
int stack_max);

static int
thread_start_func_2(rb_thread_t *th, VALUE *stack_start, VALUE *register_stack_start)
{
STACK_GROW_DIR_DETECTION;
enum ruby_tag_type state;
rb_thread_list_t *join_list;
rb_thread_t *main_th;
VALUE errinfo = Qnil;
size_t size = th->vm->default_params.thread_vm_stack_size / sizeof(VALUE);
VALUE * vm_stack = NULL;

if (th == th->vm->main_thread) {
rb_bug("thread_start_func_2 must not be used for main thread");
}

if (th == th->vm->main_thread)
rb_bug("thread_start_func_2 must not be used for main thread");
vm_stack = alloca(size * sizeof(VALUE));
rb_ec_initialize_vm_stack(th->ec, vm_stack, size);

ruby_thread_set_native(th);

th->ec->machine.stack_start = stack_start;
th->ec->machine.stack_start = STACK_DIR_UPPER(vm_stack + size, vm_stack);
th->ec->machine.stack_maxsize -= size * sizeof(VALUE);
#ifdef __ia64
th->ec->machine.register_stack_start = register_stack_start;
#endif
Expand Down
2 changes: 1 addition & 1 deletion thread_pthread.c
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,7 @@ native_thread_create(rb_thread_t *th)
}
else {
pthread_attr_t attr;
const size_t stack_size = th->vm->default_params.thread_machine_stack_size;
const size_t stack_size = th->vm->default_params.thread_machine_stack_size + th->vm->default_params.thread_vm_stack_size;
const size_t space = space_size(stack_size);

th->ec->machine.stack_maxsize = stack_size - space;
Expand Down
4 changes: 2 additions & 2 deletions thread_win32.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ typedef LPTHREAD_START_ROUTINE w32_thread_start_func;
static HANDLE
w32_create_thread(DWORD stack_size, w32_thread_start_func func, void *val)
{
return start_thread(0, stack_size, func, val, CREATE_SUSPENDED, 0);
return start_thread(0, stack_size, func, val, CREATE_SUSPENDED | STACK_SIZE_PARAM_IS_A_RESERVATION, 0);
}

int
Expand Down Expand Up @@ -592,7 +592,7 @@ thread_start_func_1(void *th_ptr)
static int
native_thread_create(rb_thread_t *th)
{
size_t stack_size = 4 * 1024; /* 4KB is the minimum commit size */
const size_t stack_size = th->vm->default_params.thread_machine_stack_size + th->vm->default_params.thread_vm_stack_size;
th->thread_id = w32_create_thread(stack_size, thread_start_func_1, th);

if ((th->thread_id) == 0) {
Expand Down
35 changes: 23 additions & 12 deletions vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2685,26 +2685,37 @@ thread_alloc(VALUE klass)
return obj;
}

void
rb_ec_initialize_vm_stack(rb_execution_context_t *ec, VALUE *stack, size_t size)
{
rb_ec_set_vm_stack(ec, stack, size);

ec->cfp = (void *)(ec->vm_stack + ec->vm_stack_size);

rb_vm_push_frame(ec,
NULL /* dummy iseq */,
VM_FRAME_MAGIC_DUMMY | VM_ENV_FLAG_LOCAL | VM_FRAME_FLAG_FINISH | VM_FRAME_FLAG_CFRAME /* dummy frame */,
Qnil /* dummy self */, VM_BLOCK_HANDLER_NONE /* dummy block ptr */,
0 /* dummy cref/me */,
0 /* dummy pc */, ec->vm_stack, 0, 0
);
}

static void
th_init(rb_thread_t *th, VALUE self)
{
th->self = self;
rb_threadptr_root_fiber_setup(th);

{
/* vm_stack_size is word number.
* th->vm->default_params.thread_vm_stack_size is byte size. */
size_t size = th->vm->default_params.thread_vm_stack_size / sizeof(VALUE);
rb_ec_set_vm_stack(th->ec, rb_thread_recycle_stack(size), size);
if (self == 0) {
size_t size = th->vm->default_params.thread_vm_stack_size / sizeof(VALUE);
rb_ec_initialize_vm_stack(th->ec, ALLOC_N(VALUE, size), size);
} else {
VM_ASSERT(th->ec->cfp == NULL);
VM_ASSERT(th->ec->vm_stack == NULL);
VM_ASSERT(th->ec->vm_stack_size == 0);
}

th->ec->cfp = (void *)(th->ec->vm_stack + th->ec->vm_stack_size);

vm_push_frame(th->ec, 0 /* dummy iseq */, VM_FRAME_MAGIC_DUMMY | VM_ENV_FLAG_LOCAL | VM_FRAME_FLAG_FINISH | VM_FRAME_FLAG_CFRAME /* dummy frame */,
Qnil /* dummy self */, VM_BLOCK_HANDLER_NONE /* dummy block ptr */,
0 /* dummy cref/me */,
0 /* dummy pc */, th->ec->vm_stack, 0, 0);

th->status = THREAD_RUNNABLE;
th->last_status = Qnil;
th->ec->errinfo = Qnil;
Expand Down
Loading
0