Dump with debugger before killing stuck worker by nobu · Pull Request #13618 · ruby/ruby · GitHub
[go: up one dir, main page]

Skip to content

Dump with debugger before killing stuck worker #13618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions tool/lib/dump.gdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# gdb command file: dump the state of the attached process, then detach.
# It is passed to gdb as a command file for an already attached process.

# Disable pagination, line wrapping and confirmation prompts so the
# script never waits for interactive input.
set height 0
set width 0
set confirm off

echo \n>>> Threads\n\n
info threads

echo \n>>> Machine level backtrace\n\n
thread apply all info stack full

echo \n>>> Dump Ruby level backtrace (if possible)\n\n
call rb_vmdebug_stack_dump_all_threads()
# Flush stderr in the debuggee so buffered output is not lost.
call fflush(stderr)

# `echo` prints its argument verbatim, so the text must not be quoted.
echo >>> Finish\n
detach
quit
13 changes: 13 additions & 0 deletions tool/lib/dump.lldb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# lldb command file: dump the state of the attached process, then detach.
# Section banners are printed through lldb's embedded script interpreter.
script print("\n>>> Threads\n\n")
thread list

script print("\n>>> Machine level backtrace\n\n")
thread backtrace all

script print("\n>>> Dump Ruby level backtrace (if possible)\n\n")
call rb_vmdebug_stack_dump_all_threads()
# Flush stderr in the debuggee so buffered output is not lost.
call fflush(stderr)

script print(">>> Finish\n")
detach
quit
75 changes: 66 additions & 9 deletions tool/lib/envutil.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,70 @@ def timeout(sec, klass = nil, message = nil, &blk)
end
module_function :timeout

# Attaches an external debugger to a process and runs the command file
# +dump.<name>+ that lives next to this file.
#
# Backends are added with Debugger.register; Debugger.search returns the
# first registered backend whose +usable?+ is true.
class Debugger
  @list = []

  # Backend name; also used as the extension of the command file.
  attr_accessor :name

  # Creates a backend called +name+, configures it by evaluating +block+
  # in the context of the new instance, and appends it to the candidates.
  def self.register(name, &block)
    @list << new(name, &block)
  end

  def initialize(name, &block)
    @name = name
    instance_eval(&block)
  end

  # Whether this backend can be run. Backends override this.
  def usable?; false; end

  # Starts the debugger attached to +pid+ and returns the pid of the
  # debugger process. The default implementation does nothing.
  def start(pid, *args) end

  # Runs the dump command file against +pid+ and waits for the debugger.
  #
  # The debugger is given +timeout+ seconds to finish; after that it is
  # sent TERM, given +reprieve+ more seconds, and finally sent KILL.
  #
  # Returns +nil+ when the debugger could not be started, the result of
  # Process.wait when it finished in time, and +true+ when it had to be
  # killed.
  def dump(pid, timeout: 60, reprieve: timeout&.div(4))
    dpid = start(pid, *command_file(File.join(__dir__, "dump.#{name}")))
  rescue Errno::ENOENT
    # The debugger executable is missing; nothing to dump with.
    return
  else
    return unless dpid
    # +each+, not +find+: Process.kill returns a truthy Integer, which
    # would stop +find+ right after TERM and never escalate to KILL.
    [[timeout, :TERM], [reprieve, :KILL]].each do |t, sig|
      begin
        return EnvUtil.timeout(t) {Process.wait(dpid)}
      rescue Timeout::Error
        Process.kill(sig, dpid)
      end
    end
    true
  end

  # sudo -n: --non-interactive
  # Must be an Array of words so that splatting it yields separate argv
  # elements; +nil+ splats to nothing on other platforms.
  PRECOMMAND = (%w[sudo -n].freeze if /darwin/ =~ RUBY_PLATFORM)

  # Kernel#spawn with PRECOMMAND prepended to the command line.
  def spawn(*args, **opts)
    super(*PRECOMMAND, *args, **opts)
  end

  register("gdb") do
    class << self
      def usable?; system(*%w[gdb --batch --quiet --nx -ex exit]); end

      def start(pid, *args)
        # %W (not %w) so that the pid is interpolated.
        spawn(*%W[gdb --batch --quiet --pid #{pid}], *args)
      end

      def command_file(file) "--command=#{file}"; end
    end
  end

  register("lldb") do
    class << self
      def usable?; system(*%w[lldb -Q --no-lldbinit -o exit]); end

      def start(pid, *args)
        # %W (not %w) so that the pid is interpolated; +args+ carries the
        # command file options and must be forwarded.
        spawn(*%W[lldb --batch -Q --attach-pid #{pid}], *args)
      end

      def command_file(file) ["--source", file]; end
    end
  end

  # Returns the first usable registered backend, or +nil+ if there is
  # none. A found backend is remembered for later calls.
  def self.search
    @debugger ||= @list.find(&:usable?)
  end
end

def terminate(pid, signal = :TERM, pgroup = nil, reprieve = 1)
reprieve = apply_timeout_scale(reprieve) if reprieve

Expand All @@ -94,17 +158,10 @@ def terminate(pid, signal = :TERM, pgroup = nil, reprieve = 1)
pgroup = pid
end

lldb = true if /darwin/ =~ RUBY_PLATFORM

while signal = signals.shift

if lldb and [:ABRT, :KILL].include?(signal)
lldb = false
# sudo -n: --non-interactive
# lldb -p: attach
# -o: run command
system(*%W[sudo -n lldb -p #{pid} --batch -o bt\ all -o call\ rb_vmdebug_stack_dump_all_threads() -o quit])
true
if (dbg = Debugger.search) and [:ABRT, :KILL].include?(signal)
dbg.dump(pid)
end

begin
Expand Down
1 change: 1 addition & 0 deletions tool/lib/test/unit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ def quit(reason = :normal)
end

def kill
EnvUtil::Debugger.search&.dump(@pid)
signal = RUBY_PLATFORM =~ /mswin|mingw/ ? :KILL : :SEGV
Process.kill(signal, @pid)
warn "worker #{to_s} does not respond; #{signal} is sent"
Expand Down
Loading
3719
0