8000 [Bug #20228] Fix memory leak in Regexp timeout by peterzhu2118 · Pull Request #9765 · ruby/ruby · GitHub
[go: up one dir, main page]

Skip to content

[Bug #20228] Fix memory leak in Regexp timeout #9765

New issue

Have a question about thi 8000 s project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 56 additions & 15 deletions re.c
8000
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ static const char casetable[] = {
# error >>> "You lose. You will need a translation table for your character set." <<<
#endif

// The process-global timeout for regexp matching
rb_hrtime_t rb_reg_match_time_limit = 0;

int
rb_memcicmp(const void *x, const void *y, long len)
{
Expand Down Expand Up @@ -1732,6 +1735,23 @@ reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_p
ONIG_OPTION_NONE);
}

struct rb_reg_onig_match_args {
VALUE re;
VALUE str;
struct reg_onig_search_args args;
struct re_registers regs;

OnigPosition result;
};

static VALUE
rb_reg_onig_match_try(VALUE value_args)
{
struct rb_reg_onig_match_args *args = (struct rb_reg_onig_match_args *)value_args;
ar 8000 gs->result = rb_reg_onig_match(args->re, args->str, reg_onig_search, &args->args, &args->regs);
return Qnil;
}

/* returns byte offset */
static long
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
Expand All @@ -1742,22 +1762,38 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
return -1;
}

struct reg_onig_search_args args = {
.pos = pos,
.range = reverse ? 0 : len,
struct rb_reg_onig_match_args args = {
.re = re,
.str = str,
.args = {
.pos = pos,
.range = reverse ? 0 : len,
},
.regs = {0}
};

struct re_registers regs = {0};
/* If there is a timeout set, then rb_reg_onig_match could raise a
* Regexp::TimeoutError so we want to protect it from leaking memory. */
if (rb_reg_match_time_limit) {
int state;
rb_protect(rb_reg_onig_match_try, (VALUE)&args, &state);
if (state) {
onig_region_free(&args.regs, false);
rb_jump_tag(state);
}
}
else {
rb_reg_onig_match_try((VALUE)&args);
}

OnigPosition result = rb_reg_onig_match(re, str, reg_onig_search, &args, &regs);
if (result == ONIG_MISMATCH) {
if (args.result == ONIG_MISMATCH) {
rb_backref_set(Qnil);
return ONIG_MISMATCH;
}

VALUE match = match_alloc(rb_cMatch);
rb_matchext_t *rm = RMATCH_EXT(match);
rm->regs = regs;
rm->regs = args.regs;

if (set_backref_str) {
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
8000 Expand All @@ -1774,7 +1810,7 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
rb_backref_set(match);
if (set_match) *set_match = match;

return result;
return args.result;
}

long
Expand Down Expand Up @@ -4601,12 +4637,9 @@ re_warn(const char *s)
rb_warn("%s", s);
}

// The process-global timeout for regexp matching
rb_hrtime_t rb_reg_match_time_limit = 0;

// This function is periodically called during regexp matching
void
rb_reg_check_timeout(regex_t *reg, void *end_time_)
bool
rb_reg_timeout_p(regex_t *reg, void *end_time_)
{
rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;

Expand All @@ -4631,10 +4664,18 @@ rb_reg_check_timeout(regex_t *reg, void *end_time_)
}
else {
if (*end_time < rb_hrtime_now()) {
// timeout is exceeded
rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
// Timeout has exceeded
return true;
}
}

return false;
}

void
rb_reg_raise_timeout(void)
{
rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
}

/*
Expand Down
7 changes: 6 additions & 1 deletion regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -2293,7 +2293,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar *pkeep;
char *alloca_base;
char *xmalloc_base = NULL;
OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end;
OnigStackType *stkp; /* used as any purpose. */
OnigStackIndex si;
OnigStackIndex *repeat_stk;
Expand Down Expand Up @@ -4202,6 +4202,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_SAVE;
xfree(xmalloc_base);
return ONIGERR_UNEXPECTED_BYTECODE;

timeout:
xfree(xmalloc_base);
xfree(stk_base);
HANDLE_REG_TIMEOUT_IN_MATCH_AT;
}


Expand Down
20 changes: 13 additions & 7 deletions regint.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,18 @@
#ifdef RUBY

# define CHECK_INTERRUPT_IN_MATCH_AT do { \
msa->counter++; \
if (msa->counter >= 128) { \
msa->counter = 0; \
rb_reg_check_timeout(reg, &msa->end_time); \
rb_thread_check_ints(); \
} \
msa->counter++; \
if (msa->counter >= 128) { \
msa->counter = 0; \
if (rb_reg_timeout_p(reg, &msa->end_time)) { \
goto timeout; \
} \
rb_thread_check_ints(); \
} \
} while(0)
# define HANDLE_REG_TIMEOUT_IN_MATCH_AT do { \
rb_reg_raise_timeout(); \
} while (0)
# define onig_st_init_table st_init_table
# define onig_st_init_table_with_size st_init_table_with_size
# define onig_st_init_numtable st_init_numtable
Expand Down Expand Up @@ -996,7 +1001,8 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c
#ifdef RUBY
extern size_t onig_memsize(const regex_t *reg);
extern size_t onig_region_memsize(const struct re_registers *regs);
void rb_reg_check_timeout(regex_t *reg, void *end_time);
bool rb_reg_timeout_p(regex_t *reg, void *end_time);
NORETURN(void rb_reg_raise_timeout(void));
#endif

RUBY_SYMBOL_EXPORT_END
Expand Down
17 changes: 17 additions & 0 deletions test/ruby/test_regexp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1807,6 +1807,23 @@ def test_s_timeout_corner_cases
end;
end

def test_s_timeout_memory_leak
assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~"end;"}", "[Bug #20228]", rss: true)
Regexp.timeout = 0.001
regex = /^(a*)*$/
str = "a" * 1000000 + "x"

code = proc do
regex =~ str
rescue
end

10.times(&code)
begin;
1_000.times(&code)
end;
end

def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout)
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
global_timeout = #{ EnvUtil.apply_timeout_scale(global_timeout).inspect }
Expand Down
0