From cb302881629f997f403e705425f69e5f6b0741ac Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Wed, 22 Oct 2025 19:47:20 -0400 Subject: [PATCH] Fix memory leak of transcoding when fallback raises When the fallback function in transcode_loop raises, it will leak the memory in rb_econv_t. The following script reproduces the leak: 10.times do 100_000.times do "\ufffd".encode(Encoding::US_ASCII, fallback: proc { raise }) rescue end puts `ps -o rss= -p #{$$}` end Before: 451196 889980 1328508 1767676 2206460 2645372 3083900 3522428 3960956 4399484 After: 12508 12636 12892 12892 13148 13404 13532 13788 13916 13916 --- test/ruby/test_string.rb | 28 ++++++++++++++++++++++++++++ transcode.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 1e0f31ba7c..6445832798 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -3747,6 +3747,34 @@ CODE Warning[:deprecated] = deprecated end + def test_encode_fallback_raise_memory_leak + { + "hash" => <<~RUBY, + fallback = Hash.new { raise } + RUBY + "proc" => <<~RUBY, + fallback = proc { raise } + RUBY + "method" => <<~RUBY, + def my_method = raise + fallback = method(:my_method) + RUBY + "aref" => <<~RUBY, + fallback = Object.new + def fallback.[] = raise + RUBY + }.each do |type, code| + assert_no_memory_leak([], '', <<~RUBY, "fallback type is #{type}", rss: true) + #{code} + + 100_000.times do |i| + "\\ufffd".encode(Encoding::US_ASCII, fallback:) + rescue + end + RUBY + end + end + private def assert_bytesplice_result(expected, s, *args) diff --git a/transcode.c b/transcode.c index 072e1942b1..09ecb1c651 100644 --- a/transcode.c +++ b/transcode.c @@ -2349,6 +2349,20 @@ aref_fallback(VALUE fallback, VALUE c) return rb_funcallv_public(fallback, idAREF, 1, &c); } +struct transcode_loop_fallback_args { + VALUE (*fallback_func)(VALUE, VALUE); + VALUE fallback; + VALUE rep; +}; + +static VALUE +transcode_loop_fallback_try(VALUE a) +{ + struct transcode_loop_fallback_args *args = (struct transcode_loop_fallback_args *)a; + + return args->fallback_func(args->fallback, args->rep); +} + static void transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, const unsigned char *in_stop, unsigned char *out_stop, @@ -2398,7 +2412,21 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, (const char *)ec->last_error.error_bytes_start, ec->last_error.error_bytes_len, rb_enc_find(ec->last_error.source_encoding)); - rep = (*fallback_func)(fallback, rep); + + + struct transcode_loop_fallback_args args = { + .fallback_func = fallback_func, + .fallback = fallback, + .rep = rep, + }; + + int state; + rep = rb_protect(transcode_loop_fallback_try, (VALUE)&args, &state); + if (state) { + rb_econv_close(ec); + rb_jump_tag(state); + } + if (!UNDEF_P(rep) && !NIL_P(rep)) { StringValue(rep); ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep),