Cache array length in rb_ary_join (#15362)

When all elements are strings, the array length never needs to be
recalculated inside the loop: no conversion methods are called, so the
length cannot change. This speeds up the fast path by ~10%.

```ruby
a = ["1"*10, "2"*10, "3"*10, "4"*10, "5"*10] * 10
10_000_000.times do
  a.join
end
```

```
hyperfine --warmup 1 'ruby ../ruby2/test.rb' './exe/ruby ../ruby2/test.rb'
Benchmark 1: ruby ../ruby2/test.rb
  Time (mean ± σ):      3.779 s ±  0.053 s    [User: 3.754 s, System: 0.017 s]
  Range (min … max):    3.715 s …  3.874 s    10 runs

Benchmark 2: ./exe/ruby ../ruby2/test.rb
  Time (mean ± σ):      3.411 s ±  0.038 s    [User: 3.387 s, System: 0.017 s]
  Range (min … max):    3.360 s …  3.472 s    10 runs

Summary
  ./exe/ruby ../ruby2/test.rb ran
    1.11 ± 0.02 times faster than ruby ../ruby2/test.rb
```
This commit is contained in:
Luke Gruber 2025-12-02 17:35:53 -05:00 committed by GitHub
parent a63147eed1
commit a211abbcbd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
Notes: git 2025-12-02 22:36:22 +00:00
Merged-By: luke-gru <luke.gru@gmail.com>

35
array.c
View File

@ -2917,23 +2917,28 @@ rb_ary_join(VALUE ary, VALUE sep)
StringValue(sep);
len += RSTRING_LEN(sep) * (RARRAY_LEN(ary) - 1);
}
for (i=0; i<RARRAY_LEN(ary); i++) {
long len_memo = RARRAY_LEN(ary);
for (i=0; i < len_memo; i++) {
val = RARRAY_AREF(ary, i);
tmp = rb_check_string_type(val);
if (NIL_P(tmp) || tmp != val) {
int first;
long n = RARRAY_LEN(ary);
if (i > n) i = n;
result = rb_str_buf_new(len + (n-i)*10);
rb_enc_associate(result, rb_usascii_encoding());
i = ary_join_0(ary, sep, i, result);
first = i == 0;
ary_join_1(ary, ary, sep, i, result, &first);
return result;
if (RB_UNLIKELY(!RB_TYPE_P(val, T_STRING))) {
tmp = rb_check_string_type(val);
if (NIL_P(tmp) || tmp != val) {
int first;
long n = RARRAY_LEN(ary);
if (i > n) i = n;
result = rb_str_buf_new(len + (n-i)*10);
rb_enc_associate(result, rb_usascii_encoding());
i = ary_join_0(ary, sep, i, result);
first = i == 0;
ary_join_1(ary, ary, sep, i, result, &first);
return result;
}
len += RSTRING_LEN(tmp);
len_memo = RARRAY_LEN(ary);
}
else {
len += RSTRING_LEN(val);
}
len += RSTRING_LEN(tmp);
}
result = rb_str_new(0, len);