Optimize File.join common use case

`File.join` is a hotspot for common libraries such as Zeitwerk
and Bootsnap. It has a fairly flexible signature, but 99% of
the time it's called with just two (or a small number of) UTF-8 strings.

If we optimistically optimize for that use case, we can cut out a large
number of type and encoding checks, significantly speeding up the method.

The one remaining expensive check we could try to optimize is `str_null_check`.
Given it's common to use the same base string for joining, we could memoize its result.
Also, we could precompute it for literal strings.

```
compare-ruby: ruby 4.1.0dev (2026-01-17T14:40:03Z master 00a3b71eaf) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-18T12:10:38Z spedup-file-join 069bab58d4) +PRISM [arm64-darwin25]
warming up....

|              |compare-ruby|built-ruby|
|:-------------|-----------:|---------:|
|two_strings   |      2.475M|    9.444M|
|              |           -|     3.82x|
|many_strings  |    551.975k|    2.346M|
|              |           -|     4.25x|
|array         |    514.946k|  522.034k|
|              |           -|     1.01x|
|mixed         |    621.236k|  633.189k|
|              |           -|     1.02x|
```
This commit is contained in:
Jean Boussier 2026-01-18 10:33:54 +01:00
parent d1dc4bdb2f
commit 6cd4549060
Notes: git 2026-01-18 15:32:02 +00:00
10 changed files with 197 additions and 39 deletions

7
benchmark/file_join.yml Normal file
View File

@ -0,0 +1,7 @@
prelude: |
# frozen_string_literal: true
benchmark:
two_strings: File.join(__FILE__, "path")
many_strings: File.join(__FILE__, "path", "a", "b", "c", "d")
array: File.join([__FILE__, "path", "a", "b", "c", "d"])
mixed: File.join(__FILE__, "path", "a", "b", ["c", "d"])

41
depend
View File

@ -799,6 +799,7 @@ box.$(OBJEXT): {$(VPATH)}constant.h
box.$(OBJEXT): {$(VPATH)}darray.h
box.$(OBJEXT): {$(VPATH)}debug_counter.h
box.$(OBJEXT): {$(VPATH)}defines.h
box.$(OBJEXT): {$(VPATH)}encindex.h
box.$(OBJEXT): {$(VPATH)}encoding.h
box.$(OBJEXT): {$(VPATH)}eval_intern.h
box.$(OBJEXT): {$(VPATH)}id.h
@ -1250,6 +1251,7 @@ class.$(OBJEXT): {$(VPATH)}config.h
class.$(OBJEXT): {$(VPATH)}constant.h
class.$(OBJEXT): {$(VPATH)}debug_counter.h
class.$(OBJEXT): {$(VPATH)}defines.h
class.$(OBJEXT): {$(VPATH)}encindex.h
class.$(OBJEXT): {$(VPATH)}encoding.h
class.$(OBJEXT): {$(VPATH)}id.h
class.$(OBJEXT): {$(VPATH)}id_table.h
@ -1449,6 +1451,7 @@ compar.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
compar.$(OBJEXT): {$(VPATH)}compar.c
compar.$(OBJEXT): {$(VPATH)}config.h
compar.$(OBJEXT): {$(VPATH)}defines.h
compar.$(OBJEXT): {$(VPATH)}encindex.h
compar.$(OBJEXT): {$(VPATH)}encoding.h
compar.$(OBJEXT): {$(VPATH)}id.h
compar.$(OBJEXT): {$(VPATH)}intern.h
@ -1921,6 +1924,7 @@ complex.$(OBJEXT): {$(VPATH)}config.h
complex.$(OBJEXT): {$(VPATH)}constant.h
complex.$(OBJEXT): {$(VPATH)}debug_counter.h
complex.$(OBJEXT): {$(VPATH)}defines.h
complex.$(OBJEXT): {$(VPATH)}encindex.h
complex.$(OBJEXT): {$(VPATH)}encoding.h
complex.$(OBJEXT): {$(VPATH)}id.h
complex.$(OBJEXT): {$(VPATH)}id_table.h
@ -2126,6 +2130,7 @@ concurrent_set.$(OBJEXT): {$(VPATH)}concurrent_set.c
concurrent_set.$(OBJEXT): {$(VPATH)}config.h
concurrent_set.$(OBJEXT): {$(VPATH)}debug_counter.h
concurrent_set.$(OBJEXT): {$(VPATH)}defines.h
concurrent_set.$(OBJEXT): {$(VPATH)}encindex.h
concurrent_set.$(OBJEXT): {$(VPATH)}encoding.h
concurrent_set.$(OBJEXT): {$(VPATH)}id.h
concurrent_set.$(OBJEXT): {$(VPATH)}id_table.h
@ -2364,6 +2369,7 @@ cont.$(OBJEXT): {$(VPATH)}constant.h
cont.$(OBJEXT): {$(VPATH)}cont.c
cont.$(OBJEXT): {$(VPATH)}debug_counter.h
cont.$(OBJEXT): {$(VPATH)}defines.h
cont.$(OBJEXT): {$(VPATH)}encindex.h
cont.$(OBJEXT): {$(VPATH)}encoding.h
cont.$(OBJEXT): {$(VPATH)}eval_intern.h
cont.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
@ -4906,6 +4912,7 @@ enumerator.$(OBJEXT): {$(VPATH)}config.h
enumerator.$(OBJEXT): {$(VPATH)}constant.h
enumerator.$(OBJEXT): {$(VPATH)}debug_counter.h
enumerator.$(OBJEXT): {$(VPATH)}defines.h
enumerator.$(OBJEXT): {$(VPATH)}encindex.h
enumerator.$(OBJEXT): {$(VPATH)}encoding.h
enumerator.$(OBJEXT): {$(VPATH)}enumerator.c
enumerator.$(OBJEXT): {$(VPATH)}id.h
@ -5126,6 +5133,7 @@ error.$(OBJEXT): {$(VPATH)}config.h
error.$(OBJEXT): {$(VPATH)}constant.h
error.$(OBJEXT): {$(VPATH)}debug_counter.h
error.$(OBJEXT): {$(VPATH)}defines.h
error.$(OBJEXT): {$(VPATH)}encindex.h
error.$(OBJEXT): {$(VPATH)}encoding.h
error.$(OBJEXT): {$(VPATH)}error.c
error.$(OBJEXT): {$(VPATH)}id.h
@ -5373,6 +5381,7 @@ eval.$(OBJEXT): {$(VPATH)}config.h
eval.$(OBJEXT): {$(VPATH)}constant.h
eval.$(OBJEXT): {$(VPATH)}debug_counter.h
eval.$(OBJEXT): {$(VPATH)}defines.h
eval.$(OBJEXT): {$(VPATH)}encindex.h
eval.$(OBJEXT): {$(VPATH)}encoding.h
eval.$(OBJEXT): {$(VPATH)}eval.c
eval.$(OBJEXT): {$(VPATH)}eval_error.c
@ -5584,6 +5593,7 @@ file.$(OBJEXT): $(top_srcdir)/internal/array.h
file.$(OBJEXT): $(top_srcdir)/internal/class.h
file.$(OBJEXT): $(top_srcdir)/internal/compilers.h
file.$(OBJEXT): $(top_srcdir)/internal/dir.h
file.$(OBJEXT): $(top_srcdir)/internal/encoding.h
file.$(OBJEXT): $(top_srcdir)/internal/error.h
file.$(OBJEXT): $(top_srcdir)/internal/file.h
file.$(OBJEXT): $(top_srcdir)/internal/gc.h
@ -5865,6 +5875,7 @@ gc.$(OBJEXT): {$(VPATH)}darray.h
gc.$(OBJEXT): {$(VPATH)}debug.h
gc.$(OBJEXT): {$(VPATH)}debug_counter.h
gc.$(OBJEXT): {$(VPATH)}defines.h
gc.$(OBJEXT): {$(VPATH)}encindex.h
gc.$(OBJEXT): {$(VPATH)}encoding.h
gc.$(OBJEXT): {$(VPATH)}eval_intern.h
gc.$(OBJEXT): {$(VPATH)}gc.c
@ -6373,6 +6384,7 @@ hash.$(OBJEXT): {$(VPATH)}config.h
hash.$(OBJEXT): {$(VPATH)}constant.h
hash.$(OBJEXT): {$(VPATH)}debug_counter.h
hash.$(OBJEXT): {$(VPATH)}defines.h
hash.$(OBJEXT): {$(VPATH)}encindex.h
hash.$(OBJEXT): {$(VPATH)}encoding.h
hash.$(OBJEXT): {$(VPATH)}hash.c
hash.$(OBJEXT): {$(VPATH)}hash.rbinc
@ -7203,6 +7215,7 @@ io_buffer.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
io_buffer.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
io_buffer.$(OBJEXT): {$(VPATH)}config.h
io_buffer.$(OBJEXT): {$(VPATH)}defines.h
io_buffer.$(OBJEXT): {$(VPATH)}encindex.h
io_buffer.$(OBJEXT): {$(VPATH)}encoding.h
io_buffer.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
io_buffer.$(OBJEXT): {$(VPATH)}id.h
@ -7454,6 +7467,7 @@ iseq.$(OBJEXT): {$(VPATH)}config.h
iseq.$(OBJEXT): {$(VPATH)}constant.h
iseq.$(OBJEXT): {$(VPATH)}debug_counter.h
iseq.$(OBJEXT): {$(VPATH)}defines.h
iseq.$(OBJEXT): {$(VPATH)}encindex.h
iseq.$(OBJEXT): {$(VPATH)}encoding.h
iseq.$(OBJEXT): {$(VPATH)}eval_intern.h
iseq.$(OBJEXT): {$(VPATH)}id.h
@ -7702,6 +7716,7 @@ jit.$(OBJEXT): {$(VPATH)}config.h
jit.$(OBJEXT): {$(VPATH)}constant.h
jit.$(OBJEXT): {$(VPATH)}debug_counter.h
jit.$(OBJEXT): {$(VPATH)}defines.h
jit.$(OBJEXT): {$(VPATH)}encindex.h
jit.$(OBJEXT): {$(VPATH)}encoding.h
jit.$(OBJEXT): {$(VPATH)}id.h
jit.$(OBJEXT): {$(VPATH)}id_table.h
@ -7956,6 +7971,7 @@ load.$(OBJEXT): {$(VPATH)}constant.h
load.$(OBJEXT): {$(VPATH)}darray.h
load.$(OBJEXT): {$(VPATH)}defines.h
load.$(OBJEXT): {$(VPATH)}dln.h
load.$(OBJEXT): {$(VPATH)}encindex.h
load.$(OBJEXT): {$(VPATH)}encoding.h
load.$(OBJEXT): {$(VPATH)}eval_intern.h
load.$(OBJEXT): {$(VPATH)}id.h
@ -9979,6 +9995,7 @@ numeric.$(OBJEXT): {$(VPATH)}builtin.h
numeric.$(OBJEXT): {$(VPATH)}config.h
numeric.$(OBJEXT): {$(VPATH)}constant.h
numeric.$(OBJEXT): {$(VPATH)}defines.h
numeric.$(OBJEXT): {$(VPATH)}encindex.h
numeric.$(OBJEXT): {$(VPATH)}encoding.h
numeric.$(OBJEXT): {$(VPATH)}id.h
numeric.$(OBJEXT): {$(VPATH)}id_table.h
@ -10200,6 +10217,7 @@ object.$(OBJEXT): {$(VPATH)}config.h
object.$(OBJEXT): {$(VPATH)}constant.h
object.$(OBJEXT): {$(VPATH)}debug_counter.h
object.$(OBJEXT): {$(VPATH)}defines.h
object.$(OBJEXT): {$(VPATH)}encindex.h
object.$(OBJEXT): {$(VPATH)}encoding.h
object.$(OBJEXT): {$(VPATH)}id.h
object.$(OBJEXT): {$(VPATH)}id_table.h
@ -10418,6 +10436,7 @@ pack.$(OBJEXT): {$(VPATH)}builtin.h
pack.$(OBJEXT): {$(VPATH)}config.h
pack.$(OBJEXT): {$(VPATH)}constant.h
pack.$(OBJEXT): {$(VPATH)}defines.h
pack.$(OBJEXT): {$(VPATH)}encindex.h
pack.$(OBJEXT): {$(VPATH)}encoding.h
pack.$(OBJEXT): {$(VPATH)}id.h
pack.$(OBJEXT): {$(VPATH)}id_table.h
@ -10644,6 +10663,7 @@ parse.$(OBJEXT): {$(VPATH)}config.h
parse.$(OBJEXT): {$(VPATH)}constant.h
parse.$(OBJEXT): {$(VPATH)}defines.h
parse.$(OBJEXT): {$(VPATH)}defs/keywords
parse.$(OBJEXT): {$(VPATH)}encindex.h
parse.$(OBJEXT): {$(VPATH)}encoding.h
parse.$(OBJEXT): {$(VPATH)}id.h
parse.$(OBJEXT): {$(VPATH)}id_table.h
@ -12125,6 +12145,7 @@ proc.$(OBJEXT): {$(VPATH)}config.h
proc.$(OBJEXT): {$(VPATH)}constant.h
proc.$(OBJEXT): {$(VPATH)}debug_counter.h
proc.$(OBJEXT): {$(VPATH)}defines.h
proc.$(OBJEXT): {$(VPATH)}encindex.h
proc.$(OBJEXT): {$(VPATH)}encoding.h
proc.$(OBJEXT): {$(VPATH)}eval_intern.h
proc.$(OBJEXT): {$(VPATH)}id.h
@ -12356,6 +12377,7 @@ process.$(OBJEXT): {$(VPATH)}constant.h
process.$(OBJEXT): {$(VPATH)}debug_counter.h
process.$(OBJEXT): {$(VPATH)}defines.h
process.$(OBJEXT): {$(VPATH)}dln.h
process.$(OBJEXT): {$(VPATH)}encindex.h
process.$(OBJEXT): {$(VPATH)}encoding.h
process.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
process.$(OBJEXT): {$(VPATH)}hrtime.h
@ -12585,6 +12607,7 @@ ractor.$(OBJEXT): {$(VPATH)}config.h
ractor.$(OBJEXT): {$(VPATH)}constant.h
ractor.$(OBJEXT): {$(VPATH)}debug_counter.h
ractor.$(OBJEXT): {$(VPATH)}defines.h
ractor.$(OBJEXT): {$(VPATH)}encindex.h
ractor.$(OBJEXT): {$(VPATH)}encoding.h
ractor.$(OBJEXT): {$(VPATH)}eval_intern.h
ractor.$(OBJEXT): {$(VPATH)}id.h
@ -13018,6 +13041,7 @@ range.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
range.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
range.$(OBJEXT): {$(VPATH)}config.h
range.$(OBJEXT): {$(VPATH)}defines.h
range.$(OBJEXT): {$(VPATH)}encindex.h
range.$(OBJEXT): {$(VPATH)}encoding.h
range.$(OBJEXT): {$(VPATH)}id.h
range.$(OBJEXT): {$(VPATH)}id_table.h
@ -14688,6 +14712,7 @@ ruby.$(OBJEXT): {$(VPATH)}constant.h
ruby.$(OBJEXT): {$(VPATH)}debug_counter.h
ruby.$(OBJEXT): {$(VPATH)}defines.h
ruby.$(OBJEXT): {$(VPATH)}dln.h
ruby.$(OBJEXT): {$(VPATH)}encindex.h
ruby.$(OBJEXT): {$(VPATH)}encoding.h
ruby.$(OBJEXT): {$(VPATH)}eval_intern.h
ruby.$(OBJEXT): {$(VPATH)}id.h
@ -14896,6 +14921,7 @@ ruby_parser.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
ruby_parser.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
ruby_parser.$(OBJEXT): {$(VPATH)}config.h
ruby_parser.$(OBJEXT): {$(VPATH)}defines.h
ruby_parser.$(OBJEXT): {$(VPATH)}encindex.h
ruby_parser.$(OBJEXT): {$(VPATH)}encoding.h
ruby_parser.$(OBJEXT): {$(VPATH)}intern.h
ruby_parser.$(OBJEXT): {$(VPATH)}internal.h
@ -15306,6 +15332,7 @@ set.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
set.$(OBJEXT): {$(VPATH)}config.h
set.$(OBJEXT): {$(VPATH)}constant.h
set.$(OBJEXT): {$(VPATH)}defines.h
set.$(OBJEXT): {$(VPATH)}encindex.h
set.$(OBJEXT): {$(VPATH)}encoding.h
set.$(OBJEXT): {$(VPATH)}id.h
set.$(OBJEXT): {$(VPATH)}id_table.h
@ -15678,6 +15705,7 @@ shape.$(OBJEXT): {$(VPATH)}config.h
shape.$(OBJEXT): {$(VPATH)}constant.h
shape.$(OBJEXT): {$(VPATH)}debug_counter.h
shape.$(OBJEXT): {$(VPATH)}defines.h
shape.$(OBJEXT): {$(VPATH)}encindex.h
shape.$(OBJEXT): {$(VPATH)}encoding.h
shape.$(OBJEXT): {$(VPATH)}id.h
shape.$(OBJEXT): {$(VPATH)}id_table.h
@ -15892,6 +15920,7 @@ signal.$(OBJEXT): {$(VPATH)}config.h
signal.$(OBJEXT): {$(VPATH)}constant.h
signal.$(OBJEXT): {$(VPATH)}debug_counter.h
signal.$(OBJEXT): {$(VPATH)}defines.h
signal.$(OBJEXT): {$(VPATH)}encindex.h
signal.$(OBJEXT): {$(VPATH)}encoding.h
signal.$(OBJEXT): {$(VPATH)}eval_intern.h
signal.$(OBJEXT): {$(VPATH)}id.h
@ -16101,6 +16130,7 @@ sprintf.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
sprintf.$(OBJEXT): {$(VPATH)}config.h
sprintf.$(OBJEXT): {$(VPATH)}constant.h
sprintf.$(OBJEXT): {$(VPATH)}defines.h
sprintf.$(OBJEXT): {$(VPATH)}encindex.h
sprintf.$(OBJEXT): {$(VPATH)}encoding.h
sprintf.$(OBJEXT): {$(VPATH)}id.h
sprintf.$(OBJEXT): {$(VPATH)}id_table.h
@ -16457,6 +16487,7 @@ strftime.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
strftime.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
strftime.$(OBJEXT): {$(VPATH)}config.h
strftime.$(OBJEXT): {$(VPATH)}defines.h
strftime.$(OBJEXT): {$(VPATH)}encindex.h
strftime.$(OBJEXT): {$(VPATH)}encoding.h
strftime.$(OBJEXT): {$(VPATH)}intern.h
strftime.$(OBJEXT): {$(VPATH)}internal.h
@ -16925,6 +16956,7 @@ struct.$(OBJEXT): {$(VPATH)}config.h
struct.$(OBJEXT): {$(VPATH)}constant.h
struct.$(OBJEXT): {$(VPATH)}debug_counter.h
struct.$(OBJEXT): {$(VPATH)}defines.h
struct.$(OBJEXT): {$(VPATH)}encindex.h
struct.$(OBJEXT): {$(VPATH)}encoding.h
struct.$(OBJEXT): {$(VPATH)}id.h
struct.$(OBJEXT): {$(VPATH)}id_table.h
@ -17141,6 +17173,7 @@ symbol.$(OBJEXT): {$(VPATH)}constant.h
symbol.$(OBJEXT): {$(VPATH)}darray.h
symbol.$(OBJEXT): {$(VPATH)}debug_counter.h
symbol.$(OBJEXT): {$(VPATH)}defines.h
symbol.$(OBJEXT): {$(VPATH)}encindex.h
symbol.$(OBJEXT): {$(VPATH)}encoding.h
symbol.$(OBJEXT): {$(VPATH)}id.c
symbol.$(OBJEXT): {$(VPATH)}id.h
@ -17398,6 +17431,7 @@ thread.$(OBJEXT): {$(VPATH)}constant.h
thread.$(OBJEXT): {$(VPATH)}debug.h
thread.$(OBJEXT): {$(VPATH)}debug_counter.h
thread.$(OBJEXT): {$(VPATH)}defines.h
thread.$(OBJEXT): {$(VPATH)}encindex.h
thread.$(OBJEXT): {$(VPATH)}encoding.h
thread.$(OBJEXT): {$(VPATH)}eval_intern.h
thread.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
@ -17628,6 +17662,7 @@ time.$(OBJEXT): {$(VPATH)}builtin.h
time.$(OBJEXT): {$(VPATH)}config.h
time.$(OBJEXT): {$(VPATH)}constant.h
time.$(OBJEXT): {$(VPATH)}defines.h
time.$(OBJEXT): {$(VPATH)}encindex.h
time.$(OBJEXT): {$(VPATH)}encoding.h
time.$(OBJEXT): {$(VPATH)}id.h
time.$(OBJEXT): {$(VPATH)}id_table.h
@ -17830,6 +17865,7 @@ transcode.$(OBJEXT): {$(VPATH)}config.h
transcode.$(OBJEXT): {$(VPATH)}constant.h
transcode.$(OBJEXT): {$(VPATH)}debug_counter.h
transcode.$(OBJEXT): {$(VPATH)}defines.h
transcode.$(OBJEXT): {$(VPATH)}encindex.h
transcode.$(OBJEXT): {$(VPATH)}encoding.h
transcode.$(OBJEXT): {$(VPATH)}id.h
transcode.$(OBJEXT): {$(VPATH)}id_table.h
@ -18211,6 +18247,7 @@ variable.$(OBJEXT): {$(VPATH)}config.h
variable.$(OBJEXT): {$(VPATH)}constant.h
variable.$(OBJEXT): {$(VPATH)}debug_counter.h
variable.$(OBJEXT): {$(VPATH)}defines.h
variable.$(OBJEXT): {$(VPATH)}encindex.h
variable.$(OBJEXT): {$(VPATH)}encoding.h
variable.$(OBJEXT): {$(VPATH)}id.h
variable.$(OBJEXT): {$(VPATH)}id_table.h
@ -18687,6 +18724,7 @@ vm.$(OBJEXT): {$(VPATH)}constant.h
vm.$(OBJEXT): {$(VPATH)}debug_counter.h
vm.$(OBJEXT): {$(VPATH)}defines.h
vm.$(OBJEXT): {$(VPATH)}defs/opt_operand.def
vm.$(OBJEXT): {$(VPATH)}encindex.h
vm.$(OBJEXT): {$(VPATH)}encoding.h
vm.$(OBJEXT): {$(VPATH)}eval_intern.h
vm.$(OBJEXT): {$(VPATH)}id.h
@ -18951,6 +18989,7 @@ vm_backtrace.$(OBJEXT): {$(VPATH)}constant.h
vm_backtrace.$(OBJEXT): {$(VPATH)}debug.h
vm_backtrace.$(OBJEXT): {$(VPATH)}debug_counter.h
vm_backtrace.$(OBJEXT): {$(VPATH)}defines.h
vm_backtrace.$(OBJEXT): {$(VPATH)}encindex.h
vm_backtrace.$(OBJEXT): {$(VPATH)}encoding.h
vm_backtrace.$(OBJEXT): {$(VPATH)}eval_intern.h
vm_backtrace.$(OBJEXT): {$(VPATH)}id.h
@ -20087,6 +20126,7 @@ yjit.$(OBJEXT): {$(VPATH)}constant.h
yjit.$(OBJEXT): {$(VPATH)}debug.h
yjit.$(OBJEXT): {$(VPATH)}debug_counter.h
yjit.$(OBJEXT): {$(VPATH)}defines.h
yjit.$(OBJEXT): {$(VPATH)}encindex.h
yjit.$(OBJEXT): {$(VPATH)}encoding.h
yjit.$(OBJEXT): {$(VPATH)}id.h
yjit.$(OBJEXT): {$(VPATH)}id_table.h
@ -20342,6 +20382,7 @@ zjit.$(OBJEXT): {$(VPATH)}constant.h
zjit.$(OBJEXT): {$(VPATH)}debug.h
zjit.$(OBJEXT): {$(VPATH)}debug_counter.h
zjit.$(OBJEXT): {$(VPATH)}defines.h
zjit.$(OBJEXT): {$(VPATH)}encindex.h
zjit.$(OBJEXT): {$(VPATH)}encoding.h
zjit.$(OBJEXT): {$(VPATH)}id.h
zjit.$(OBJEXT): {$(VPATH)}id_table.h

View File

@ -172,6 +172,7 @@ stack.o: $(hdrdir)/ruby/oniguruma.h
stack.o: $(hdrdir)/ruby/ruby.h
stack.o: $(hdrdir)/ruby/st.h
stack.o: $(hdrdir)/ruby/subst.h
stack.o: $(top_srcdir)/encindex.h
stack.o: $(top_srcdir)/internal/compilers.h
stack.o: $(top_srcdir)/internal/string.h
stack.o: stack.c

View File

@ -172,6 +172,7 @@ capacity.o: $(hdrdir)/ruby/oniguruma.h
capacity.o: $(hdrdir)/ruby/ruby.h
capacity.o: $(hdrdir)/ruby/st.h
capacity.o: $(hdrdir)/ruby/subst.h
capacity.o: $(top_srcdir)/encindex.h
capacity.o: $(top_srcdir)/internal/compilers.h
capacity.o: $(top_srcdir)/internal/string.h
capacity.o: capacity.c
@ -679,6 +680,7 @@ cstr.o: $(hdrdir)/ruby/oniguruma.h
cstr.o: $(hdrdir)/ruby/ruby.h
cstr.o: $(hdrdir)/ruby/st.h
cstr.o: $(hdrdir)/ruby/subst.h
cstr.o: $(top_srcdir)/encindex.h
cstr.o: $(top_srcdir)/internal.h
cstr.o: $(top_srcdir)/internal/compilers.h
cstr.o: $(top_srcdir)/internal/string.h
@ -1535,6 +1537,7 @@ fstring.o: $(hdrdir)/ruby/oniguruma.h
fstring.o: $(hdrdir)/ruby/ruby.h
fstring.o: $(hdrdir)/ruby/st.h
fstring.o: $(hdrdir)/ruby/subst.h
fstring.o: $(top_srcdir)/encindex.h
fstring.o: $(top_srcdir)/internal/compilers.h
fstring.o: $(top_srcdir)/internal/string.h
fstring.o: fstring.c

View File

@ -602,6 +602,7 @@ objspace_dump.o: $(top_srcdir)/ccan/list/list.h
objspace_dump.o: $(top_srcdir)/ccan/str/str.h
objspace_dump.o: $(top_srcdir)/constant.h
objspace_dump.o: $(top_srcdir)/debug_counter.h
objspace_dump.o: $(top_srcdir)/encindex.h
objspace_dump.o: $(top_srcdir)/id_table.h
objspace_dump.o: $(top_srcdir)/internal.h
objspace_dump.o: $(top_srcdir)/internal/array.h

View File

@ -578,6 +578,7 @@ ripper.o: $(top_srcdir)/ccan/container_of/container_of.h
ripper.o: $(top_srcdir)/ccan/list/list.h
ripper.o: $(top_srcdir)/ccan/str/str.h
ripper.o: $(top_srcdir)/constant.h
ripper.o: $(top_srcdir)/encindex.h
ripper.o: $(top_srcdir)/id_table.h
ripper.o: $(top_srcdir)/internal.h
ripper.o: $(top_srcdir)/internal/array.h

View File

@ -193,6 +193,7 @@ ancdata.o: $(top_srcdir)/ccan/check_type/check_type.h
ancdata.o: $(top_srcdir)/ccan/container_of/container_of.h
ancdata.o: $(top_srcdir)/ccan/list/list.h
ancdata.o: $(top_srcdir)/ccan/str/str.h
ancdata.o: $(top_srcdir)/encindex.h
ancdata.o: $(top_srcdir)/id_table.h
ancdata.o: $(top_srcdir)/internal.h
ancdata.o: $(top_srcdir)/internal/array.h
@ -408,6 +409,7 @@ basicsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
basicsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
basicsocket.o: $(top_srcdir)/ccan/list/list.h
basicsocket.o: $(top_srcdir)/ccan/str/str.h
basicsocket.o: $(top_srcdir)/encindex.h
basicsocket.o: $(top_srcdir)/id_table.h
basicsocket.o: $(top_srcdir)/internal.h
basicsocket.o: $(top_srcdir)/internal/array.h
@ -623,6 +625,7 @@ constants.o: $(top_srcdir)/ccan/check_type/check_type.h
constants.o: $(top_srcdir)/ccan/container_of/container_of.h
constants.o: $(top_srcdir)/ccan/list/list.h
constants.o: $(top_srcdir)/ccan/str/str.h
constants.o: $(top_srcdir)/encindex.h
constants.o: $(top_srcdir)/id_table.h
constants.o: $(top_srcdir)/internal.h
constants.o: $(top_srcdir)/internal/array.h
@ -839,6 +842,7 @@ ifaddr.o: $(top_srcdir)/ccan/check_type/check_type.h
ifaddr.o: $(top_srcdir)/ccan/container_of/container_of.h
ifaddr.o: $(top_srcdir)/ccan/list/list.h
ifaddr.o: $(top_srcdir)/ccan/str/str.h
ifaddr.o: $(top_srcdir)/encindex.h
ifaddr.o: $(top_srcdir)/id_table.h
ifaddr.o: $(top_srcdir)/internal.h
ifaddr.o: $(top_srcdir)/internal/array.h
@ -1054,6 +1058,7 @@ init.o: $(top_srcdir)/ccan/check_type/check_type.h
init.o: $(top_srcdir)/ccan/container_of/container_of.h
init.o: $(top_srcdir)/ccan/list/list.h
init.o: $(top_srcdir)/ccan/str/str.h
init.o: $(top_srcdir)/encindex.h
init.o: $(top_srcdir)/id_table.h
init.o: $(top_srcdir)/internal.h
init.o: $(top_srcdir)/internal/array.h
@ -1269,6 +1274,7 @@ ipsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
ipsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
ipsocket.o: $(top_srcdir)/ccan/list/list.h
ipsocket.o: $(top_srcdir)/ccan/str/str.h
ipsocket.o: $(top_srcdir)/encindex.h
ipsocket.o: $(top_srcdir)/id_table.h
ipsocket.o: $(top_srcdir)/internal.h
ipsocket.o: $(top_srcdir)/internal/array.h
@ -1484,6 +1490,7 @@ option.o: $(top_srcdir)/ccan/check_type/check_type.h
option.o: $(top_srcdir)/ccan/container_of/container_of.h
option.o: $(top_srcdir)/ccan/list/list.h
option.o: $(top_srcdir)/ccan/str/str.h
option.o: $(top_srcdir)/encindex.h
option.o: $(top_srcdir)/id_table.h
option.o: $(top_srcdir)/internal.h
option.o: $(top_srcdir)/internal/array.h
@ -1699,6 +1706,7 @@ raddrinfo.o: $(top_srcdir)/ccan/check_type/check_type.h
raddrinfo.o: $(top_srcdir)/ccan/container_of/container_of.h
raddrinfo.o: $(top_srcdir)/ccan/list/list.h
raddrinfo.o: $(top_srcdir)/ccan/str/str.h
raddrinfo.o: $(top_srcdir)/encindex.h
raddrinfo.o: $(top_srcdir)/id_table.h
raddrinfo.o: $(top_srcdir)/internal.h
raddrinfo.o: $(top_srcdir)/internal/array.h
@ -1914,6 +1922,7 @@ socket.o: $(top_srcdir)/ccan/check_type/check_type.h
socket.o: $(top_srcdir)/ccan/container_of/container_of.h
socket.o: $(top_srcdir)/ccan/list/list.h
socket.o: $(top_srcdir)/ccan/str/str.h
socket.o: $(top_srcdir)/encindex.h
socket.o: $(top_srcdir)/id_table.h
socket.o: $(top_srcdir)/internal.h
socket.o: $(top_srcdir)/internal/array.h
@ -2129,6 +2138,7 @@ sockssocket.o: $(top_srcdir)/ccan/check_type/check_type.h
sockssocket.o: $(top_srcdir)/ccan/container_of/container_of.h
sockssocket.o: $(top_srcdir)/ccan/list/list.h
sockssocket.o: $(top_srcdir)/ccan/str/str.h
sockssocket.o: $(top_srcdir)/encindex.h
sockssocket.o: $(top_srcdir)/id_table.h
sockssocket.o: $(top_srcdir)/internal.h
sockssocket.o: $(top_srcdir)/internal/array.h
@ -2344,6 +2354,7 @@ tcpserver.o: $(top_srcdir)/ccan/check_type/check_type.h
tcpserver.o: $(top_srcdir)/ccan/container_of/container_of.h
tcpserver.o: $(top_srcdir)/ccan/list/list.h
tcpserver.o: $(top_srcdir)/ccan/str/str.h
tcpserver.o: $(top_srcdir)/encindex.h
tcpserver.o: $(top_srcdir)/id_table.h
tcpserver.o: $(top_srcdir)/internal.h
tcpserver.o: $(top_srcdir)/internal/array.h
@ -2559,6 +2570,7 @@ tcpsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
tcpsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
tcpsocket.o: $(top_srcdir)/ccan/list/list.h
tcpsocket.o: $(top_srcdir)/ccan/str/str.h
tcpsocket.o: $(top_srcdir)/encindex.h
tcpsocket.o: $(top_srcdir)/id_table.h
tcpsocket.o: $(top_srcdir)/internal.h
tcpsocket.o: $(top_srcdir)/internal/array.h
@ -2774,6 +2786,7 @@ udpsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
udpsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
udpsocket.o: $(top_srcdir)/ccan/list/list.h
udpsocket.o: $(top_srcdir)/ccan/str/str.h
udpsocket.o: $(top_srcdir)/encindex.h
udpsocket.o: $(top_srcdir)/id_table.h
udpsocket.o: $(top_srcdir)/internal.h
udpsocket.o: $(top_srcdir)/internal/array.h
@ -2989,6 +3002,7 @@ unixserver.o: $(top_srcdir)/ccan/check_type/check_type.h
unixserver.o: $(top_srcdir)/ccan/container_of/container_of.h
unixserver.o: $(top_srcdir)/ccan/list/list.h
unixserver.o: $(top_srcdir)/ccan/str/str.h
unixserver.o: $(top_srcdir)/encindex.h
unixserver.o: $(top_srcdir)/id_table.h
unixserver.o: $(top_srcdir)/internal.h
unixserver.o: $(top_srcdir)/internal/array.h
@ -3204,6 +3218,7 @@ unixsocket.o: $(top_srcdir)/ccan/check_type/check_type.h
unixsocket.o: $(top_srcdir)/ccan/container_of/container_of.h
unixsocket.o: $(top_srcdir)/ccan/list/list.h
unixsocket.o: $(top_srcdir)/ccan/str/str.h
unixsocket.o: $(top_srcdir)/encindex.h
unixsocket.o: $(top_srcdir)/id_table.h
unixsocket.o: $(top_srcdir)/internal.h
unixsocket.o: $(top_srcdir)/internal/array.h

99
file.c
View File

@ -169,6 +169,7 @@ typedef struct timespec stat_timestamp;
#include "internal.h"
#include "internal/compilers.h"
#include "internal/dir.h"
#include "internal/encoding.h"
#include "internal/error.h"
#include "internal/file.h"
#include "internal/io.h"
@ -3713,6 +3714,22 @@ chompdirsep(const char *path, const char *end, rb_encoding *enc)
return (char *)path;
}
/*
 * Byte-wise scan of [path, end) for a trailing run of directory separators.
 *
 * Returns a pointer to the first separator of a run that extends all the
 * way to `end`, or the position where the scan stopped (`end` for a
 * non-empty range) when the range does not finish with a separator.
 * Every byte is examined individually, without consulting any encoding.
 */
static char *
single_byte_chompdirsep(const char *path, const char *end)
{
    const char *cursor = path;

    while (cursor < end) {
        if (!isdirsep(*cursor)) {
            cursor++;
            continue;
        }

        /* Remember where this run of separators begins, then skip over it. */
        const char *run_start = cursor;
        do {
            cursor++;
        } while (cursor < end && isdirsep(*cursor));

        /* The run reached the end of the range: it is the trailing one. */
        if (cursor >= end) {
            return (char *)run_start;
        }
    }

    return (char *)cursor;
}
char *
rb_enc_path_end(const char *path, const char *end, rb_encoding *enc)
{
@ -3723,7 +3740,7 @@ rb_enc_path_end(const char *path, const char *end, rb_encoding *enc)
static rb_encoding *
fs_enc_check(VALUE path1, VALUE path2)
{
rb_encoding *enc = rb_enc_check(path1, path2);
rb_encoding *enc = rb_enc_check_str(path1, path2);
int encidx = rb_enc_to_index(enc);
if (encidx == ENCINDEX_US_ASCII) {
encidx = rb_enc_get_index(path1);
@ -4651,7 +4668,7 @@ rb_check_realpath_emulate(VALUE basedir, VALUE path, rb_encoding *origenc, enum
return resolved;
}
static VALUE rb_file_join(VALUE ary);
static VALUE rb_file_join(long argc, VALUE *args);
#ifndef HAVE_REALPATH
static VALUE
@ -4692,7 +4709,8 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, rb_encoding *origenc, enum
unresolved_path = rb_str_dup_frozen(path);
if (*RSTRING_PTR(unresolved_path) != '/' && !NIL_P(basedir)) {
unresolved_path = rb_file_join(rb_assoc_new(basedir, unresolved_path));
VALUE paths[2] = {basedir, unresolved_path};
unresolved_path = rb_file_join(2, paths);
}
if (origenc) unresolved_path = TO_OSPATH(unresolved_path);
@ -5255,15 +5273,17 @@ rb_file_s_split(VALUE klass, VALUE path)
return rb_assoc_new(rb_file_dirname(path), rb_file_s_basename(1,&path,Qundef));
}
static VALUE rb_file_join_ary(VALUE ary);
static VALUE
file_inspect_join(VALUE ary, VALUE arg, int recur)
{
if (recur || ary == arg) rb_raise(rb_eArgError, "recursive array");
return rb_file_join(arg);
return rb_file_join_ary(arg);
}
static VALUE
rb_file_join(VALUE ary)
rb_file_join_ary(VALUE ary)
{
long len, i;
VALUE result, tmp;
@ -5328,6 +5348,69 @@ rb_file_join(VALUE ary)
return result;
}
/*
 * Optimistic join of `argc` path components stored in `args`.
 *
 * Only handles the case where every argument is a T_STRING whose encoding
 * index is accepted by rb_str_enc_fastpath(). As soon as one argument does
 * not qualify, 0 is returned (never a valid result string) so that the
 * caller can fall back to the generic implementation.
 *
 * args[0] is read unconditionally: rb_file_join() handles argc == 0 before
 * calling this function.
 */
static inline VALUE
rb_file_join_fastpath(long argc, VALUE *args)
{
/* Start from argc so the buffer has one spare byte per component on top of the summed lengths. */
long size = argc;
long i;
/* First pass: validate every argument and total the lengths; nothing is allocated yet. */
for (i = 0; i < argc; i++) {
VALUE tmp = args[i];
if (RB_LIKELY(RB_TYPE_P(tmp, T_STRING) && rb_str_enc_fastpath(tmp))) {
size += RSTRING_LEN(tmp);
}
else {
/* Not a plain fast-path string: tell the caller to use the slow path. */
return 0;
}
}
VALUE result = rb_str_buf_new(size);
/* NOTE(review): return value is discarded; presumably called for its NUL-byte validation -- confirm. */
StringValueCStr(args[0]);
/* The result starts out with the encoding index of the first component. */
int encidx = ENCODING_GET_INLINED(args[0]);
ENCODING_SET_INLINED(result, encidx);
rb_str_buf_append(result, args[0]);
/* NOTE(review): `name` is read once and reused below; this assumes result's buffer never moves, presumably thanks to the capacity reserved above -- confirm. */
const char *name = RSTRING_PTR(result);
for (i = 1; i < argc; i++) {
VALUE tmp = args[i];
StringValueCStr(tmp);
long len = RSTRING_LEN(result);
/* tail: start of result's trailing separator run, or its end when there is none. */
const char *tail = single_byte_chompdirsep(name, name + len);
if (RSTRING_PTR(tmp) && isdirsep(RSTRING_PTR(tmp)[0])) {
/* The next component brings its own leading separator: drop result's trailing ones. */
rb_str_set_len(result, tail - name);
}
else if (!*tail) {
/* tail points at a NUL byte (presumably the terminator, i.e. no trailing separator): insert one. */
rb_str_cat(result, "/", 1);
}
/* Encoding differs from the one tracked so far: let fs_enc_check() pick the encoding for result. */
if (RB_UNLIKELY(ENCODING_GET_INLINED(tmp) != encidx)) {
rb_encoding *new_enc = fs_enc_check(result, tmp);
rb_enc_associate(result, new_enc);
encidx = rb_enc_to_index(new_enc);
}
rb_str_buf_append(result, tmp);
}
return result;
}
/*
 * Joins `argc` path components from `args`.
 *
 * With no arguments a new empty string is returned. Otherwise the fast
 * path is tried first; when it declines (returns 0) the arguments are
 * packed into an array and handed to rb_file_join_ary().
 */
static inline VALUE
rb_file_join(long argc, VALUE *args)
{
    VALUE joined;

    if (RB_UNLIKELY(argc == 0)) {
        return rb_str_new(0, 0);
    }

    joined = rb_file_join_fastpath(argc, args);
    if (RB_UNLIKELY(!joined)) {
        /* The fast path could not handle these arguments. */
        joined = rb_file_join_ary(rb_ary_new_from_values(argc, args));
    }

    return joined;
}
/*
* call-seq:
* File.join(string, ...) -> string
@ -5340,9 +5423,9 @@ rb_file_join(VALUE ary)
*/
static VALUE
rb_file_s_join(VALUE klass, VALUE args)
rb_file_s_join(int argc, VALUE *argv, VALUE klass)
{
return rb_file_join(args);
return rb_file_join(argc, argv);
}
#if defined(HAVE_TRUNCATE)
@ -7584,7 +7667,7 @@ Init_File(void)
/* separates directory parts in path */
rb_define_const(rb_cFile, "SEPARATOR", separator);
rb_define_singleton_method(rb_cFile, "split", rb_file_s_split, 1);
rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -2);
rb_define_singleton_method(rb_cFile, "join", rb_file_s_join, -1);
#ifdef DOSISH
/* platform specific alternative separator */

View File

@ -14,6 +14,7 @@
#include "ruby/internal/stdbool.h" /* for bool */
#include "ruby/encoding.h" /* for rb_encoding */
#include "ruby/ruby.h" /* for VALUE */
#include "encindex.h"
#define STR_SHARED FL_USER0 /* = ELTS_SHARED */
#define STR_NOEMBED FL_USER1
@ -29,6 +30,26 @@ enum ruby_rstring_private_flags {
# undef rb_fstring_cstr
#endif
/*
 * Tells whether `encindex` designates one of the three encodings that
 * cover the overwhelming majority of strings: ASCII-8BIT, UTF-8 or
 * US-ASCII.
 */
static inline bool
rb_str_encindex_fastpath(int encindex)
{
    return encindex == ENCINDEX_ASCII_8BIT
        || encindex == ENCINDEX_UTF_8
        || encindex == ENCINDEX_US_ASCII;
}
/*
 * Same test as rb_str_encindex_fastpath(), applied to the encoding index
 * read from `str` with ENCODING_GET_INLINED().
 */
static inline bool
rb_str_enc_fastpath(VALUE str)
{
    const int encindex = ENCODING_GET_INLINED(str);

    return rb_str_encindex_fastpath(encindex);
}
/* string.c */
VALUE rb_str_dup_m(VALUE str);
VALUE rb_fstring(VALUE);

View File

@ -146,27 +146,7 @@ VALUE rb_cSymbol;
RSTRING(str)->len = (n); \
} while (0)
/*
 * Tells whether `encindex` designates one of the three encodings that
 * cover the overwhelming majority of strings: ASCII-8BIT, UTF-8 or
 * US-ASCII.
 */
static inline bool
str_encindex_fastpath(int encindex)
{
    return encindex == ENCINDEX_ASCII_8BIT
        || encindex == ENCINDEX_UTF_8
        || encindex == ENCINDEX_US_ASCII;
}
/*
 * Same test as str_encindex_fastpath(), applied to the encoding index
 * read from `str` with ENCODING_GET_INLINED().
 */
static inline bool
str_enc_fastpath(VALUE str)
{
    const int encindex = ENCODING_GET_INLINED(str);

    return str_encindex_fastpath(encindex);
}
#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
#define TERM_LEN(str) (rb_str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
#define TERM_FILL(ptr, termlen) do {\
char *const term_fill_ptr = (ptr);\
const int term_fill_len = (termlen);\
@ -960,7 +940,7 @@ static inline bool
rb_enc_str_asciicompat(VALUE str)
{
int encindex = ENCODING_GET_INLINED(str);
return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
return rb_str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
}
int
@ -2796,7 +2776,7 @@ rb_must_asciicompat(VALUE str)
rb_raise(rb_eTypeError, "not encoding capable object");
}
if (RB_LIKELY(str_encindex_fastpath(encindex))) {
if (RB_LIKELY(rb_str_encindex_fastpath(encindex))) {
return;
}
@ -2897,16 +2877,21 @@ str_null_check(VALUE str, int *w)
{
char *s = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
rb_encoding *enc = rb_enc_get(str);
const int minlen = rb_enc_mbminlen(enc);
int minlen = 1;
if (minlen > 1) {
*w = 1;
if (str_null_char(s, len, minlen, enc)) {
return NULL;
if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
rb_encoding *enc = rb_enc_get(str);
minlen = rb_enc_mbminlen(enc);
if (minlen > 1) {
*w = 1;
if (str_null_char(s, len, minlen, enc)) {
return NULL;
}
return str_fill_term(str, s, len, minlen);
}
return str_fill_term(str, s, len, minlen);
}
*w = 0;
if (!s || memchr(s, 0, len)) {
return NULL;
@ -3765,7 +3750,7 @@ rb_str_buf_append(VALUE str, VALUE str2)
{
int str2_cr = rb_enc_str_coderange(str2);
if (str_enc_fastpath(str)) {
if (rb_str_enc_fastpath(str)) {
switch (str2_cr) {
case ENC_CODERANGE_7BIT:
// If RHS is 7bit we can do simple concatenation