From 27bb1623cd048f3cbfc527cc315894803deabba2 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Mon, 19 Jan 2026 07:32:09 +0100 Subject: [PATCH] file.c: Optimize `rb_file_dirname_n` fixed costs - `str_null_check` was performed twice, once by `FilePathStringValue` and a second time by `StringValueCStr`. - `StringValueCStr` was checking for the terminator presence, but we don't care about that. - `FilePathStringValue` calls `rb_str_new_frozen` to ensure `fname` isn't mutated, but that's costly for such a check. Instead we can do it in debug mode only. - `rb_enc_get` is slow because it accepts arbitrary objects, even immediates, so it has to do numerous type checks. Add a much faster `rb_str_enc_get` when we know we're dealing with a string. - `rb_enc_copy` is slow for the same reasons, since we already have the encoding, we can use `rb_enc_str_new` instead. --- benchmark/file_dirname.yml | 5 +++++ file.c | 45 ++++++++++++++++++++++++-------------- internal/string.h | 7 ++++++ string.c | 2 +- 4 files changed, 41 insertions(+), 18 deletions(-) create mode 100644 benchmark/file_dirname.yml diff --git a/benchmark/file_dirname.yml b/benchmark/file_dirname.yml new file mode 100644 index 0000000000..d5c134ad4b --- /dev/null +++ b/benchmark/file_dirname.yml @@ -0,0 +1,5 @@ +prelude: | + # frozen_string_literal: true +benchmark: + long: File.dirname("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml") + short: File.dirname("foo/bar") diff --git a/file.c b/file.c index 809253fab0..9f4f45e5c6 100644 --- a/file.c +++ b/file.c @@ -214,15 +214,16 @@ file_path_convert(VALUE name) return name; } -static rb_encoding * +static void check_path_encoding(VALUE str) { - rb_encoding *enc = rb_enc_get(str); - if (!rb_enc_asciicompat(enc)) { - rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE, - rb_enc_name(enc), rb_str_inspect(str)); + if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) { + rb_encoding *enc = rb_str_enc_get(str); + if (!rb_enc_asciicompat(enc)) { + rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE, + rb_enc_name(enc), rb_str_inspect(str)); + } } - return enc; } VALUE @@ -250,7 +251,7 @@ rb_get_path_check_convert(VALUE obj) rb_raise(rb_eArgError, "path name contains null byte"); } - return rb_str_new4(obj); + return rb_str_new_frozen(obj); } VALUE @@ -265,6 +266,19 @@ rb_get_path(VALUE obj) return rb_get_path_check_convert(rb_get_path_check_to_string(obj)); } +static inline VALUE +check_path(VALUE obj, const char **cstr) +{ + VALUE str = rb_get_path_check_convert(rb_get_path_check_to_string(obj)); +#if RUBY_DEBUG + str = rb_str_new_frozen(str); +#endif + *cstr = RSTRING_PTR(str); + return str; +} + +#define CheckPath(str, cstr) RB_GC_GUARD(str) = check_path(str, &cstr); + VALUE rb_str_encode_ospath(VALUE path) { @@ -4952,7 +4966,8 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _) if (rb_check_arity(argc, 1, 2) == 2) { fext = argv[1]; StringValue(fext); - enc = check_path_encoding(fext); + check_path_encoding(fext); + enc = rb_str_enc_get(fext); } fname = argv[0]; FilePathStringValue(fname); @@ -5031,10 +5046,9 @@ rb_file_dirname_n(VALUE fname, int n) const char **seps; if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n); - FilePathStringValue(fname); - name = StringValueCStr(fname); + CheckPath(fname, name); end = name + RSTRING_LEN(fname); - enc = rb_enc_get(fname); + enc = rb_str_enc_get(fname); root = skiproot(name, end, enc); #ifdef DOSISH_UNC if (root > name + 1 && isdirsep(*name)) @@ -5077,24 +5091,21 @@ rb_file_dirname_n(VALUE fname, int n) } } if (p == name) { - dirname = rb_str_new(".", 1); - rb_enc_copy(dirname, fname); - return dirname; + return rb_enc_str_new(".", 1, enc); } #ifdef DOSISH_DRIVE_LETTER if (has_drive_letter(name) && isdirsep(*(name + 2))) { const char *top = skiproot(name + 2, end, enc); - dirname = rb_str_new(name, 3); + dirname = rb_enc_str_new(name, 3, enc); rb_str_cat(dirname, top, p - top); } else #endif - dirname = rb_str_new(name, p - name); + dirname = rb_enc_str_new(name, p - name, enc); #ifdef DOSISH_DRIVE_LETTER if (has_drive_letter(name) && root == name + 2 && p - name == 2) rb_str_cat(dirname, ".", 1); #endif - rb_enc_copy(dirname, fname); return dirname; } diff --git a/internal/string.h b/internal/string.h index cd1e8d7929..dd5e20c0c6 100644 --- a/internal/string.h +++ b/internal/string.h @@ -50,6 +50,13 @@ rb_str_enc_fastpath(VALUE str) return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str)); } +static inline rb_encoding * +rb_str_enc_get(VALUE str) +{ + RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); + return rb_enc_from_index(ENCODING_GET(str)); +} + /* string.c */ VALUE rb_str_dup_m(VALUE str); VALUE rb_fstring(VALUE); diff --git a/string.c b/string.c index 1e0b9929ef..464eab2146 100644 --- a/string.c +++ b/string.c @@ -2880,7 +2880,7 @@ str_null_check(VALUE str, int *w) int minlen = 1; if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) { - rb_encoding *enc = rb_enc_get(str); + rb_encoding *enc = rb_str_enc_get(str); minlen = rb_enc_mbminlen(enc); if (minlen > 1) {