file.c: Optimize rb_file_dirname_n fixed costs

- `str_null_check` was performed twice, once by `FilePathStringValue`
  and a second time by `StringValueCStr`.
- `StringValueCStr` was checking for the presence of the terminator, but
  we don't care about that.
- `FilePathStringValue` calls `rb_str_new_frozen` to ensure `fname`
  isn't mutated, but that's costly for such a check. Instead we
  can do it in debug mode only.
- `rb_enc_get` is slow because it accepts arbitrary objects, even immediates,
  so it has to do numerous type checks. Add a much faster `rb_str_enc_get`
  when we know we're dealing with a string.
- `rb_enc_copy` is slow for the same reasons; since we already have the
  encoding, we can use `rb_enc_str_new` instead.
This commit is contained in:
Jean Boussier 2026-01-19 07:32:09 +01:00
parent 826dbcfb2b
commit 27bb1623cd
Notes: git 2026-01-20 07:34:25 +00:00
4 changed files with 41 additions and 18 deletions

View File

@ -0,0 +1,5 @@
prelude: |
# frozen_string_literal: true
benchmark:
long: File.dirname("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml")
short: File.dirname("foo/bar")

39
file.c
View File

@ -214,15 +214,16 @@ file_path_convert(VALUE name)
return name; return name;
} }
static rb_encoding * static void
check_path_encoding(VALUE str) check_path_encoding(VALUE str)
{ {
rb_encoding *enc = rb_enc_get(str); if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
rb_encoding *enc = rb_str_enc_get(str);
if (!rb_enc_asciicompat(enc)) { if (!rb_enc_asciicompat(enc)) {
rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE, rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE,
rb_enc_name(enc), rb_str_inspect(str)); rb_enc_name(enc), rb_str_inspect(str));
} }
return enc; }
} }
VALUE VALUE
@ -250,7 +251,7 @@ rb_get_path_check_convert(VALUE obj)
rb_raise(rb_eArgError, "path name contains null byte"); rb_raise(rb_eArgError, "path name contains null byte");
} }
return rb_str_new4(obj); return rb_str_new_frozen(obj);
} }
VALUE VALUE
@ -265,6 +266,19 @@ rb_get_path(VALUE obj)
return rb_get_path_check_convert(rb_get_path_check_to_string(obj)); return rb_get_path_check_convert(rb_get_path_check_to_string(obj));
} }
/*
 * Converts `obj` into a checked path string and hands back a pointer to
 * its bytes.
 *
 * `obj` is passed through rb_get_path_check_to_string() and then
 * rb_get_path_check_convert(); the resulting string is the return value.
 * `*cstr` is set to RSTRING_PTR() of that string, so it points into the
 * returned VALUE's buffer -- presumably only valid while that VALUE stays
 * alive and unmodified; callers should keep it reachable.
 *
 * NOTE(review): the visible tail of rb_get_path_check_convert() already
 * returns rb_str_new_frozen(obj), so the debug-only copy below may be
 * redundant -- confirm against the full function.
 */
static inline VALUE
check_path(VALUE obj, const char **cstr)
{
VALUE str = rb_get_path_check_convert(rb_get_path_check_to_string(obj));
#if RUBY_DEBUG
/* Debug builds only: take a frozen copy so that accidental mutation of the
 * path while it is being parsed is caught instead of paid for in release. */
str = rb_str_new_frozen(str);
#endif
*cstr = RSTRING_PTR(str);
return str;
}
/*
 * CheckPath(str, cstr): replaces `str` with check_path(str, &cstr) and sets
 * `cstr` to the byte pointer of the resulting string. The assignment goes
 * through RB_GC_GUARD(str), presumably so the VALUE stays visible to the GC
 * while `cstr` is in use.
 *
 * Expands to one parenthesized expression with no trailing semicolon, so
 * `CheckPath(a, b);` is exactly one statement and is safe inside an unbraced
 * if/else. `str` is evaluated more than once and must be an lvalue; pass a
 * plain variable.
 */
#define CheckPath(str, cstr) (RB_GC_GUARD(str) = check_path((str), &(cstr)))
VALUE VALUE
rb_str_encode_ospath(VALUE path) rb_str_encode_ospath(VALUE path)
{ {
@ -4952,7 +4966,8 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _)
if (rb_check_arity(argc, 1, 2) == 2) { if (rb_check_arity(argc, 1, 2) == 2) {
fext = argv[1]; fext = argv[1];
StringValue(fext); StringValue(fext);
enc = check_path_encoding(fext); check_path_encoding(fext);
enc = rb_str_enc_get(fext);
} }
fname = argv[0]; fname = argv[0];
FilePathStringValue(fname); FilePathStringValue(fname);
@ -5031,10 +5046,9 @@ rb_file_dirname_n(VALUE fname, int n)
const char **seps; const char **seps;
if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n); if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n);
FilePathStringValue(fname); CheckPath(fname, name);
name = StringValueCStr(fname);
end = name + RSTRING_LEN(fname); end = name + RSTRING_LEN(fname);
enc = rb_enc_get(fname); enc = rb_str_enc_get(fname);
root = skiproot(name, end, enc); root = skiproot(name, end, enc);
#ifdef DOSISH_UNC #ifdef DOSISH_UNC
if (root > name + 1 && isdirsep(*name)) if (root > name + 1 && isdirsep(*name))
@ -5077,24 +5091,21 @@ rb_file_dirname_n(VALUE fname, int n)
} }
} }
if (p == name) { if (p == name) {
dirname = rb_str_new(".", 1); return rb_enc_str_new(".", 1, enc);
rb_enc_copy(dirname, fname);
return dirname;
} }
#ifdef DOSISH_DRIVE_LETTER #ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && isdirsep(*(name + 2))) { if (has_drive_letter(name) && isdirsep(*(name + 2))) {
const char *top = skiproot(name + 2, end, enc); const char *top = skiproot(name + 2, end, enc);
dirname = rb_str_new(name, 3); dirname = rb_enc_str_new(name, 3, enc);
rb_str_cat(dirname, top, p - top); rb_str_cat(dirname, top, p - top);
} }
else else
#endif #endif
dirname = rb_str_new(name, p - name); dirname = rb_enc_str_new(name, p - name, enc);
#ifdef DOSISH_DRIVE_LETTER #ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && root == name + 2 && p - name == 2) if (has_drive_letter(name) && root == name + 2 && p - name == 2)
rb_str_cat(dirname, ".", 1); rb_str_cat(dirname, ".", 1);
#endif #endif
rb_enc_copy(dirname, fname);
return dirname; return dirname;
} }

View File

@ -50,6 +50,13 @@ rb_str_enc_fastpath(VALUE str)
return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str)); return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str));
} }
/*
 * Returns the encoding of `str`, looked up from the encoding index stored
 * on the object (ENCODING_GET() followed by rb_enc_from_index()).
 *
 * Intended as a cheaper alternative to rb_enc_get() for callers that
 * already know they hold a String: `str` must be a T_STRING, and that
 * precondition is only checked through RUBY_ASSERT.
 */
static inline rb_encoding *
rb_str_enc_get(VALUE str)
{
RUBY_ASSERT(RB_TYPE_P(str, T_STRING));
return rb_enc_from_index(ENCODING_GET(str));
}
/* string.c */ /* string.c */
VALUE rb_str_dup_m(VALUE str); VALUE rb_str_dup_m(VALUE str);
VALUE rb_fstring(VALUE); VALUE rb_fstring(VALUE);

View File

@ -2880,7 +2880,7 @@ str_null_check(VALUE str, int *w)
int minlen = 1; int minlen = 1;
if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) { if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
rb_encoding *enc = rb_enc_get(str); rb_encoding *enc = rb_str_enc_get(str);
minlen = rb_enc_mbminlen(enc); minlen = rb_enc_mbminlen(enc);
if (minlen > 1) { if (minlen > 1) {