file.c: Optimize rb_file_dirname_n fixed costs

- `str_null_check` was performed twice, once by `FilePathStringValue`
  and a second time by `StringValueCStr`.
- `StringValueCStr` was checking for the terminator presence, but we
  don't care about that.
- `FilePathStringValue` calls `rb_str_new_frozen` to ensure `fname`
  isn't mutated, but that's costly for such a check. Instead we
  can do it in debug mode only.
- `rb_enc_get` is slow because it accepts arbitrary objects, even immediates,
  so it has to do numerous type checks. Add a much faster `rb_str_enc_get`
  when we know we're dealing with a string.
- `rb_enc_copy` is slow for the same reasons. Since we already have the
  encoding, we can use `rb_enc_str_new` instead.
This commit is contained in:
Jean Boussier 2026-01-19 07:32:09 +01:00
parent 826dbcfb2b
commit 27bb1623cd
Notes: git 2026-01-20 07:34:25 +00:00
4 changed files with 41 additions and 18 deletions

View File

@ -0,0 +1,5 @@
prelude: |
# frozen_string_literal: true
benchmark:
long: File.dirname("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml")
short: File.dirname("foo/bar")

45
file.c
View File

@ -214,15 +214,16 @@ file_path_convert(VALUE name)
return name;
}
static rb_encoding *
static void
check_path_encoding(VALUE str)
{
rb_encoding *enc = rb_enc_get(str);
if (!rb_enc_asciicompat(enc)) {
rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE,
rb_enc_name(enc), rb_str_inspect(str));
if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
rb_encoding *enc = rb_str_enc_get(str);
if (!rb_enc_asciicompat(enc)) {
rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %"PRIsVALUE,
rb_enc_name(enc), rb_str_inspect(str));
}
}
return enc;
}
VALUE
@ -250,7 +251,7 @@ rb_get_path_check_convert(VALUE obj)
rb_raise(rb_eArgError, "path name contains null byte");
}
return rb_str_new4(obj);
return rb_str_new_frozen(obj);
}
VALUE
@ -265,6 +266,19 @@ rb_get_path(VALUE obj)
return rb_get_path_check_convert(rb_get_path_check_to_string(obj));
}
/*
 * Run `obj` through rb_get_path_check_to_string() and then
 * rb_get_path_check_convert(), and hand back the resulting string.
 *
 * On return, *cstr holds RSTRING_PTR() of the returned string.
 * When RUBY_DEBUG is set, the result is additionally passed through
 * rb_str_new_frozen() so that mutation of the caller's string is only
 * guarded against in debug builds.
 */
static inline VALUE
check_path(VALUE obj, const char **cstr)
{
    VALUE as_string = rb_get_path_check_to_string(obj);
    VALUE path = rb_get_path_check_convert(as_string);

#if RUBY_DEBUG
    path = rb_str_new_frozen(path);
#endif

    *cstr = RSTRING_PTR(path);
    return path;
}
/*
 * Replace `str` with the result of check_path() (assigned through
 * RB_GC_GUARD(str)) and set `cstr` to point at that string's bytes.
 *
 * Expands to one parenthesized expression with no trailing semicolon, so
 * `CheckPath(a, b);` behaves like an ordinary statement, including inside
 * an unbraced if/else.
 */
#define CheckPath(str, cstr) (RB_GC_GUARD(str) = check_path((str), &(cstr)))
VALUE
rb_str_encode_ospath(VALUE path)
{
@ -4952,7 +4966,8 @@ rb_file_s_basename(int argc, VALUE *argv, VALUE _)
if (rb_check_arity(argc, 1, 2) == 2) {
fext = argv[1];
StringValue(fext);
enc = check_path_encoding(fext);
check_path_encoding(fext);
enc = rb_str_enc_get(fext);
}
fname = argv[0];
FilePathStringValue(fname);
@ -5031,10 +5046,9 @@ rb_file_dirname_n(VALUE fname, int n)
const char **seps;
if (n < 0) rb_raise(rb_eArgError, "negative level: %d", n);
FilePathStringValue(fname);
name = StringValueCStr(fname);
CheckPath(fname, name);
end = name + RSTRING_LEN(fname);
enc = rb_enc_get(fname);
enc = rb_str_enc_get(fname);
root = skiproot(name, end, enc);
#ifdef DOSISH_UNC
if (root > name + 1 && isdirsep(*name))
@ -5077,24 +5091,21 @@ rb_file_dirname_n(VALUE fname, int n)
}
}
if (p == name) {
dirname = rb_str_new(".", 1);
rb_enc_copy(dirname, fname);
return dirname;
return rb_enc_str_new(".", 1, enc);
}
#ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && isdirsep(*(name + 2))) {
const char *top = skiproot(name + 2, end, enc);
dirname = rb_str_new(name, 3);
dirname = rb_enc_str_new(name, 3, enc);
rb_str_cat(dirname, top, p - top);
}
else
#endif
dirname = rb_str_new(name, p - name);
dirname = rb_enc_str_new(name, p - name, enc);
#ifdef DOSISH_DRIVE_LETTER
if (has_drive_letter(name) && root == name + 2 && p - name == 2)
rb_str_cat(dirname, ".", 1);
#endif
rb_enc_copy(dirname, fname);
return dirname;
}

View File

@ -50,6 +50,13 @@ rb_str_enc_fastpath(VALUE str)
return rb_str_encindex_fastpath(ENCODING_GET_INLINED(str));
}
/*
 * Encoding lookup for a value that the caller already knows is a String.
 *
 * Reads the encoding index with ENCODING_GET() and resolves it through
 * rb_enc_from_index(). The T_STRING precondition is checked only by
 * RUBY_ASSERT; no other type checks are made here.
 */
static inline rb_encoding *
rb_str_enc_get(VALUE str)
{
    RUBY_ASSERT(RB_TYPE_P(str, T_STRING));

    const int encindex = ENCODING_GET(str);
    return rb_enc_from_index(encindex);
}
/* string.c */
VALUE rb_str_dup_m(VALUE str);
VALUE rb_fstring(VALUE);

View File

@ -2880,7 +2880,7 @@ str_null_check(VALUE str, int *w)
int minlen = 1;
if (RB_UNLIKELY(!rb_str_enc_fastpath(str))) {
rb_encoding *enc = rb_enc_get(str);
rb_encoding *enc = rb_str_enc_get(str);
minlen = rb_enc_mbminlen(enc);
if (minlen > 1) {