Optimize File.extname for common encodings

Similar optimizations to the ones performed in GH-15907.

- Skip the expensive multi-byte encoding handling for the common
  encodings that are known to be safe.
- Use `CheckPath` to save on copying the argument and only scan it for
  NUL bytes once.
- Create the return string with rb_enc_str_new instead of rb_str_subseq
  as it's going to be a very small string anyway.

This could be optimized a little bit further by searching for both `.` and `dirsep`
in one pass.

```
compare-ruby: ruby 4.1.0dev (2026-01-19T03:51:30Z master 631bf19b37) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-20T07:33:42Z master 6fb50434e3) +PRISM [arm64-darwin25]
```

|           |compare-ruby|built-ruby|
|:----------|-----------:|---------:|
|long       |      3.606M|   22.229M|
|           |           -|     6.17x|
|long_name  |      2.254M|   13.416M|
|           |           -|     5.95x|
|short      |     16.488M|   29.969M|
|           |           -|     1.82x|
This commit is contained in:
Jean Boussier 2026-01-20 08:56:17 +01:00
parent 6fb50434e3
commit 53fe9933fd
Notes: git 2026-01-20 08:59:20 +00:00
2 changed files with 40 additions and 27 deletions

View File

@ -0,0 +1,6 @@
# Benchmarks File.extname on a long path with directories, a long
# single-component name, and a short path without an extension.
prelude: |
  # frozen_string_literal: true
benchmark:
  long: File.extname("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml")
  long_name: File.extname("Users_george_src_github.com_ruby_ruby_benchmark_file_dirname.yml")
  short: File.extname("foo/bar")

61
file.c
View File

@ -5126,24 +5126,12 @@ rb_file_dirname_n(VALUE fname, int n)
return dirname;
}
/*
* accept a String, and return the pointer of the extension.
* if len is passed, set the length of extension to it.
* returned pointer is in ``name'' or NULL.
* returns *len
* no dot NULL 0
* dotfile top 0
* end with dot dot 1
* .ext dot len of .ext
* .ext:stream dot len of .ext without :stream (NTFS only)
*
*/
const char *
ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
static inline const char *
enc_find_extname(const char *name, long *len, bool mb_enc, rb_encoding *enc)
{
const char *p, *e, *end = name + (len ? *len : (long)strlen(name));
p = strrdirsep(name, end, true, enc); /* get the last path component */
p = strrdirsep(name, end, mb_enc, enc); /* get the last path component */
if (!p)
p = name;
else
@ -5176,7 +5164,7 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
#endif
else if (isdirsep(*p))
break;
Inc(p, end, true, enc);
Inc(p, end, mb_enc, enc);
}
if (len) {
@ -5191,6 +5179,24 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
return e;
}
/*
 * accept a String, and return the pointer of the extension.
 * if len is passed, set the length of extension to it.
 * returned pointer is in ``name'' or NULL.
 *                 returns   *len
 *        no dot   NULL      0
 *       dotfile   top       0
 *  end with dot   dot       1
 *          .ext   dot       len of .ext
 *   .ext:stream   dot       len of .ext without :stream (NTFS only)
 *
 * Non-static wrapper: forwards its arguments to enc_find_extname with
 * the mb_enc flag always set to true.
 */
const char *
ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
{
return enc_find_extname(name, len, true, enc);
}
/*
* call-seq:
* File.extname(path) -> string
@ -5220,18 +5226,19 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
static VALUE
rb_file_s_extname(VALUE klass, VALUE fname)
{
/*
 * NOTE(review): this hunk is rendered without +/- markers, so the
 * statements being replaced and their replacements appear interleaved
 * below; confirm against the real diff which lines survive.
 */
const char *name, *e;
long len;
VALUE extname;
const char *name;
/*
 * Per the commit message, CheckPath avoids copying the argument and
 * scans it for NUL bytes only once; presumably it also sets `name`
 * to the string's bytes -- TODO confirm against the macro definition.
 */
CheckPath(fname, name);
long len = RSTRING_LEN(fname);
FilePathStringValue(fname);
name = StringValueCStr(fname);
len = RSTRING_LEN(fname);
e = ruby_enc_find_extname(name, &len, rb_enc_get(fname));
if (len < 1)
return rb_str_new(0, 0);
extname = rb_str_subseq(fname, e - name, len); /* keep the dot, too! */
return extname;
/* Empty input: return an empty string created with fname's encoding. */
if (len < 1) {
return rb_enc_str_new(0, 0, rb_str_enc_get(fname));
}
/*
 * mb_enc is true only when rb_str_enc_fastpath() is false for fname;
 * it is passed down so enc_find_extname can skip the multi-byte
 * handling otherwise (see the commit message).
 */
bool mb_enc = !rb_str_enc_fastpath(fname);
rb_encoding *enc = rb_str_enc_get(fname);
/* len is in/out: the input length going in, the extension length coming out. */
const char *ext = enc_find_extname(name, &len, mb_enc, enc);
return rb_enc_str_new(ext, len, enc);
}
/*