mirror of
https://github.com/ruby/ruby.git
synced 2026-01-26 12:14:51 +00:00
Optimize File.extname for common encodings
Similar optimizations to the ones performed in GH-15907.

- Skip the expensive multi-byte encoding handling for the common encodings that are known to be safe.
- Use `CheckPath` to save on copying the argument and only scan it for NUL bytes once.
- Create the return string with `rb_enc_str_new` instead of `rb_str_subseq`, as it's going to be a very small string anyway.

This could be optimized a little bit further by searching for both `.` and `dirsep` in one pass.

```
compare-ruby: ruby 4.1.0dev (2026-01-19T03:51:30Z master 631bf19b37) +PRISM [arm64-darwin25]
built-ruby: ruby 4.1.0dev (2026-01-20T07:33:42Z master 6fb50434e3) +PRISM [arm64-darwin25]
```

|           |compare-ruby|built-ruby|
|:----------|-----------:|---------:|
|long       |      3.606M|   22.229M|
|           |           -|     6.17x|
|long_name  |      2.254M|   13.416M|
|           |           -|     5.95x|
|short      |     16.488M|   29.969M|
|           |           -|     1.82x|
This commit is contained in:
parent
6fb50434e3
commit
53fe9933fd
Notes:
git
2026-01-20 08:59:20 +00:00
6
benchmark/file_extname.yml
Normal file
6
benchmark/file_extname.yml
Normal file
@ -0,0 +1,6 @@
|
||||
prelude: |
|
||||
# frozen_string_literal: true
|
||||
benchmark:
|
||||
long: File.extname("/Users/george/src/github.com/ruby/ruby/benchmark/file_dirname.yml")
|
||||
long_name: File.extname("Users_george_src_github.com_ruby_ruby_benchmark_file_dirname.yml")
|
||||
short: File.extname("foo/bar")
|
||||
61
file.c
61
file.c
@ -5126,24 +5126,12 @@ rb_file_dirname_n(VALUE fname, int n)
|
||||
return dirname;
|
||||
}
|
||||
|
||||
/*
|
||||
* accept a String, and return the pointer of the extension.
|
||||
* if len is passed, set the length of extension to it.
|
||||
* returned pointer is in ``name'' or NULL.
|
||||
* returns *len
|
||||
* no dot NULL 0
|
||||
* dotfile top 0
|
||||
* end with dot dot 1
|
||||
* .ext dot len of .ext
|
||||
* .ext:stream dot len of .ext without :stream (NTFS only)
|
||||
*
|
||||
*/
|
||||
const char *
|
||||
ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
|
||||
static inline const char *
|
||||
enc_find_extname(const char *name, long *len, bool mb_enc, rb_encoding *enc)
|
||||
{
|
||||
const char *p, *e, *end = name + (len ? *len : (long)strlen(name));
|
||||
|
||||
p = strrdirsep(name, end, true, enc); /* get the last path component */
|
||||
p = strrdirsep(name, end, mb_enc, enc); /* get the last path component */
|
||||
if (!p)
|
||||
p = name;
|
||||
else
|
||||
@ -5176,7 +5164,7 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
|
||||
#endif
|
||||
else if (isdirsep(*p))
|
||||
break;
|
||||
Inc(p, end, true, enc);
|
||||
Inc(p, end, mb_enc, enc);
|
||||
}
|
||||
|
||||
if (len) {
|
||||
@ -5191,6 +5179,24 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
|
||||
return e;
|
||||
}
|
||||
|
||||
/*
|
||||
* accept a String, and return the pointer of the extension.
|
||||
* if len is passed, set the length of extension to it.
|
||||
* returned pointer is in ``name'' or NULL.
|
||||
* returns *len
|
||||
* no dot NULL 0
|
||||
* dotfile top 0
|
||||
* end with dot dot 1
|
||||
* .ext dot len of .ext
|
||||
* .ext:stream dot len of .ext without :stream (NTFS only)
|
||||
*
|
||||
*/
|
||||
const char *
|
||||
ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
|
||||
{
|
||||
return enc_find_extname(name, len, true, enc);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* File.extname(path) -> string
|
||||
@ -5220,18 +5226,19 @@ ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc)
|
||||
static VALUE
|
||||
rb_file_s_extname(VALUE klass, VALUE fname)
|
||||
{
|
||||
const char *name, *e;
|
||||
long len;
|
||||
VALUE extname;
|
||||
const char *name;
|
||||
CheckPath(fname, name);
|
||||
long len = RSTRING_LEN(fname);
|
||||
|
||||
FilePathStringValue(fname);
|
||||
name = StringValueCStr(fname);
|
||||
len = RSTRING_LEN(fname);
|
||||
e = ruby_enc_find_extname(name, &len, rb_enc_get(fname));
|
||||
if (len < 1)
|
||||
return rb_str_new(0, 0);
|
||||
extname = rb_str_subseq(fname, e - name, len); /* keep the dot, too! */
|
||||
return extname;
|
||||
if (len < 1) {
|
||||
return rb_enc_str_new(0, 0, rb_str_enc_get(fname));
|
||||
}
|
||||
|
||||
bool mb_enc = !rb_str_enc_fastpath(fname);
|
||||
rb_encoding *enc = rb_str_enc_get(fname);
|
||||
|
||||
const char *ext = enc_find_extname(name, &len, mb_enc, enc);
|
||||
return rb_enc_str_new(ext, len, enc);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user