[ruby/uri] Improve performance of URI::MailTo::EMAIL_REGEXP

Fix the performance regression from #172 for valid email addresses.

``` yml
prelude: |
  require 'uri/mailto'
  n = 1000
  re = URI::MailTo::EMAIL_REGEXP
benchmark:
  n.t..t.: re.match?("n.t..t.@docomo.ne.jp")
  example: re.match?("example@example.info")
```

|         |released| 788274b| c5974f0|    this|
|:--------|-------:|-------:|-------:|-------:|
|n.t..t.  |  3.795M|  4.864M|  4.993M|  8.739M|
|         |       -|   1.28x|   1.32x|   2.30x|
|example  |  3.911M|  3.740M|  2.838M|  3.880M|
|         |   1.38x|   1.32x|       -|   1.37x|

https://github.com/ruby/uri/commit/7363a134ac
This commit is contained in:
Nobuyoshi Nakada 2025-07-12 19:31:31 +09:00 committed by git
parent 22b81b5bf5
commit cf7b871a94
2 changed files with 21 additions and 1 deletions

View File

@ -52,7 +52,11 @@ module URI
# Matches an entire string made of one or more "name=value" header fields
# joined by "&". Each name and each value is a possibly empty run of %XX
# escapes (two hex digits) or characters from the bracketed set; the named
# group `hfield` is re-used via \g<hfield> for every following field.
HEADER_REGEXP = /\A(?<hfield>(?:%\h\h|[!$'-.0-;@-Z_a-z~])*=(?:%\h\h|[!$'-.0-;@-Z_a-z~])*)(?:&\g<hfield>)*\z/
# practical regexp for email address
# https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
#
# Lookaround-based form: (?!\.) rejects a leading dot, (?!.*\.{2}) rejects two
# consecutive dots anywhere after the start, and (?<!\.) rejects a dot right
# before "@". The host is one or more dot-separated labels of 1 to 63
# characters that begin and end with an alphanumeric.
# NOTE(review): EMAIL_REGEXP is assigned again a few lines below; this line
# looks like the pre-change side of a diff -- confirm which one should remain.
EMAIL_REGEXP = /\A(?!\.)(?!.*\.{2})[a-zA-Z0-9.!\#$%&'*+\/=?^_`{|}~-]+(?<!\.)@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\z/
# Local-part atom: a non-empty run of the permitted characters (no ".").
# Atoms are joined with single dots below, so a leading, trailing, or doubled
# "." in the local part cannot match and no lookaround is needed.
atext = %q[(?:[a-zA-Z0-9!\#$%&'*+\/=?^_`{|}~-]+)]
# Host label: 1 to 63 characters, alphanumeric at both ends, hyphens allowed
# only in between.
label = %q[(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)]
# Whole-string match: atom(.atom)* "@" label(.label)*
EMAIL_REGEXP = %r[\A#{atext}(?:\.#{atext})*@#{label}(?:\.#{label})*\z]
# :startdoc:
#

View File

@ -210,6 +210,22 @@ class URI::TestMailTo < Test::Unit::TestCase
end
end
# Checks that matching EMAIL_REGEXP scales linearly with input length, both
# for an address that matches and for one that fails only at its last label.
def test_email_regexp
  pattern = URI::MailTo::EMAIL_REGEXP
  scale = 1000
  # "." followed by a 63-character label, the longest the pattern accepts.
  valid_label = '.' + 'invalid'.ljust(63, 'd')
  # Read by the lambda at call time, so reassigning it below changes the
  # addresses generated for the second run.
  tail = ''
  build = ->(n) { "a@invalid#{valid_label * (n * scale)}#{tail}" }

  # Every label is valid: the match must succeed.
  assert_linear_performance(1..10, pre: build) do |address|
    flunk unless pattern =~ address
  end

  # Append a 64-character label, one over the limit: the match must fail.
  tail = '.' + 'email'.rjust(64, 'd')
  assert_linear_performance(1..10, pre: build) do |address|
    flunk if pattern =~ address
  end
end
def test_to_s
u = URI::MailTo.build([nil, 'subject=Ruby'])