mirror of
https://github.com/ruby/ruby.git
synced 2026-01-26 12:14:51 +00:00
Improve performance of UnicodeNormalize.canonical_ordering_one
Use array_of_integer.sort! instead of buble-sort-like algorithm
This commit is contained in:
parent
31e14ac7da
commit
377aa2a336
Notes:
git
2025-10-25 12:20:02 +00:00
@ -82,16 +82,22 @@ module UnicodeNormalize # :nodoc:
|
||||
|
||||
## Canonical Ordering
|
||||
def self.canonical_ordering_one(string)
|
||||
sorting = string.each_char.collect { |c| [c, CLASS_TABLE[c]] }
|
||||
(sorting.length-2).downto(0) do |i| # almost, but not exactly bubble sort
|
||||
(0..i).each do |j|
|
||||
later_class = sorting[j+1].last
|
||||
if 0<later_class and later_class<sorting[j].last
|
||||
sorting[j], sorting[j+1] = sorting[j+1], sorting[j]
|
||||
end
|
||||
result = ''
|
||||
unordered = []
|
||||
chars = string.chars
|
||||
n = chars.size
|
||||
chars.each_with_index do |char, i|
|
||||
ccc = CLASS_TABLE[char]
|
||||
if ccc == 0
|
||||
unordered.sort!.each { result << chars[it % n] }
|
||||
unordered.clear
|
||||
result << char
|
||||
else
|
||||
unordered << ccc * n + i
|
||||
end
|
||||
end
|
||||
return sorting.collect(&:first).join('')
|
||||
unordered.sort!.each { result << chars[it % n] }
|
||||
result
|
||||
end
|
||||
|
||||
## Normalization Forms for Patterns (not whole Strings)
|
||||
|
||||
@ -209,4 +209,23 @@ class TestUnicodeNormalize
|
||||
assert_equal true, ascii_string.unicode_normalized?(:nfkc)
|
||||
assert_equal true, ascii_string.unicode_normalized?(:nfkd)
|
||||
end
|
||||
|
||||
def test_canonical_ordering
|
||||
a = "\u03B1\u0313\u0300\u0345"
|
||||
a_unordered1 = "\u03B1\u0345\u0313\u0300"
|
||||
a_unordered2 = "\u03B1\u0313\u0345\u0300"
|
||||
u1 = "U\u0308\u0304"
|
||||
u2 = "U\u0304\u0308"
|
||||
s = "s\u0323\u0307"
|
||||
s_unordered = "s\u0307\u0323"
|
||||
o = "\u{1611e}\u{1611e}\u{1611f}"
|
||||
# Actual cases called through String#unicode_normalize
|
||||
assert_equal(s + o, UnicodeNormalize.canonical_ordering_one(s_unordered + o))
|
||||
assert_equal(a[1..], UnicodeNormalize.canonical_ordering_one(a_unordered1[1..]))
|
||||
assert_equal(a[1..] + o, UnicodeNormalize.canonical_ordering_one(a_unordered2[1..] + o))
|
||||
# Artificial cases
|
||||
assert_equal(a + u1 + o + u2 + s, UnicodeNormalize.canonical_ordering_one(a + u1 + o + u2 + s))
|
||||
assert_equal(s[1..] + a + a, UnicodeNormalize.canonical_ordering_one(s_unordered[1..] + a_unordered1 + a_unordered2))
|
||||
assert_equal(o + s + u1 + a + o + a + u2 + o, UnicodeNormalize.canonical_ordering_one(o + s_unordered + u1 + a_unordered1 + o + a_unordered2 + u2 + o))
|
||||
end
|
||||
end
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user