mirror of
https://github.com/Perl/perl5.git
synced 2026-01-26 08:38:23 +00:00
Exclude RtoL characters from paired string delimiters
Fixes #22228 Some scripts in the world are written right-to-left, such as Arabic and Hebrew. This can result in confusion for quote-like string delimiters that we have chosen based on left-to-right ordering. Therefore exclude all such characters. Currently, the only pair that falls into this category that we don't exclude for other reasons is SYRIAC COLON SKEWED LEFT/RIGHT.
This commit is contained in:
parent
850214088a
commit
f3e2f6b80b
@ -3850,7 +3850,6 @@ The complete list of accepted paired delimiters as of Unicode 14.0 is:
|
||||
{ } U+007B, U+007D LEFT/RIGHT CURLY BRACKET
|
||||
« » U+00AB, U+00BB LEFT/RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
» « U+00BB, U+00AB RIGHT/LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
܆ ܇ U+0706, U+0707 SYRIAC COLON SKEWED LEFT/RIGHT
|
||||
༺ ༻ U+0F3A, U+0F3B TIBETAN MARK GUG RTAGS GYON, TIBETAN MARK GUG
|
||||
RTAGS GYAS
|
||||
༼ ༽ U+0F3C, U+0F3D TIBETAN MARK ANG KHANG GYON, TIBETAN MARK ANG
|
||||
@ -4231,5 +4230,4 @@ The complete list of accepted paired delimiters as of Unicode 14.0 is:
|
||||
🢩 🢨 U+1F8A9, U+1F8A8 RIGHT/LEFTWARDS BACK-TILTED SHADOWED WHITE ARROW
|
||||
🢫 🢪 U+1F8AB, U+1F8AA RIGHT/LEFTWARDS FRONT-TILTED SHADOWED WHITE
|
||||
ARROW
|
||||
|
||||
=cut
|
||||
|
||||
@ -378,6 +378,7 @@ my $unpaired = "Didn't find a mirror";
|
||||
my $illegal = "Mirror illegal";
|
||||
my $no_encoded_mate = "Mirrored, but Unicode has no encoded mirror";
|
||||
my $bidirectional = "Bidirectional";
|
||||
my $r2l = "Is in a Right to Left script";
|
||||
|
||||
my %unused_bidi_pairs;
|
||||
my %inverted_unused_bidi_pairs;
|
||||
@ -634,6 +635,15 @@ foreach my $list (qw(Punctuation Symbol)) {
|
||||
next;
|
||||
}
|
||||
|
||||
# Exclude characters that are R to L ordering, as this can cause
|
||||
# confusion. See GH #22228
|
||||
if ($chr =~ / (?[ \p{Bidi_Class:R} + \p{Bidi_Class:AL} ]) /x) {
|
||||
$discards{$code_point} = { reason => $r2l,
|
||||
mirror => $mirror_code_point
|
||||
};
|
||||
next;
|
||||
}
|
||||
|
||||
# We enter the pair with the original code point on the left; if it
|
||||
# should instead be on the R, swap. Most Symbols that contain the
|
||||
# word REVERSE go on the rhs, except those whose names explicitly
|
||||
|
||||
@ -133,6 +133,24 @@ EXPECT
|
||||
Use of '«' is deprecated as a string delimiter at - line 3.
|
||||
Can't find string terminator "«" anywhere before EOF at - line 5.
|
||||
########
|
||||
# NAME mirrored delimiters in R-to-L scripts are invalid
|
||||
BEGIN { binmode STDERR, ":utf8" }
|
||||
use utf8;
|
||||
use feature 'extra_paired_delimiters';
|
||||
my $good = q܈this string is delimitted by a symbol in a R-to-L script܈;
|
||||
$good = q܇this string is delimitted by a symbol in a R-to-L script܇;
|
||||
my $bad = q܈Can't use mirrored R-to-L script delimiters܇;
|
||||
EXPECT
|
||||
Can't find string terminator "܈" anywhere before EOF at - line 6.
|
||||
########
|
||||
# NAME mirrored delimiters in R-to-L scripts are invalid in the other order too
|
||||
BEGIN { binmode STDERR, ":utf8" }
|
||||
use utf8;
|
||||
use feature 'extra_paired_delimiters';
|
||||
my $bad = q܇Can't use mirrored R-to-L script delimiters܈;
|
||||
EXPECT
|
||||
Can't find string terminator "܇" anywhere before EOF at - line 4.
|
||||
########
|
||||
# NAME paired above Latin1 delimiters need feature enabled
|
||||
BEGIN { binmode STDERR, ":utf8" }
|
||||
use utf8;
|
||||
|
||||
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user