mirror of
https://github.com/Perl/perl5.git
synced 2026-01-26 16:39:36 +00:00
This was instituted by 1d74e8214dd53cf0fa9e8c5aab3e6187685eadcd, but ae3e9dd0b31e37357f00d0a83f0b3f4a2713f420 should have fixed this.
1774 lines
72 KiB
Perl
1774 lines
72 KiB
Perl
use strict;
|
|
use warnings;
|
|
|
|
# This file tests interactions with locale and threads
|
|
|
|
BEGIN {
|
|
$| = 1;
|
|
|
|
chdir 't' if -d 't';
|
|
require './test.pl';
|
|
set_up_inc('../lib');
|
|
|
|
skip_all_without_config('useithreads');
|
|
skip_all("Fails on threaded builds on OpenBSD")
|
|
if ($^O =~ m/^(openbsd)$/);
|
|
|
|
require './loc_tools.pl';
|
|
|
|
eval { require POSIX; POSIX->import(qw(errno_h locale_h unistd_h )) };
|
|
if ($@) {
|
|
skip_all("could not load the POSIX module"); # running minitest?
|
|
}
|
|
}
|
|
|
|
use Time::HiRes qw(time usleep);
|
|
|
|
use Devel::Peek;
|
|
$Devel::Peek::pv_limit = 0; $Devel::Peek::pv_limit = 0;
|
|
use Data::Dumper;
|
|
$Data::Dumper::Sortkeys=1;
|
|
$Data::Dumper::Useqq = 1;
|
|
$Data::Dumper::Deepcopy = 1;
|
|
|
|
my $debug = 0;
|
|
|
|
my %map_category_name_to_number;
|
|
my %map_category_number_to_name;
|
|
my @valid_categories = valid_locale_categories();
|
|
foreach my $category (@valid_categories) {
|
|
my $cat_num = eval "&POSIX::$category";
|
|
die "Can't determine ${category}'s number: $@" if $@;
|
|
|
|
$map_category_name_to_number{$category} = $cat_num;
|
|
$map_category_number_to_name{$cat_num} = $category;
|
|
}
|
|
|
|
my $LC_ALL;
|
|
my $LC_ALL_string;
|
|
if (defined $map_category_name_to_number{LC_ALL}) {
|
|
$LC_ALL_string = 'LC_ALL';
|
|
$LC_ALL = $map_category_name_to_number{LC_ALL};
|
|
}
|
|
elsif (defined $map_category_name_to_number{LC_CTYPE}) {
|
|
$LC_ALL_string = 'LC_CTYPE';
|
|
$LC_ALL = $map_category_name_to_number{LC_CTYPE};
|
|
}
|
|
else {
|
|
skip_all("No LC_ALL nor LC_CTYPE");
|
|
}
|
|
|
|
# reset the locale environment
|
|
delete local @ENV{'LANGUAGE', 'LANG', keys %map_category_name_to_number};
|
|
|
|
my @locales = find_locales($LC_ALL);
|
|
skip_all("Couldn't find any locales") if @locales == 0;
|
|
|
|
plan(2);
|
|
|
|
my ($utf8_locales_ref, $non_utf8_locales_ref)
|
|
= classify_locales_wrt_utf8ness(\@locales);
|
|
|
|
my $official_ascii_name = 'ansi_x341968';
|
|
|
|
my %lang_code_to_script = ( # ISO 639.2, but without the many codes that
|
|
# are for latin (but the few western European
|
|
# ones that are latin1 are included)
|
|
am => 'amharic',
|
|
amh => 'amharic',
|
|
amharic => 'amharic',
|
|
ar => 'arabic',
|
|
be => 'cyrillic',
|
|
bel => 'cyrillic',
|
|
ben => 'bengali',
|
|
bn => 'bengali',
|
|
bg => 'cyrillic',
|
|
bul => 'cyrillic',
|
|
bulgarski => 'cyrillic',
|
|
bulgarian => 'cyrillic',
|
|
c => $official_ascii_name,
|
|
cnr => 'cyrillic',
|
|
de => 'latin_1',
|
|
deu => 'latin_1',
|
|
deutsch => 'latin_1',
|
|
german => 'latin_1',
|
|
div => 'thaana',
|
|
dv => 'thaana',
|
|
dzo => 'tibetan',
|
|
dz => 'tibetan',
|
|
el => 'greek',
|
|
ell => 'greek',
|
|
ellada => 'greek',
|
|
en => $official_ascii_name,
|
|
eng => $official_ascii_name,
|
|
american => $official_ascii_name,
|
|
british => $official_ascii_name,
|
|
es => 'latin_1',
|
|
fa => 'arabic',
|
|
fas => 'arabic',
|
|
flamish => 'latin_1',
|
|
fra => 'latin_1',
|
|
fr => 'latin_1',
|
|
heb => 'hebrew',
|
|
he => 'hebrew',
|
|
hi => 'hindi',
|
|
hin => 'hindi',
|
|
hy => 'armenian',
|
|
hye => 'armenian',
|
|
ita => 'latin_1',
|
|
it => 'latin_1',
|
|
ja => 'katakana',
|
|
jpn => 'katakana',
|
|
nihongo => 'katakana',
|
|
japanese => 'katakana',
|
|
ka => 'georgian',
|
|
kat => 'georgian',
|
|
kaz => 'cyrillic',
|
|
khm => 'khmer',
|
|
kir => 'cyrillic',
|
|
kk => 'cyrillic',
|
|
km => 'khmer',
|
|
ko => 'hangul',
|
|
kor => 'hangul',
|
|
korean => 'hangul',
|
|
ku => 'arabic',
|
|
kur => 'arabic',
|
|
ky => 'cyrillic',
|
|
latin1 => 'latin_1',
|
|
lao => 'lao',
|
|
lo => 'lao',
|
|
mk => 'cyrillic',
|
|
mkd => 'cyrillic',
|
|
macedonian => 'cyrillic',
|
|
mn => 'cyrillic',
|
|
mon => 'cyrillic',
|
|
mya => 'myanmar',
|
|
my => 'myanmar',
|
|
ne => 'devanagari',
|
|
nep => 'devanagari',
|
|
nld => 'latin_1',
|
|
nl => 'latin_1',
|
|
nederlands => 'latin_1',
|
|
dutch => 'latin_1',
|
|
por => 'latin_1',
|
|
posix => $official_ascii_name,
|
|
ps => 'arabic',
|
|
pt => 'latin_1',
|
|
pus => 'arabic',
|
|
ru => 'cyrillic',
|
|
russki => 'cyrillic',
|
|
russian => 'cyrillic',
|
|
rus => 'cyrillic',
|
|
sin => 'sinhala',
|
|
si => 'sinhala',
|
|
so => 'arabic',
|
|
som => 'arabic',
|
|
spa => 'latin_1',
|
|
sr => 'cyrillic',
|
|
srp => 'cyrillic',
|
|
tam => 'tamil',
|
|
ta => 'tamil',
|
|
tg => 'cyrillic',
|
|
tgk => 'cyrillic',
|
|
tha => 'thai',
|
|
th => 'thai',
|
|
thai => 'thai',
|
|
ti => 'ethiopian',
|
|
tir => 'ethiopian',
|
|
uk => 'cyrillic',
|
|
ukr => 'cyrillic',
|
|
ur => 'arabic',
|
|
urd => 'arabic',
|
|
zgh => 'arabic',
|
|
zh => 'chinese',
|
|
zho => 'chinese',
|
|
);
|
|
my %codeset_to_script = (
|
|
88591 => 'latin_1',
|
|
88592 => 'latin_2',
|
|
88593 => 'latin_3',
|
|
88594 => 'latin_4',
|
|
88595 => 'cyrillic',
|
|
88596 => 'arabic',
|
|
88597 => 'greek',
|
|
88598 => 'hebrew',
|
|
88599 => 'latin_5',
|
|
885910 => 'latin_6',
|
|
885911 => 'thai',
|
|
885912 => 'devanagari',
|
|
885913 => 'latin_7',
|
|
885914 => 'latin_8',
|
|
885915 => 'latin_9',
|
|
885916 => 'latin_10',
|
|
cp1251 => 'cyrillic',
|
|
cp1255 => 'hebrew',
|
|
);
|
|
|
|
my %script_priorities = ( # In trying to make the results as distinct as
|
|
# possible, make the ones closest to Unicode,
|
|
# and ASCII lowest priority
|
|
$official_ascii_name => 15,
|
|
latin_1 => 14,
|
|
latin_9 => 13,
|
|
latin_2 => 12,
|
|
latin_4 => 12,
|
|
latin_5 => 12,
|
|
latin_6 => 12,
|
|
latin_7 => 12,
|
|
latin_8 => 12,
|
|
latin_10 => 12,
|
|
latin => 11, # Unknown latin version
|
|
);
|
|
|
|
my %script_instances; # Keys are scripts, values are how many locales use
|
|
# this script.
|
|
|
|
sub analyze_locale_name($) {
|
|
|
|
# Takes the input name of a locale and creates (and returns) a hash
|
|
# containing information about that locale
|
|
|
|
my %ret;
|
|
my $input_locale_name = shift;
|
|
|
|
my $old_locale = setlocale(LC_CTYPE);
|
|
|
|
# Often a locale has multiple aliases, and the base one is returned
|
|
# by setlocale() when called with an alias. The base is more likely to
|
|
# meet the XPG standards than the alias.
|
|
my $new_locale = setlocale(LC_CTYPE, $input_locale_name);
|
|
if (! $new_locale) {
|
|
diag "Unexpectedly can't setlocale(LC_CTYPE, $new_locale);"
|
|
. " \$!=$!, \$^E=$^E";
|
|
return;
|
|
}
|
|
|
|
$ret{locale_name} = $new_locale;
|
|
|
|
# XPG standard for locale names:
|
|
# language[_territory[.codeset]][@modifier]
|
|
# But, there are instances which violate this, where there is a codeset
|
|
# without a territory, so instead match:
|
|
# language[_territory][.codeset][@modifier]
|
|
$ret{locale_name} =~ / ^
|
|
( .+? ) # language
|
|
(?: _ ( .+? ) )? # territory
|
|
(?: \. ( .+? ) )? # codeset
|
|
(?: \@ ( .+ ) )? # modifier
|
|
$
|
|
/x;
|
|
|
|
$ret{language} = $1 // "";
|
|
$ret{territory} = $2 // "";
|
|
$ret{codeset} = $3 // "";
|
|
$ret{modifier} = $4 // "";
|
|
|
|
# Normalize all but 'territory' to lowercase
|
|
foreach my $key (qw(language codeset modifier)) {
|
|
$ret{$key} = lc $ret{$key};
|
|
}
|
|
|
|
# Often, the codeset is omitted from the locale name, but it is still
|
|
# discoverable (via langinfo() ) for the current locale on many platforms.
|
|
# We already have switched locales
|
|
use I18N::Langinfo qw(langinfo CODESET);
|
|
my $langinfo_codeset = lc langinfo(CODESET);
|
|
|
|
# Now can switch back to the locale current on entry to this sub
|
|
if (! setlocale(LC_CTYPE, $old_locale)) {
|
|
die "Unexpectedly can't restore locale to $old_locale from"
|
|
. " $new_locale; \$!=$!, \$^E=$^E";
|
|
}
|
|
|
|
# Normalize the codesets
|
|
foreach my $codeset_ref (\$langinfo_codeset, \$ret{codeset}) {
|
|
$$codeset_ref =~ s/\W//g;
|
|
$$codeset_ref =~ s/iso8859/8859/g;
|
|
$$codeset_ref =~ s/\b65001\b/utf8/; # Windows synonym
|
|
$$codeset_ref =~ s/\b646\b/$official_ascii_name/;
|
|
$$codeset_ref =~ s/\busascii\b/$official_ascii_name/;
|
|
}
|
|
|
|
# The langinfo codeset, if found, is considered more reliable than the one
|
|
# in the name. (This is because libc looks into the actual data
|
|
# definition.) So use it unconditionally when found. But note any
|
|
# discrepancy as an aid for improving this test.
|
|
if ($langinfo_codeset) {
|
|
if ($ret{codeset} && $ret{codeset} ne $langinfo_codeset) {
|
|
diag "In $ret{locale_name}, codeset from langinfo"
|
|
. " ($langinfo_codeset) doesn't match codeset in"
|
|
. " locale_name ($ret{codeset})";
|
|
}
|
|
$ret{codeset} = $langinfo_codeset;
|
|
}
|
|
|
|
$ret{is_utf8} = 0 + ($ret{codeset} eq 'utf8');
|
|
|
|
# If the '@' modifier is a known script, use it as the script.
|
|
if ( $ret{modifier}
|
|
and grep { $_ eq $ret{modifier} } values %lang_code_to_script)
|
|
{
|
|
$ret{script} = $ret{nominal_script} = $ret{modifier};
|
|
$ret{modifier} = "";
|
|
}
|
|
elsif ($ret{codeset} && ! $ret{is_utf8}) {
|
|
|
|
# The codeset determines the script being used, except if we don't
|
|
# have the codeset, or it is UTF-8 (which covers a multitude of
|
|
# scripts).
|
|
#
|
|
# We have hard-coded the scripts corresponding to a few of these
|
|
# non-UTF-8 codesets. See if this is one of them.
|
|
$ret{script} = $codeset_to_script{$ret{codeset}};
|
|
if ($ret{script}) {
|
|
|
|
# For these, the script is likely a combination of ASCII (from
|
|
# 0-127), and the script from (128-255). Reflect that in the name
|
|
# used (for distinguishing below)
|
|
$ret{script} .= '_' . $official_ascii_name;
|
|
}
|
|
elsif ($ret{codeset} =~ /^koi/) { # Another common set.
|
|
$ret{script} = "cyrillic_${official_ascii_name}";
|
|
}
|
|
else { # Here the codeset name is unknown to us. Just assume it
|
|
# means a whole new script. Add the language at the end of
|
|
# the name to further make it distinct
|
|
$ret{script} = $ret{codeset};
|
|
$ret{script} .= "_$ret{language}"
|
|
if $ret{codeset} !~ /$official_ascii_name/;
|
|
}
|
|
}
|
|
else { # Here, the codeset is unknown or is UTF-8.
|
|
|
|
# In these cases look up the script based on the language. The table
|
|
# is meant to be pretty complete, but omits the many scripts that are
|
|
# ASCII or Latin1. And it omits the fullnames of languages whose
|
|
# scripts are themselves. The grep below catches those. Defaulting
|
|
# to Latin means that a non-standard language name is considered to be
|
|
# latin -- maybe not the best outcome but what else is better?
|
|
$ret{script} = $lang_code_to_script{$ret{language}};
|
|
if (! $ret{script}) {
|
|
$ret{script} = (grep { $ret{language} eq $_ }
|
|
values %lang_code_to_script)
|
|
? $ret{language}
|
|
: 'latin';
|
|
}
|
|
}
|
|
|
|
# If we have @euro, and the script is ASCII or latin or latin1, change it
|
|
# into latin9, which is closer to what is going on. latin9 has a few
|
|
# other differences from latin1, but it's not worth creating a whole new
|
|
# script type that differs only in the currency symbol.
|
|
if ( ($ret{modifier} && $ret{modifier} eq 'euro')
|
|
&& $ret{script} =~ / ^ ($official_ascii_name | latin (_1)? ) $ /x)
|
|
{
|
|
$ret{script} = 'latin_9';
|
|
}
|
|
|
|
# Look up the priority of this script. All the non-listed ones have
|
|
# highest (0 or 1) priority. We arbitrarily make the ones higher
|
|
# priority (0) that aren't known to be half-ascii, simply because they
|
|
# might be entirely different than most locales.
|
|
$ret{priority} = $script_priorities{$ret{script}};
|
|
if (! $ret{priority}) {
|
|
$ret{priority} = ( $ret{script} ne $official_ascii_name
|
|
&& $ret{script} =~ $official_ascii_name)
|
|
? 0
|
|
: 1;
|
|
}
|
|
|
|
# Script names have been set up so that anything after an underscore is a
|
|
# modifier of the main script. We keep a counter of which occurence of
|
|
# this script this is. This is used along with the priority to order the
|
|
# locales so that the characters are as varied as possible.
|
|
my $script_root = ($ret{script} =~ s/_.*//r) . "_$ret{is_utf8}";
|
|
$ret{script_instance} = $script_instances{$script_root}++;
|
|
|
|
return \%ret;
|
|
}
|
|
|
|
# Prioritize locales that are most unlike the standard C/Latin1-ish ones.
|
|
# This is to minimize getting passes for tests on a category merely because
|
|
# they share many of the same characteristics as the locale of another
|
|
# category simultaneously in effect.
|
|
sub sort_locales ()
|
|
{
|
|
my $cmp = $a->{script_instance} <=> $b->{script_instance};
|
|
return $cmp if $cmp;
|
|
|
|
$cmp = $a->{priority} <=> $b->{priority};
|
|
return $cmp if $cmp;
|
|
|
|
$cmp = $a->{script} cmp $b->{script};
|
|
return $cmp if $cmp;
|
|
|
|
$cmp = $a->{modifier} cmp $b->{modifier};
|
|
return $cmp if $cmp;
|
|
|
|
$cmp = $a->{codeset} cmp $b->{codeset};
|
|
return $cmp if $cmp;
|
|
|
|
$cmp = $a->{territory} cmp $b->{territory};
|
|
return $cmp if $cmp;
|
|
|
|
return lc $a cmp lc $b;
|
|
}
|
|
|
|
# Find out extra info about each locale
|
|
my @cleaned_up_locales;
|
|
for my $locale (@locales) {
|
|
my $locale_struct = analyze_locale_name($locale);
|
|
|
|
next unless $locale_struct;
|
|
|
|
my $name = $locale_struct->{locale_name};
|
|
next if grep { $name eq $_->{locale_name} } @cleaned_up_locales;
|
|
|
|
push @cleaned_up_locales, $locale_struct;
|
|
}
|
|
|
|
@locales = @cleaned_up_locales;
|
|
|
|
# Without a proper codeset, we can't really know how to test. This should
|
|
# only happen on platforms that lack the ability to determine the codeset.
|
|
@locales = grep { $_->{codeset} ne "" } @locales;
|
|
|
|
# Sort into priority order.
|
|
@locales = sort sort_locales @locales;
|
|
|
|
# First test
|
|
SKIP: { # perl #127708
|
|
my $locale = $locales[0];
|
|
skip("No valid locale to test with", 1) if $locale->{codeset} eq
|
|
$official_ascii_name;
|
|
local $ENV{LC_MESSAGES} = $locale->{locale_name};
|
|
|
|
# We're going to try with all possible error numbers on this platform
|
|
my $error_count = keys(%!) + 1;
|
|
|
|
print fresh_perl("
|
|
use threads;
|
|
use strict;
|
|
use warnings;
|
|
use Time::HiRes qw(usleep);
|
|
|
|
my \$errnum = 1;
|
|
|
|
my \@threads = map +threads->create(sub {
|
|
usleep 0.1;
|
|
'threads'->yield();
|
|
|
|
for (1..5_000) {
|
|
\$errnum = (\$errnum + 1) % $error_count;
|
|
\$! = \$errnum;
|
|
|
|
# no-op to trigger stringification
|
|
next if \"\$!\" eq \"\";
|
|
}
|
|
}), (0..1);
|
|
\$_->join for splice \@threads;",
|
|
{}
|
|
);
|
|
|
|
pass("Didn't segfault");
|
|
}
|
|
|
|
# Second test setup
|
|
my %locale_name_to_object;
|
|
for my $locale (@locales) {
|
|
$locale_name_to_object{$locale->{locale_name}} = $locale;
|
|
}
|
|
|
|
sub sort_by_hashed_locale {
|
|
local $a = $locale_name_to_object{$a};
|
|
local $b = $locale_name_to_object{$b};
|
|
|
|
return sort_locales;
|
|
}
|
|
|
|
sub min {
|
|
my ($a, $b) = @_;
|
|
return $a if $a <= $b;
|
|
return $b;
|
|
}
|
|
|
|
# Smokes have shown this to be about the maximum numbers some platforms can
|
|
# handle. khw has tried 500 threads/1000 iterations on Linux
|
|
my $thread_count = 15;
|
|
my $iterations = 100;
|
|
|
|
my $alarm_clock = (1 * 10 * 60); # A long time, just to prevent hanging
|
|
|
|
# Chunk the iterations, so that every so often the test comes up for air.
|
|
my $iterations_per_test_set = min(30, int($iterations / 5));
|
|
$iterations_per_test_set = 1 if $iterations_per_test_set == 0;
|
|
|
|
# Sometimes the test calls setlocale() for each individual locale category.
|
|
# But every this many threads, it will be called just once, using LC_ALL to
|
|
# specify the categories. This way both setting individual categories and
|
|
# LC_ALL get tested. But skip this nicety on platforms where we are restricted from
|
|
# using all the available categories, as it would make the code more complex
|
|
# for not that much gain.
|
|
my @platform_categories = platform_locale_categories();
|
|
my $lc_all_frequency = scalar @platform_categories == scalar @valid_categories
|
|
? 3
|
|
: -1;
|
|
|
|
# To avoid things getting too big; skip tests whose results are larger than
|
|
# this many characters.
|
|
my $max_result_length = 10000;
|
|
|
|
# Estimate as to how long in seconds to allow a thread to be ready to roll
|
|
# after creation, so as to try to get all the threads to start as
|
|
# simultaneously as possible
|
|
my $per_thread_startup = .18;
|
|
|
|
# For use in experimentally tuning the above value
|
|
my $die_on_negative_sleep = 1;
|
|
|
|
# We don't need to test every possible errno, but you could change this to do
|
|
# so by setting it to negative
|
|
my $max_message_catalog_entries = 10;
|
|
|
|
# December 18, 1987
|
|
my $strftime_args = "'%c', 0, 0, , 12, 18, 11, 87";
|
|
|
|
my %distincts; # The distinct 'operation => result' cases
|
|
my %op_counts; # So we can bail early if more test cases than threads
|
|
my $separator = '____'; # The operation and result are often melded into a
|
|
# string separated by this.
|
|
|
|
sub pack_op_result($$) {
|
|
my ($op, $result) = @_;
|
|
return $op . $separator
|
|
. (0 + utf8::is_utf8($op)) . $separator
|
|
. $result . $separator
|
|
. (0 + utf8::is_utf8($result));
|
|
}
|
|
|
|
sub fixup_utf8ness($$) {
|
|
my ($operand, $utf8ness) = @_;
|
|
|
|
# Make sure $operand is encoded properly
|
|
|
|
if ($utf8ness + 0 != 0 + utf8::is_utf8($$operand)) {
|
|
if ($utf8ness) {
|
|
utf8::upgrade($$operand);
|
|
}
|
|
else {
|
|
utf8::downgrade($$operand);
|
|
}
|
|
}
|
|
}
|
|
|
|
sub unpack_op_result($) {
|
|
my $op_result = shift;
|
|
|
|
my ($op, $op_utf8ness, $result, $result_utf8ness) =
|
|
split $separator, $op_result;
|
|
fixup_utf8ness(\$op, $op_utf8ness);
|
|
fixup_utf8ness(\$result, $result_utf8ness);
|
|
|
|
return ($op, $result);
|
|
}
|
|
|
|
sub add_trials($$;$)
|
|
{
|
|
# Add a test case for category $1.
|
|
# $2 is the test case operation to perform
|
|
# $3 is a constraint, optional.
|
|
|
|
my $category_name = shift;
|
|
my $input_op = shift; # The eval string to perform
|
|
my $locale_constraint = shift // ""; # If defined, the test will be
|
|
# created only for locales that
|
|
# match this
|
|
LOCALE:
|
|
foreach my $locale (@locales) {
|
|
my $locale_name = $locale->{locale_name};
|
|
my $op = $input_op;
|
|
|
|
# All categories should be set to the same locale to make sure
|
|
# this test gets the valid results.
|
|
next unless setlocale($LC_ALL, $locale_name);
|
|
|
|
# As of NetBSD 10, it doesn't implement LC_COLLATE, and setting that
|
|
# category to anything but C or POSIX fails. But setting LC_ALL to
|
|
# other locales (as we just did) returns success, while leaving
|
|
# LC_COLLATE untouched. Therefore, also set the category individually
|
|
# to catch such things. This problem may not be confined to NetBSD.
|
|
# This also works if the platform lacks LC_ALL. We at least set
|
|
# LC_CTYPE (via '$LC_ALL' above) besides the category.
|
|
next unless setlocale($map_category_name_to_number{$category_name},
|
|
$locale_name);
|
|
|
|
# Use a placeholder if this test requires a particular constraint,
|
|
# which isn't met in this case.
|
|
if ($locale_constraint) {
|
|
if ($locale_constraint eq 'utf8_only') {
|
|
next if ! $locale->{is_utf8};
|
|
}
|
|
elsif ($locale_constraint eq 'a<b') {
|
|
my $result = eval "use locale; 'a' lt 'B'";
|
|
die "$category_name: '$op (a lt B)': $@" if $@;
|
|
next unless $result;
|
|
}
|
|
else {
|
|
die "Only accepted locale constraints are 'utf8_only' and 'a<b'"
|
|
}
|
|
}
|
|
|
|
# Calculate what the expected value of the test should be. We're
|
|
# doing this here in the main thread and with all the locales set to
|
|
# be the same thing. The test will be that we should get this value
|
|
# under stress, with each thread using different locales for each
|
|
# category, and multiple threads simultaneously executing with
|
|
# disparate locales
|
|
my $eval_string = ($op) ? "use locale; $op;" : "";
|
|
my $result = eval $eval_string;
|
|
die "$category_name: '$op': $@" if $@;
|
|
if (! defined $result) {
|
|
if ($debug) {
|
|
print STDERR __FILE__, ": ", __LINE__,
|
|
": Undefined result for $locale_name",
|
|
" $category_name: '$op'\n";
|
|
}
|
|
next;
|
|
}
|
|
elsif ($debug > 1) {
|
|
print STDERR "\n", __FILE__, ": ", __LINE__, ": $category_name:",
|
|
" $locale_name: Op = ", Dumper($op), "; Returned ";
|
|
Dump $result;
|
|
}
|
|
if (length $result > $max_result_length) {
|
|
diag("For $locale_name, '$op', result is too long; skipped");
|
|
next;
|
|
}
|
|
|
|
# It seems best to not include tests with mojibake results, which here
|
|
# is checked for by two question marks in a row. (strxfrm is excluded
|
|
# from this restriction, as the result is really binary, so '??' could
|
|
# and does come up, not meaning mojibake.) A concrete example of this
|
|
# is in Mingw the locale Yi_China.1252. CP 1252 is for a Latin
|
|
# script; just about anything from an East Asian script is bound to
|
|
# fail. It makes no sense to have this locale, but it exists.
|
|
if ($eval_string !~ /xfrm/ && $result =~ /\?\?/) {
|
|
if ($debug) {
|
|
print STDERR __FILE__, ": ", __LINE__,
|
|
" For $locale_name, op=$op, result has mojibake: $result\n";
|
|
}
|
|
|
|
next;
|
|
}
|
|
|
|
# Some systems are buggy in that setlocale() gives non-deterministic
|
|
# results for some locales. Here we try to exclude those from our
|
|
# test by trying the setlocale this many times to see if it varies:
|
|
my $deterministic_trial_count = 5;
|
|
|
|
# To do this, we set the locale to an 'alternate' locale between
|
|
# trials. This defeats any attempt by the implementation to skip the
|
|
# setlocale if it is already in said locale.
|
|
my $alternate;
|
|
my @alternate;
|
|
|
|
# If possible, the alternate is chosen to be of the opposite UTF8ness,
|
|
# so as to reset internal states about that.
|
|
if (! $utf8_locales_ref || ! $utf8_locales_ref->@*) {
|
|
|
|
# If no UTF-8 locales, must choose one that is non-UTF-8.
|
|
@alternate = grep { $_ ne $locale_name } $non_utf8_locales_ref->@*;
|
|
}
|
|
elsif (! $non_utf8_locales_ref || ! $non_utf8_locales_ref->@*) {
|
|
|
|
# If no non-UTF-8 locales, must choose one that is UTF-8.
|
|
@alternate = grep { $_ ne $locale_name } $utf8_locales_ref->@*;
|
|
}
|
|
elsif (grep { $_ eq $locale_name } $utf8_locales_ref->@*) {
|
|
@alternate = $non_utf8_locales_ref->@*;
|
|
}
|
|
else {
|
|
@alternate = $utf8_locales_ref->@*;
|
|
}
|
|
|
|
# Now do the trials. For each, we choose the next alternate on the
|
|
# list, rotating the list so the following iteration will choose a
|
|
# different alternate.
|
|
for my $i (1 .. $deterministic_trial_count - 1) {
|
|
my $other = shift @alternate;
|
|
push @alternate, $other;
|
|
|
|
# Run the test on the alternate locale
|
|
if (! setlocale($LC_ALL, $other)) {
|
|
if ( $LC_ALL_string eq 'LC_ALL'
|
|
|| ! setlocale($map_category_name_to_number{$category_name},
|
|
$other))
|
|
{
|
|
die "Unexpectedly can't set locale to $other:"
|
|
. " \$!=$!, \$^E=$^E";
|
|
}
|
|
}
|
|
|
|
eval $eval_string;
|
|
|
|
# Then run it on the one we are hoping to test
|
|
if (! setlocale($LC_ALL, $locale_name)) {
|
|
if ( $LC_ALL_string eq 'LC_ALL'
|
|
|| ! setlocale($map_category_name_to_number{$category_name},
|
|
$locale_name))
|
|
{
|
|
die "Unexpectedly can't set locale to $locale_name from "
|
|
. setlocale($LC_ALL)
|
|
. "; \$!=$!, \$^E=$^E";
|
|
}
|
|
}
|
|
|
|
my $got = eval $eval_string;
|
|
next if $got eq $result
|
|
&& utf8::is_utf8($got) == utf8::is_utf8($result);
|
|
|
|
# If the result varied from the expected value, this is a
|
|
# non-deterministic locale, so, don't test it.
|
|
diag("For '$eval_string',\nresults in iteration $i differed from"
|
|
. " the original\ngot");
|
|
Dump($got);
|
|
diag("expected");
|
|
Dump($result);
|
|
next LOCALE;
|
|
}
|
|
|
|
# Here, the setlocale for this locale appears deterministic. Use it.
|
|
my $op_result = pack_op_result($op, $result);
|
|
push $distincts{$category_name}{$op_result}{locales}->@*, $locale_name;
|
|
# No point in looking beyond this if we already have all the tests we
|
|
# need. Note this assumes that the same op isn't used in two
|
|
# categories.
|
|
if (defined $op_counts{$op} && $op_counts{$op} >= $thread_count)
|
|
{
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
|
|
use Config;
|
|
|
|
# Figure out from config how to represent disparate LC_ALL
|
|
my @valid_category_numbers = sort { $a <=> $b }
|
|
map { $map_category_name_to_number{$_} } @valid_categories;
|
|
|
|
my $use_name_value_pairs = defined $Config{d_perl_lc_all_uses_name_value_pairs};
|
|
my $lc_all_separator = ($use_name_value_pairs)
|
|
? ";"
|
|
: $Config{perl_lc_all_separator} =~ s/"//gr;
|
|
my @position_to_category_number;
|
|
if (! $use_name_value_pairs) {
|
|
my $positions = $Config{perl_lc_all_category_positions_init} =~ s/[{}]//gr;
|
|
$positions =~ s/,//g;
|
|
$positions =~ s/^ +//;
|
|
$positions =~ s/ +$//;
|
|
@position_to_category_number = split / \s+ /x, $positions
|
|
}
|
|
|
|
sub get_next_category() {
|
|
use feature 'state';
|
|
state $index;
|
|
|
|
# Called to rotate all the legal locale categories
|
|
|
|
my $which = ($use_name_value_pairs)
|
|
? \@valid_category_numbers
|
|
: \@position_to_category_number;
|
|
|
|
$index = -1 unless defined $index;
|
|
$index++;
|
|
|
|
if (! defined $which->[$index]) {
|
|
undef $index;
|
|
return;
|
|
}
|
|
|
|
my $category_number = $which->[$index];
|
|
return $category_number if $category_number != $LC_ALL;
|
|
|
|
# If this was LC_ALL, the next one won't be
|
|
return &get_next_category();
|
|
}
|
|
|
|
SKIP: {
|
|
skip("Unsafe locale threads", 1) unless ${^SAFE_LOCALES};
|
|
|
|
# The second test is several threads nearly simulataneously executing
|
|
# locale-sensitive operations with the categories set to disparate
|
|
# locales. This catches cases where the results of a given category is
|
|
# related to what the locale is of another category. (As an example, this
|
|
# test showed that some platforms require LC_CTYPE to be the same as
|
|
# LC_COLLATION, and/or LC_MESSAGES for proper results, so that Perl had to
|
|
# change to bring these into congruence under the hood). And it also
|
|
# catches where there is interference between multiple threads.
|
|
#
|
|
# This test tries to exercise every underlying locale-dependent operation
|
|
# available in Perl. It doesn't test every use of the operation, but
|
|
# includes some Perl construct that uses each. For example, it tests lc
|
|
# but not lcfirst. That would be redundant for this test; it wants to
|
|
# know if lowercasing works under threads and locales. But if the
|
|
# implementations were disjoint at the time this test was written, it
|
|
# would try each implementation. So, various things in the POSIX module
|
|
# have separate tests from the ones in core.
|
|
#
|
|
# For each such underlying locale-dependent operation, a Perl-visible
|
|
# construct is chosen that uses it. And a typical input or set of inputs
|
|
# is passed to that and the results are noted for every available locale
|
|
# on the platform. Many locales will have identical results, so the
|
|
# duplicates are stored separately.
|
|
#
|
|
# There will be N simultaneous threads. Each thread is configured to set
|
|
# a locale for each category, to run operations whose results depend on
|
|
# that locale, then check that the result matches the expected value, and
|
|
# to immediately repeat some largish number of iterations. The goal is to
|
|
# see if the locales on each thread are truly independent of those on the
|
|
# other threads.
|
|
#
|
|
# To that end, the locales are chosen so that the results differ from
|
|
# every other locale. Otherwise, the thread results wouldn't be truly
|
|
# independent. But if there are more threads than there are distinct
|
|
# results, duplicates are used to fill up what would otherwise be empty
|
|
# slots. That is the best we can do on those platforms.
|
|
#
|
|
# Having lots of locales to continually switch between stresses things so
|
|
# as to find potential segfaults where locale changing isn't really thread
|
|
# safe.
|
|
|
|
# There is a bug in older Windows runtimes in which locales in CP1252 and
|
|
# similar code pages whose names aren't entirely ASCII aren't recognized
|
|
# by later setlocales. Some names that are all ASCII are synonyms for
|
|
# such names. Weed those out by doing a setlocale of the original name,
|
|
# and then a setlocale of the resulting one. Discard locales which have
|
|
# any unacceptable name
|
|
if (${^O} eq "MSWin32" && $Config{'libc'} !~ /ucrt/) {
|
|
@locales = grep {
|
|
my $locale_name = $_->{locale_name};
|
|
my $underlying_name = setlocale(&LC_CTYPE, $locale_name);
|
|
|
|
# Defeat any attempt to skip the setlocale if the same as current,
|
|
# by switching to a locale very unlikey to be the current one.
|
|
setlocale($LC_ALL, "Albanian");
|
|
|
|
defined($underlying_name) && setlocale(&LC_CTYPE, $underlying_name)
|
|
} @locales;
|
|
}
|
|
|
|
# Create a hash of the errnos:
|
|
# "1" => "Operation\\ not\\ permitted",
|
|
# "2" => "No\\ such\\ file\\ or\\ directory",
|
|
# etc.
|
|
my %msg_catalog;
|
|
foreach my $error (sort keys %!) {
|
|
my $number = eval "Errno::$error";
|
|
$! = $number;
|
|
my $description = "$!";
|
|
next unless "$description";
|
|
$msg_catalog{$number} = quotemeta "$description";
|
|
}
|
|
|
|
# Then just the errnos.
|
|
my @msg_catalog = sort { $a <=> $b } keys %msg_catalog;
|
|
|
|
# Remove the excess ones.
|
|
splice @msg_catalog, $max_message_catalog_entries
|
|
if $max_message_catalog_entries >= 0;
|
|
my $msg_catalog = join ',', @msg_catalog;
|
|
|
|
eval { my $discard = POSIX::localeconv()->{currency_symbol}; };
|
|
my $has_localeconv = $@ eq "";
|
|
|
|
# Now go through and create tests for each locale category on the system.
|
|
# These tests were determined by grepping through the code base for
|
|
# locale-sensitive operations, and then figuring out something to exercise
|
|
# them.
|
|
foreach my $category (@valid_categories) {
|
|
no warnings 'uninitialized';
|
|
|
|
next if $category eq 'LC_ALL'; # Tested below as a combination of the
|
|
# individual categories
|
|
if ($category eq 'LC_COLLATE') {
|
|
add_trials('LC_COLLATE',
|
|
# 'reverse' causes it to be definitely out of order for
|
|
# the 'sort' to correct
|
|
'quotemeta join "", sort reverse map { chr } (1..255)');
|
|
|
|
# We pass an re to exclude testing locales that don't necessarily
|
|
# have a lt b.
|
|
add_trials('LC_COLLATE', '"a" lt "B"', 'a<b');
|
|
add_trials('LC_COLLATE', 'my $a = "a"; my $b = "B";'
|
|
. ' POSIX::strcoll($a, $b) < 0;',
|
|
'a<b');
|
|
|
|
# Doesn't include NUL because our memcollxfrm implementation of it
|
|
# isn't perfect
|
|
add_trials('LC_COLLATE', 'my $string = quotemeta join "",'
|
|
. ' map { chr } (1..255);'
|
|
. ' POSIX::strxfrm($string)');
|
|
next;
|
|
}
|
|
|
|
if ($category eq 'LC_CTYPE') {
|
|
add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta lc'
|
|
. ' join "" , map { chr } (0..255)');
|
|
add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta uc'
|
|
. ' join "", map { chr } (0..255)');
|
|
add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta CORE::fc'
|
|
. ' join "", map { chr } (0..255)');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/\d/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/\s/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/\w/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:alpha:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:alnum:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:ascii:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:blank:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:cntrl:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:graph:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:lower:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:print:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:punct:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:upper:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'no warnings "locale";'
|
|
. ' my $string = join "", map { chr } 0..255;'
|
|
. ' $string =~ s|(.)|$1=~/[[:xdigit:]]/?1:0|gers');
|
|
add_trials('LC_CTYPE', 'use I18N::Langinfo qw(langinfo CODESET);'
|
|
. ' no warnings "uninitialized";'
|
|
. ' langinfo(CODESET);');
|
|
|
|
# In the multibyte functions, the non-reentrant ones can't be made
|
|
# thread safe
|
|
if ($Config{'d_mbrlen'} eq 'define') {
|
|
add_trials('LC_CTYPE', 'my $string = chr 0x100;'
|
|
. ' utf8::encode($string);'
|
|
. ' no warnings "uninitialized";'
|
|
. ' POSIX::mblen(undef);'
|
|
. ' POSIX::mblen($string)',
|
|
'utf8_only');
|
|
}
|
|
if ($Config{'d_mbrtowc'} eq 'define') {
|
|
add_trials('LC_CTYPE', 'my $value; my $str = "\x{100}";'
|
|
. ' utf8::encode($str);'
|
|
. ' no warnings "uninitialized";'
|
|
. ' POSIX::mbtowc(undef, undef);'
|
|
. ' POSIX::mbtowc($value, $str); $value;',
|
|
'utf8_only');
|
|
}
|
|
if ($Config{'d_wcrtomb'} eq 'define') {
|
|
add_trials('LC_CTYPE', 'my $value;'
|
|
. ' no warnings "uninitialized";'
|
|
. ' POSIX::wctomb(undef, undef);'
|
|
. ' POSIX::wctomb($value, 0xFF);'
|
|
. ' $value;',
|
|
'utf8_only');
|
|
}
|
|
|
|
add_trials('LC_CTYPE',
|
|
'no warnings "locale";'
|
|
. ' my $uc = CORE::uc join "", map { chr } (0..255);'
|
|
. ' my $fc = quotemeta CORE::fc $uc;'
|
|
. ' $uc =~ / \A $fc \z /xi;');
|
|
next;
|
|
}
|
|
|
|
if ($category eq 'LC_MESSAGES') {
|
|
add_trials('LC_MESSAGES',
|
|
"join \"\n\", map { \$! = \$_; \"\$!\" } ($msg_catalog)");
|
|
add_trials('LC_MESSAGES',
|
|
'use I18N::Langinfo qw(langinfo YESSTR NOSTR YESEXPR NOEXPR);'
|
|
. ' no warnings "uninitialized";'
|
|
. ' join ",",'
|
|
. ' map { langinfo($_) } YESSTR, NOSTR, YESEXPR, NOEXPR;');
|
|
next;
|
|
}
|
|
|
|
if ($category eq 'LC_MONETARY') {
|
|
if ($has_localeconv) {
|
|
add_trials('LC_MONETARY', "localeconv()->{currency_symbol}");
|
|
}
|
|
add_trials('LC_MONETARY',
|
|
'use I18N::Langinfo qw(langinfo CRNCYSTR);'
|
|
. ' no warnings "uninitialized";'
|
|
. ' join "|", map { langinfo($_) } CRNCYSTR;');
|
|
next;
|
|
}
|
|
|
|
if ($category eq 'LC_NUMERIC') {
|
|
if ($has_localeconv) {
|
|
add_trials('LC_NUMERIC', "no warnings; 'uninitialised';"
|
|
. " join '|',"
|
|
. " localeconv()->{decimal_point},"
|
|
. " localeconv()->{thousands_sep}");
|
|
}
|
|
add_trials('LC_NUMERIC',
|
|
'use I18N::Langinfo qw(langinfo RADIXCHAR THOUSEP);'
|
|
. ' no warnings "uninitialized";'
|
|
. ' join "|", map { langinfo($_) } RADIXCHAR, THOUSEP;');
|
|
|
|
# Use a variable to avoid runtime bugs being hidden by constant
|
|
# folding
|
|
add_trials('LC_NUMERIC', 'my $in = 4.2; sprintf("%g", $in)');
|
|
next;
|
|
}
|
|
|
|
if ($category eq 'LC_TIME') {
|
|
add_trials('LC_TIME', "POSIX::strftime($strftime_args)");
|
|
add_trials('LC_TIME', <<~'END_OF_CODE');
|
|
use I18N::Langinfo qw(langinfo
|
|
ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 ABDAY_6 ABDAY_7
|
|
ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
|
|
ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12
|
|
DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7
|
|
MON_1 MON_2 MON_3 MON_4 MON_5 MON_6
|
|
MON_7 MON_8 MON_9 MON_10 MON_11 MON_12
|
|
D_FMT D_T_FMT T_FMT);
|
|
no warnings "uninitialized";
|
|
join "|",
|
|
map { langinfo($_) }
|
|
ABDAY_1,ABDAY_2,ABDAY_3,ABDAY_4,ABDAY_5,
|
|
ABDAY_6,ABDAY_7,
|
|
ABMON_1,ABMON_2,ABMON_3,ABMON_4,ABMON_5,
|
|
ABMON_6, ABMON_7,ABMON_8,ABMON_9,ABMON_10,
|
|
ABMON_11,ABMON_12,
|
|
DAY_1,DAY_2,DAY_3,DAY_4,DAY_5,DAY_6,DAY_7,
|
|
MON_1,MON_2,MON_3,MON_4,MON_5,MON_6, MON_7,
|
|
MON_8,MON_9,MON_10,MON_11,MON_12,
|
|
D_FMT,D_T_FMT,T_FMT;
|
|
END_OF_CODE
|
|
next;
|
|
}
|
|
} # End of creating test cases.
|
|
|
|
|
|
# Now analyze the test cases
|
|
my %all_tests;
|
|
foreach my $category (keys %distincts) {
|
|
my %results;
|
|
my %distinct_results_count;
|
|
|
|
# Find just the distinct test operations; sort for repeatibility
|
|
my %distinct_ops;
|
|
for my $op_result (sort keys $distincts{$category}->%*) {
|
|
my ($op, $result) = unpack_op_result($op_result);
|
|
|
|
$distinct_ops{$op}++;
|
|
push $results{$op}->@*, $result;
|
|
$distinct_results_count{$result} +=
|
|
scalar $distincts{$category}{$op_result}{locales}->@*;
|
|
}
|
|
|
|
# And get a sorted list of all the test operations
|
|
my @ops = sort keys %distinct_ops;
|
|
|
|
sub gen_combinations {
|
|
|
|
# Generate all the non-empty combinations of operations and
|
|
# results (for the current category) possible on this platform.
|
|
# That is, if a category has N operations, it will generate a list
|
|
# of entries. Each entry will itself have N elements, one for
|
|
# each operation, and when all the entries are considered
|
|
# together, every possible outcome is represented.
|
|
|
|
my $op_ref = shift; # Reference to list of operations
|
|
my $results_ref = shift; # Reference to hash; key is operation;
|
|
# value is an array of all possible
|
|
# outcomes of this operation.
|
|
my $distincts_ref = shift; # Reference to %distincts of this
|
|
# category
|
|
|
|
# Get the first operation on the list
|
|
my $op = shift $op_ref->@*;
|
|
|
|
# The return starts out as a list of hashes of all possible
|
|
# outcomes for executing 'op'. Each hash has two keys:
|
|
# 'op_results' is an array of one element: 'op => result',
|
|
# packed into a string.
|
|
# 'locales' is an array of all the locales which have the
|
|
# same result for 'op'
|
|
my @return;
|
|
foreach my $result ($results_ref->{$op}->@*) {
|
|
my $op_result = pack_op_result($op, $result);
|
|
push @return, {
|
|
op_results => [ $op_result ],
|
|
locales => $distincts_ref->{$op_result}{locales},
|
|
};
|
|
}
|
|
|
|
# If this is the final element of the list, we are done.
|
|
return (\@return) unless $op_ref->@*;
|
|
|
|
# Otherwise recurse to generate the combinations for the remainder
|
|
# of the list.
|
|
my $recurse_return = &gen_combinations($op_ref,
|
|
$results_ref,
|
|
$distincts_ref);
|
|
# Now we have to generate the combinations of the current item
|
|
# with the ones returned by the recursion. Each element of the
|
|
# current item is combined with each element of the recursed.
|
|
my @combined;
|
|
foreach my $this (@return) {
|
|
my @this_locales = $this->{locales}->@*;
|
|
foreach my $recursed ($recurse_return->@*) {
|
|
my @recursed_locales = $recursed->{locales}->@*;
|
|
|
|
# @this_locales is a list of locales this op => result is
|
|
# valid for. @recursed_locales is similarly a list of the
|
|
# valid ones for the recursed return. Their intersection
|
|
# is a list of the locales valid for this combination.
|
|
my %seen;
|
|
$seen{$_}++ foreach @this_locales, @recursed_locales;
|
|
my @intersection = grep $seen{$_} == 2, keys %seen;
|
|
|
|
# An alternative intersection algorithm:
|
|
# my (%set1, %set2);
|
|
# @set1{@list1} = ();
|
|
# @set2{@list2} = ();
|
|
# my @intersection = grep exists $set1{$_}, keys %set2;
|
|
|
|
# If the intersection is empty, this combination can't
|
|
# actually happen on this platform.
|
|
next unless @intersection;
|
|
|
|
# Append the recursed list to the current list to form the
|
|
# combined list.
|
|
my @combined_result = $this->{op_results}->@*;
|
|
push @combined_result, $recursed->{op_results}->@*;
|
|
# And create the hash for the combined result, including
|
|
# the locales it is valid for
|
|
push @combined, {
|
|
op_results => \@combined_result,
|
|
locales => \@intersection,
|
|
};
|
|
}
|
|
}
|
|
|
|
return \@combined;
|
|
} # End of gen_combinations() definition
|
|
|
|
# The result of calling gen_combinations() will be an array of hashes.
|
|
#
|
|
# The main value in each hash is an array (whose key is 'op_results')
|
|
# containing all the tests for this category for a thread. If there
|
|
# were N calls to 'add_trial' for this category, there will be 'N'
|
|
# elements in the array. Each element is a string packed with the
|
|
# operation to eval in a thread and the operation's expected result.
|
|
#
|
|
# The other data structure in each hash is an array with the key
|
|
# 'locales'. That array is a list of every locale which yields the
|
|
# identical results in 'op_results'.
|
|
#
|
|
# Effectively, each hash gives all the tests for this category for a
|
|
# thread. The total array of hashes gives the complete list of
|
|
# distinct tests possible on this system. So later, a thread will
|
|
# pluck the next available one from the array..
|
|
my $combinations_ref = gen_combinations(\@ops, \%results,
|
|
$distincts{$category});
|
|
|
|
# Fix up the entries ...
|
|
foreach my $test ($combinations_ref->@*) {
|
|
|
|
# Sort the locale names; this makes it work for later comparisons
|
|
# to look at just the first element of each list.
|
|
$test->{locales}->@* =
|
|
sort sort_by_hashed_locale $test->{locales}->@*;
|
|
|
|
# And for each test, calculate and store how many locales have the
|
|
# same result (saves recomputation later in a sort). This adds
|
|
# another data structure to each hash in the main array.
|
|
my @individual_tests = $test->{op_results}->@*;
|
|
my @in_common_locale_counts;
|
|
foreach my $this_test (@individual_tests) {
|
|
|
|
# Each test came from %distincts, and there we have stored the
|
|
# list of all locales that yield the same result
|
|
push @in_common_locale_counts,
|
|
scalar $distincts{$category}{$this_test}{locales}->@*;
|
|
}
|
|
push $test->{in_common_locale_counts}->@*, @in_common_locale_counts;
|
|
}
|
|
|
|
# Make a copy
|
|
my @cat_tests = $combinations_ref->@*;
|
|
|
|
# This sorts the test cases so that the ones with the least overlap
|
|
# with other cases are first.
|
|
sub sort_test_order {
|
|
my $a_tests_count = scalar $a->{in_common_locale_counts}->@*;
|
|
my $b_tests_count = scalar $b->{in_common_locale_counts}->@*;
|
|
my $tests_count = min($a_tests_count, $b_tests_count);
|
|
|
|
# Choose the one that is most distinctive (least overlap); that is
|
|
# the one that has the most tests whose results are not shared by
|
|
# any other locale.
|
|
my $a_nondistincts = 0;
|
|
my $b_nondistincts = 0;
|
|
for my $i (0 .. $tests_count - 1) {
|
|
$a_nondistincts += ($a->{in_common_locale_counts}[$i] != 1);
|
|
$b_nondistincts += ($b->{in_common_locale_counts}[$i] != 1);
|
|
}
|
|
|
|
my $cmp = $a_nondistincts <=> $b_nondistincts;
|
|
return $cmp if $cmp;
|
|
|
|
# If they have the same number of those, choose the one with the
|
|
# fewest total number of locales that have the same result
|
|
my $a_count = 0;
|
|
my $b_count = 0;
|
|
for my $i (0 .. $tests_count - 1) {
|
|
$a_count += $a->{in_common_locale_counts}[$i];
|
|
$b_count += $b->{in_common_locale_counts}[$i];
|
|
}
|
|
|
|
$cmp = $a_count <=> $b_count;
|
|
return $cmp if $cmp;
|
|
|
|
# If that still doesn't yield a winner, use the general sort order.
|
|
local $a = $a->{locales}[0];
|
|
local $b = $b->{locales}[0];
|
|
return sort_by_hashed_locale;
|
|
}
|
|
|
|
# Actually perform the sort.
|
|
@cat_tests = sort sort_test_order @cat_tests;
|
|
|
|
# This category will now have all the distinct tests possible for it
|
|
# on this platform, with the first test being the one with the least
|
|
# overlap with other test cases
|
|
push $all_tests{$category}->@*, @cat_tests;
|
|
} # End of loop through the categories creating and sorting the test
|
|
# cases
|
|
|
|
my %thread_already_used_locales;
|
|
|
|
# Now generate the tests for each thread.
|
|
my @tests_by_thread;
|
|
for my $i (0 .. $thread_count - 1) {
|
|
foreach my $category (sort keys %all_tests) {
|
|
my $skipped = 0; # Used below to not loop infinitely
|
|
|
|
# Get the next test case
|
|
NEXT_CANDIDATE:
|
|
my $candidate = shift $all_tests{$category}->@*;
|
|
|
|
my $locale_name = $candidate->{locales}[0];
|
|
|
|
# Avoid, if possible, using the same locale name twice (for
|
|
# different categories) in the same thread.
|
|
if (defined $thread_already_used_locales{$locale_name =~ s/\W.*//r})
|
|
{
|
|
# Look through the synonyms of this locale for an
|
|
# as-yet-unused one
|
|
for my $j (1 .. $candidate->{locales}->@* - 1) {
|
|
my $synonym = $candidate->{locales}[$j];
|
|
next if defined $thread_already_used_locales{$synonym =~
|
|
s/\W.*//r};
|
|
$locale_name = $synonym;
|
|
goto found_synonym;
|
|
}
|
|
|
|
# Here, no synonym was found. If we haven't cycled through
|
|
# all the possible tests, try another (putting this one at the
|
|
# end as a last resort in the future).
|
|
$skipped++;
|
|
if ($skipped < scalar $all_tests{$category}->@*) {
|
|
push $all_tests{$category}->@*, $candidate;
|
|
goto NEXT_CANDIDATE;
|
|
}
|
|
|
|
# Here no synonym was found, this test has already been used,
|
|
# but there are no unused ones, so have to re-use it.
|
|
|
|
found_synonym:
|
|
}
|
|
|
|
# Here, we have found a test case. The thread needs to know what
|
|
# locale to use,
|
|
$tests_by_thread[$i]->{$category}{locale_name} = $locale_name;
|
|
|
|
# And it needs to know each test to run, and the expected result.
|
|
my @cases;
|
|
for my $j (0 .. $candidate->{op_results}->@* - 1) {
|
|
my ($op, $result) =
|
|
unpack_op_result($candidate->{op_results}[$j]);
|
|
push @cases, { op => $op, expected => $result };
|
|
}
|
|
push $tests_by_thread[$i]->{$category}{locale_tests}->@*, @cases;
|
|
|
|
# Done with this category in this thread. Setup for subsequent
|
|
# categories in this thread, and subsequent threads.
|
|
#
|
|
# It's best to not have two categories in a thread use the same
|
|
# locale. Save this locale name so that later iterations handling
|
|
# other categories can avoid using it, if possible.
|
|
$thread_already_used_locales{$locale_name =~ s/\W.*//r} = 1;
|
|
|
|
# In pursuit of using as many different locales as possible, the
|
|
# first shall be last in line next time, and eventually the last
|
|
# shall be first
|
|
push $candidate->{locales}->@*, shift $candidate->{locales}->@*;
|
|
|
|
# Similarly, this test case is added back at the end of the list,
|
|
# so will be used only as a last resort in the next thread, and as
|
|
# the penultimate resort in the thread following that, etc. as the
|
|
# test cases are cycled through.
|
|
push $all_tests{$category}->@*, $candidate;
|
|
} # End of looping through the categories for this thread
|
|
} # End of generating all threads
|
|
|
|
# Now reformat the tests to a form convenient for the actual test file
|
|
# script to use; minimizing the amount of ancillary work it needs to do.
|
|
my @cooked_tests;
|
|
for my $i (0 .. $#tests_by_thread) {
|
|
|
|
my $this_tests = $tests_by_thread[$i];
|
|
my @this_cooked_tests;
|
|
my (@this_categories, @this_locales); # Parallel arrays
|
|
|
|
# Every so often we use LC_ALL instead of individual locales, provided
|
|
# it is available on the platform
|
|
if ( ($i % $lc_all_frequency == $lc_all_frequency - 1)
|
|
&& $LC_ALL_string eq 'LC_ALL')
|
|
{
|
|
my $lc_all= "";
|
|
my $category_number;
|
|
|
|
# Compute the LC_ALL string for the syntax accepted by this
|
|
# platform from the locale each category is to be set to.
|
|
while (defined($category_number = get_next_category())) {
|
|
my $category_name =
|
|
$map_category_number_to_name{$category_number};
|
|
my $locale = $this_tests->{$category_name}{locale_name};
|
|
$locale = "C" unless defined $locale;
|
|
$category_name =~ s/\@/\\@/g;
|
|
|
|
$lc_all .= $lc_all_separator if $lc_all ne "";
|
|
|
|
if ($use_name_value_pairs) {
|
|
$lc_all .= $category_name . "=";
|
|
}
|
|
|
|
$lc_all .= $locale;
|
|
}
|
|
|
|
$this_categories[0] = $LC_ALL;
|
|
$this_locales[0] = $lc_all;
|
|
}
|
|
else { # The other times, just set each category to its locale
|
|
# individually
|
|
foreach my $category_name (sort keys $this_tests->%*) {
|
|
push @this_categories,
|
|
$map_category_name_to_number{$category_name};
|
|
push @this_locales,
|
|
$this_tests->{$category_name}{locale_name};
|
|
}
|
|
}
|
|
|
|
while (keys $this_tests->%*) {
|
|
foreach my $category_name (sort keys $this_tests->%*) {
|
|
my $this_category_tests = $this_tests->{$category_name};
|
|
my $test = shift
|
|
$this_category_tests->{locale_tests}->@*;
|
|
print STDERR __FILE__, ': ', __LINE__, ': ', Dumper $test
|
|
if $debug;
|
|
if (! $test) {
|
|
delete $this_tests->{$category_name};
|
|
next;
|
|
}
|
|
|
|
$test->{category_name} = $category_name;
|
|
my $locale_name = $this_category_tests->{locale_name};
|
|
$test->{locale_name} = $locale_name;
|
|
$test->{codeset} =
|
|
$locale_name_to_object{$locale_name}{codeset};
|
|
|
|
push @this_cooked_tests, $test;
|
|
}
|
|
}
|
|
|
|
push @cooked_tests, {
|
|
thread => $i,
|
|
categories => \@this_categories,
|
|
locales => \@this_locales,
|
|
tests => \@this_cooked_tests,
|
|
};
|
|
}
|
|
|
|
my $all_tests_ref = \@cooked_tests;
|
|
my $all_tests_file = tempfile();
|
|
|
|
# Store the tests into a file, retrievable by the subprocess
|
|
use Storable;
|
|
if (! defined store($all_tests_ref, $all_tests_file)) {
|
|
die "Could not save the built-up data structure";
|
|
}
|
|
|
|
my $category_number_to_name = Data::Dumper->Dump(
|
|
[ \%map_category_number_to_name ],
|
|
[ 'map_category_number_to_name']);
|
|
|
|
my $switches = "";
|
|
$switches = "switches => [ -DLv ]" if $debug > 2;
|
|
|
|
# Build up the program to run. This stresses locale thread safety. We
|
|
# start a bunch of threads. Each sets the locale of each category being
|
|
# tested to the value determined in the code above. Then each sleeps to a
|
|
# common start time, at which point they awaken and iterate their
|
|
# respective loops. Each iteration runs a set of tests and checks that
|
|
# the results are as expected. This should catch any instances of other
|
|
# threads interfering. Every so often, each thread shifts to instead use
|
|
# the locales and tests of another thread. This catches bugs dealing with
|
|
# changing the locale on the fly.
|
|
#
|
|
# The code above has set up things so that each thread has as disparate
|
|
# results from the other threads as possible, so to more likely catch any
|
|
# bleed-through.
|
|
my $program = <<EOT;
|
|
|
|
BEGIN { \$| = 1; }
|
|
my \$debug = $debug;
|
|
my \$thread_count = $thread_count;
|
|
my \$iterations_per_test_set = $iterations_per_test_set;
|
|
my \$iterations = $iterations;
|
|
my \$die_on_negative_sleep = $die_on_negative_sleep;
|
|
my \$per_thread_startup = $per_thread_startup;
|
|
my \$all_tests_file = $all_tests_file;
|
|
my \$alarm_clock = $alarm_clock;
|
|
EOT
|
|
|
|
$program .= <<'EOT';
|
|
use threads;
|
|
use strict;
|
|
use warnings;
|
|
use POSIX qw(locale_h);
|
|
use utf8;
|
|
use Time::HiRes qw(time usleep);
|
|
$|=1;
|
|
|
|
use Data::Dumper;
|
|
$Data::Dumper::Sortkeys=1;
|
|
$Data::Dumper::Useqq = 1;
|
|
$Data::Dumper::Deepcopy = 1;
|
|
|
|
# Get the tests stored for us by the setup process
|
|
use Storable;
|
|
my $all_tests_ref = retrieve($all_tests_file);
|
|
if (! defined $all_tests_ref) {
|
|
die "Could not restore the built-up data structure";
|
|
}
|
|
|
|
my %corrects;
|
|
|
|
sub output_test_failure_prefix {
|
|
my ($iteration, $category_name, $test) = @_;
|
|
my $tid = threads->tid();
|
|
print STDERR "\nthread ", $tid,
|
|
" failed in iteration $iteration",
|
|
" for locale $test->{locale_name}",
|
|
" codeset='$test->{codeset}'",
|
|
" $category_name",
|
|
"\nop='$test->{op}'",
|
|
"\nafter getting ", ($corrects{$category_name}
|
|
{$test->{locale_name}}
|
|
{all} // 0),
|
|
" previous correct results for this category and",
|
|
" locale,\nincluding ", ($corrects{$category_name}
|
|
{$test->{locale_name}}
|
|
{$tid} // 0),
|
|
" in this thread\n";
|
|
}
|
|
|
|
sub output_test_result($$$) {
|
|
my ($type, $result, $utf8_matches) = @_;
|
|
|
|
no locale;
|
|
|
|
print STDERR "$type";
|
|
|
|
my $copy = $result;
|
|
if (! $utf8_matches) {
|
|
if (utf8::is_utf8($copy)) {
|
|
print STDERR " (result already was in UTF-8)";
|
|
}
|
|
else {
|
|
utf8::upgrade($copy);
|
|
print STDERR " (result wasn't in UTF-8; converted for easier",
|
|
" comparison)";
|
|
}
|
|
}
|
|
print STDERR ":\n";
|
|
|
|
use Devel::Peek;
|
|
Dump $copy;
|
|
}
|
|
|
|
sub iterate { # Run some chunk of iterations of the tests
|
|
my ($tid, # Which thread
|
|
$initial_iteration, # The number of the first iteration
|
|
$count, # How many
|
|
$tests_ref) # The tests
|
|
= @_;
|
|
|
|
my $iteration = $initial_iteration;
|
|
$count += $initial_iteration;
|
|
|
|
# Repeatedly ...
|
|
while ($iteration < $count) {
|
|
my $errors = 0;
|
|
|
|
use locale;
|
|
|
|
# ... execute the tests
|
|
foreach my $test ($tests_ref->@*) {
|
|
|
|
# We know what we are expecting
|
|
my $expected = $test->{expected};
|
|
|
|
my $category_name = $test->{category_name};
|
|
|
|
# And do the test.
|
|
my $got = eval $test->{op};
|
|
|
|
if (! defined $got) {
|
|
output_test_failure_prefix($iteration,
|
|
$category_name,
|
|
$test);
|
|
output_test_result("expected", $expected,
|
|
1 # utf8ness matches, since only one
|
|
);
|
|
$errors++;
|
|
next;
|
|
}
|
|
|
|
my $utf8ness_matches = ( utf8::is_utf8($got)
|
|
== utf8::is_utf8($expected));
|
|
|
|
my $matched = ($got eq $expected);
|
|
if ($matched) {
|
|
if ($utf8ness_matches) {
|
|
no warnings 'uninitialized';
|
|
$corrects{$category_name}{$test->{locale_name}}{all}++;
|
|
$corrects{$category_name}{$test->{locale_name}}{$tid}++;
|
|
next; # Complete success!
|
|
}
|
|
}
|
|
|
|
$errors++;
|
|
output_test_failure_prefix($iteration, $category_name, $test);
|
|
|
|
if ($matched) {
|
|
print STDERR "Only difference is UTF8ness of results\n";
|
|
}
|
|
output_test_result("expected", $expected, $utf8ness_matches);
|
|
output_test_result("got", $got, $utf8ness_matches);
|
|
|
|
} # Loop to do the remaining tests for this iteration
|
|
|
|
return 0 if $errors;
|
|
|
|
$iteration++;
|
|
|
|
# A way to set a gdb break point pp_study
|
|
#study if $iteration % 10 == 0;
|
|
|
|
threads->yield();
|
|
}
|
|
|
|
return 1;
|
|
} # End of iterate() definition
|
|
|
|
EOT
|
|
|
|
$program .= "my $category_number_to_name\n";
|
|
|
|
$program .= <<'EOT';
|
|
sub setlocales {
|
|
# Set each category to the appropriate locale for this test set
|
|
my ($categories, $locales) = @_;
|
|
for my $i (0 .. $categories->@* - 1) {
|
|
if (! setlocale($categories->[$i], $locales->[$i])) {
|
|
my $category_name =
|
|
$map_category_number_to_name->{$categories->[$i]};
|
|
print STDERR "\nthread ", threads->tid(),
|
|
" setlocale($category_name ($categories->[$i]),",
|
|
" $locales->[$i]) failed\n";
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
my $startup_insurance = 1;
|
|
my $future = $startup_insurance + $thread_count * $per_thread_startup;
|
|
my $starting_time = time() + $future;
|
|
|
|
sub wait_until_time {
|
|
|
|
# Sleep until the time when all the threads are due to wake up, so
|
|
# they run as simultaneously as we can make it.
|
|
my $sleep_time = ($starting_time - time());
|
|
#printf STDERR "thread %d started, sleeping %g sec\n",
|
|
# threads->tid, $sleep_time;
|
|
if ($sleep_time < 0 && $die_on_negative_sleep) {
|
|
# What the start time should have been
|
|
my $a_better_future = $future - $sleep_time;
|
|
|
|
my $better_per_thread =
|
|
($a_better_future - $startup_insurance) / $thread_count;
|
|
printf STDERR "$per_thread_startup would need to be %g",
|
|
" for thread %d to have started\nin sync with",
|
|
" the other threads\n",
|
|
$better_per_thread, threads->tid;
|
|
die "Thread started too late";
|
|
}
|
|
else {
|
|
usleep($sleep_time * 1_000_000) if $sleep_time > 0;
|
|
}
|
|
}
|
|
|
|
# Create all the subthreads: 1..n
|
|
my @threads = map +threads->create(sub {
|
|
$SIG{'KILL'} = sub { threads->exit(); };
|
|
|
|
my $thread = shift;
|
|
|
|
# Start out with the set of tests whose number is the same as the
|
|
# thread number
|
|
my $test_set = $thread;
|
|
|
|
wait_until_time();
|
|
|
|
# Loop through all the iterations for this thread
|
|
my $this_iteration_start = 1;
|
|
do {
|
|
# Set up each category with its locale;
|
|
my $this_ref = $all_tests_ref->[$test_set];
|
|
return 0 unless setlocales($this_ref->{categories},
|
|
$this_ref->{locales});
|
|
# Then run one batch of iterations
|
|
my $result = iterate($thread,
|
|
$this_iteration_start,
|
|
$iterations_per_test_set,
|
|
$this_ref->{tests});
|
|
return 0 if $result == 0; # Quit if failed
|
|
|
|
# Next iteration will shift to use a different set of locales for
|
|
# each category
|
|
$test_set++;
|
|
$test_set = 0 if $test_set >= $thread_count;
|
|
$this_iteration_start += $iterations_per_test_set;
|
|
} while ($this_iteration_start <= $iterations);
|
|
|
|
return 1; # Success
|
|
|
|
}, $_), (1..$thread_count - 1); # For each non-0 thread
|
|
|
|
# Here is thread 0. We do a smaller chunk of iterations in it; then
|
|
# join whatever threads have finished so far, then do another chunk.
|
|
# This tests for bugs that arise as a result of joining.
|
|
|
|
my %thread0_corrects = ();
|
|
my $this_iteration_start = 1;
|
|
my $result = 1; # So far, everything is ok
|
|
my $test_set = -1; # Start with 0th test set
|
|
|
|
wait_until_time();
|
|
alarm($alarm_clock); # Guard against hangs
|
|
|
|
do {
|
|
# Next time, we'll use the next test set
|
|
$test_set++;
|
|
$test_set = 0 if $test_set >= $thread_count;
|
|
|
|
my $this_ref = $all_tests_ref->[$test_set];
|
|
|
|
# set the locales for this test set. Do this even if we
|
|
# are going to bail, so that it will be set correctly for the final
|
|
# batch after the loop.
|
|
$result &= setlocales($this_ref->{categories}, $this_ref->{locales});
|
|
|
|
if ($debug > 1) {
|
|
my @joinable = threads->list(threads::joinable);
|
|
if (@joinable) {
|
|
print STDERR "In thread 0, before iteration ",
|
|
$this_iteration_start,
|
|
" these threads are done: ",
|
|
join (", ", map { $_->tid() } @joinable),
|
|
"\n";
|
|
}
|
|
}
|
|
|
|
# Join anything already finished.
|
|
for my $thread (threads->list(threads::joinable)) {
|
|
my $thread_result = $thread->join;
|
|
if ($debug > 1) {
|
|
print STDERR "In thread 0, before iteration ",
|
|
$this_iteration_start,
|
|
" joining thread ", $thread->tid(),
|
|
"; result=", ((defined $thread_result)
|
|
? $thread_result
|
|
: "undef"),
|
|
"\n";
|
|
}
|
|
|
|
# If the thread failed badly, stop testing anything else.
|
|
if (! defined $thread_result) {
|
|
$_->kill('KILL')->detach() for threads->list();
|
|
print 0;
|
|
exit;
|
|
}
|
|
|
|
# Update the status
|
|
$result &= $thread_result;
|
|
}
|
|
|
|
# Do a chunk of iterations on this thread 0.
|
|
$result &= iterate(0,
|
|
$this_iteration_start,
|
|
$iterations_per_test_set,
|
|
$this_ref->{tests},
|
|
\%thread0_corrects);
|
|
$this_iteration_start += $iterations_per_test_set;
|
|
|
|
# And repeat as long as there are other tests
|
|
} while (threads->list(threads::all));
|
|
|
|
print $result;
|
|
EOT
|
|
|
|
# Finally ready to run the test.
|
|
fresh_perl_is($program,
|
|
1,
|
|
{ eval $switches },
|
|
"Verify there were no failures with simultaneous running threads"
|
|
);
|
|
}
|