mirror of
https://github.com/Perl/perl5.git
synced 2026-01-26 16:39:36 +00:00
mktables: Add duplicate tables
This is for backwards compatibility. Future commits will change these tables, which are generated by mktables, to be more efficient. But their existence was advertised in v5.12 and v5.14 as something a Perl program could use, because the Perl core did not provide access to their contents. We can't change the format of those without some notice. The solution adopted is to have two versions of the tables: one, kept under the original file name, has the original format; the other is free to change formats at will. This commit just creates copies of the original, with the same format. Later commits will change the format to be more efficient. We state in v5.16 that using these files is now deprecated, as the information is now available through Unicode::UCD in a stable API. But we don't test for whether someone is opening and reading these files, so the deprecation cycle should be somewhat long. They will be unused, and the only drawbacks to having them are some extra disk space and the time spent in having to generate them at Perl build time. This commit also changes the Perl core to use the original tables, so that the new format can be gradually developed in a series of patches without having to cut over the whole thing at once.
This commit is contained in:
parent
6647445930
commit
cdc18eb6b4
@ -8631,15 +8631,6 @@ sub finish_property_setup {
|
||||
# Perl adds this alias.
|
||||
$gc->add_alias('Category');
|
||||
|
||||
# For backwards compatibility, these property files have particular names.
|
||||
property_ref('Uppercase_Mapping')->set_file('Upper'); # This is what
|
||||
# utf8.c calls it
|
||||
property_ref('Lowercase_Mapping')->set_file('Lower');
|
||||
property_ref('Titlecase_Mapping')->set_file('Title');
|
||||
|
||||
my $fold = property_ref('Case_Folding');
|
||||
$fold->set_file('Fold') if defined $fold;
|
||||
|
||||
# Unicode::Normalize expects this file with this name and directory.
|
||||
my $ccc = property_ref('Canonical_Combining_Class');
|
||||
if (defined $ccc) {
|
||||
@ -10000,7 +9991,6 @@ END
|
||||
my $Decimal_Digit = Property->new("Perl_Decimal_Digit",
|
||||
Default_Map => "",
|
||||
Perl_Extension => 1,
|
||||
File => 'Digit', # Trad. location
|
||||
Directory => $map_directory,
|
||||
Type => $STRING,
|
||||
Range_Size_1 => 1,
|
||||
@ -10728,6 +10718,19 @@ sub filter_arabic_shaping_line {
|
||||
Carp::my_carp_bug("Need to process UnicodeData before SpecialCasing. Only special casing will be generated.");
|
||||
}
|
||||
|
||||
# Create a table in the old-style format and with the original
|
||||
# file name for backwards compatibility with applications that
|
||||
# read it directly.
|
||||
my $legacy = Property->new("Legacy_" . $full_table->full_name,
|
||||
File => $full_table->full_name =~
|
||||
s/case_Mapping//r,
|
||||
Range_Size_1 => 1,
|
||||
Format => $HEX_FORMAT,
|
||||
Default_Map => $CODE_POINT,
|
||||
UCD => 0,
|
||||
Initialize => $full_table,
|
||||
);
|
||||
|
||||
# The simple version's name in each mapping merely has an 's' in
|
||||
# front of the full one's
|
||||
my $simple_name = 's' . $full_name;
|
||||
@ -10738,6 +10741,7 @@ sub filter_arabic_shaping_line {
|
||||
Type => $STRING,
|
||||
Default_Map => $CODE_POINT,
|
||||
Perl_Extension => 1,
|
||||
Range_Size_1 => 1,
|
||||
Fate => $INTERNAL_ONLY,
|
||||
Description => "This contains the simple mappings for $full_name for just the code points that have different full mappings");
|
||||
$simple_only->set_to_output_map($INTERNAL_MAP);
|
||||
@ -10840,10 +10844,18 @@ END
|
||||
}
|
||||
}
|
||||
else {
|
||||
$file->insert_adjusted_lines("$fields[0]; "
|
||||
|
||||
# The mapping goes into both the legacy table ...
|
||||
$file->insert_adjusted_lines("$fields[0]; Legacy_"
|
||||
. $object->full_name
|
||||
. "; $fields[$i]");
|
||||
|
||||
# ... and the regular table
|
||||
$file->insert_adjusted_lines("$fields[0]; "
|
||||
. $object->name
|
||||
. "; "
|
||||
. $fields[$i]);
|
||||
|
||||
# Copy any simple case change to the special tables
|
||||
# constructed if being overridden by a multi-character case
|
||||
# change.
|
||||
@ -11751,6 +11763,38 @@ END
|
||||
}
|
||||
}
|
||||
|
||||
# Create digit and case fold tables with the original file names for
|
||||
# backwards compatibility with applications that read them directly.
|
||||
my $Digit = Property->new("Legacy_Perl_Decimal_Digit",
|
||||
Default_Map => "",
|
||||
Perl_Extension => 1,
|
||||
File => 'Digit', # Trad. location
|
||||
Directory => $map_directory,
|
||||
UCD => 0,
|
||||
Type => $STRING,
|
||||
Range_Size_1 => 1,
|
||||
Initialize => property_ref('Perl_Decimal_Digit'),
|
||||
);
|
||||
$Digit->add_comment(join_lines(<<END
|
||||
This file gives the mapping of all code points which represent a single
|
||||
decimal digit [0-9] to their respective digits. For example, the code point
|
||||
U+0031 (an ASCII '1') is mapped to a numeric 1. These code points are those
|
||||
that have Numeric_Type=Decimal; not special things, like subscripts nor Roman
|
||||
numerals.
|
||||
END
|
||||
));
|
||||
|
||||
Property->new('Legacy_Case_Folding',
|
||||
File => "Fold",
|
||||
Directory => $map_directory,
|
||||
Default_Map => $CODE_POINT,
|
||||
UCD => 0,
|
||||
Range_Size_1 => 1,
|
||||
Type => $STRING,
|
||||
Format => $HEX_FORMAT,
|
||||
Initialize => property_ref('cf'),
|
||||
);
|
||||
|
||||
# The Script_Extensions property started out as a clone of the Script
|
||||
# property. But processing its data file caused some elements to be
|
||||
# replaced with different data. (These elements were for the Common and
|
||||
|
||||
8
utf8.c
8
utf8.c
@ -1375,14 +1375,14 @@ Perl__to_upper_title_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const char S_
|
||||
* LENP will be set to the length in bytes of the string of changed characters
|
||||
*
|
||||
* The functions return the ordinal of the first character in the string of OUTP */
|
||||
#define CALL_UPPER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_toupper, "ToUc", "utf8::ToSpecUpper")
|
||||
#define CALL_TITLE_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_totitle, "ToTc", "utf8::ToSpecTitle")
|
||||
#define CALL_LOWER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tolower, "ToLc", "utf8::ToSpecLower")
|
||||
#define CALL_UPPER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_toupper, "ToLegacyUpperCaseMapping", "utf8::ToSpecUpper")
|
||||
#define CALL_TITLE_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_totitle, "ToLegacyTitleCaseMapping", "utf8::ToSpecTitle")
|
||||
#define CALL_LOWER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tolower, "ToLegacyLowerCaseMapping", "utf8::ToSpecLower")
|
||||
|
||||
/* This additionally has the input parameter SPECIALS, which if non-zero will
|
||||
* cause this to use the SPECIALS hash for folding (meaning get full case
|
||||
* folding); otherwise, when zero, this implies a simple case fold */
|
||||
#define CALL_FOLD_CASE(INP, OUTP, LENP, SPECIALS) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tofold, "ToCf", (SPECIALS) ? "utf8::ToSpecFold" : NULL)
|
||||
#define CALL_FOLD_CASE(INP, OUTP, LENP, SPECIALS) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tofold, "ToLegacyCaseFolding", (SPECIALS) ? "utf8::ToSpecFold" : NULL)
|
||||
|
||||
UV
|
||||
Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user