mktables: Add duplicate tables

This is for backwards compatibility.  Future commits will change these
tables that are generated by mktables to be more efficient.  But the
existence of them was advertised in v5.12 and v5.14, as something a Perl
program could use because the Perl core did not provide access to their
contents.  We can't change the format of those without some notice.

The solution adopted is to have two versions of the tables: one, kept
under the original file name, retains the original format; the other is
free to change formats at will.

This commit just creates copies of the original, with the same format.
Later commits will change the format to be more efficient.

We state in v5.16 that using these files is now deprecated, as the
information is now available through Unicode::UCD in a stable API.  But
we don't test for whether someone is opening and reading these files, so
the deprecation cycle should be somewhat long.  They will be unused, and
the only drawbacks to having them are some extra disk space and the time
spent in having to generate them at Perl build time.

This commit also changes the Perl core to use the original tables, so
that the new format can be gradually developed in a series of patches
without having to cut over the whole thing at once.
This commit is contained in:
Karl Williamson 2012-01-21 15:27:00 -07:00
parent 6647445930
commit cdc18eb6b4
2 changed files with 59 additions and 15 deletions

View File

@ -8631,15 +8631,6 @@ sub finish_property_setup {
# Perl adds this alias.
$gc->add_alias('Category');
# For backwards compatibility, these property files have particular names.
property_ref('Uppercase_Mapping')->set_file('Upper'); # This is what
# utf8.c calls it
property_ref('Lowercase_Mapping')->set_file('Lower');
property_ref('Titlecase_Mapping')->set_file('Title');
my $fold = property_ref('Case_Folding');
$fold->set_file('Fold') if defined $fold;
# Unicode::Normalize expects this file with this name and directory.
my $ccc = property_ref('Canonical_Combining_Class');
if (defined $ccc) {
@ -10000,7 +9991,6 @@ END
my $Decimal_Digit = Property->new("Perl_Decimal_Digit",
Default_Map => "",
Perl_Extension => 1,
File => 'Digit', # Trad. location
Directory => $map_directory,
Type => $STRING,
Range_Size_1 => 1,
@ -10728,6 +10718,19 @@ sub filter_arabic_shaping_line {
Carp::my_carp_bug("Need to process UnicodeData before SpecialCasing. Only special casing will be generated.");
}
# Create a table in the old-style format and with the original
# file name for backwards compatibility with applications that
# read it directly.
my $legacy = Property->new("Legacy_" . $full_table->full_name,
File => $full_table->full_name =~
s/case_Mapping//r,
Range_Size_1 => 1,
Format => $HEX_FORMAT,
Default_Map => $CODE_POINT,
UCD => 0,
Initialize => $full_table,
);
# The simple version's name in each mapping merely has an 's' in
# front of the full one's
my $simple_name = 's' . $full_name;
@ -10738,6 +10741,7 @@ sub filter_arabic_shaping_line {
Type => $STRING,
Default_Map => $CODE_POINT,
Perl_Extension => 1,
Range_Size_1 => 1,
Fate => $INTERNAL_ONLY,
Description => "This contains the simple mappings for $full_name for just the code points that have different full mappings");
$simple_only->set_to_output_map($INTERNAL_MAP);
@ -10840,10 +10844,18 @@ END
}
}
else {
$file->insert_adjusted_lines("$fields[0]; "
# The mapping goes into both the legacy table ...
$file->insert_adjusted_lines("$fields[0]; Legacy_"
. $object->full_name
. "; $fields[$i]");
# ... and the regular table
$file->insert_adjusted_lines("$fields[0]; "
. $object->name
. "; "
. $fields[$i]);
# Copy any simple case change to the special tables
# constructed if being overridden by a multi-character case
# change.
@ -11751,6 +11763,38 @@ END
}
}
# Create digit and case fold tables with the original file names for
# backwards compatibility with applications that read them directly.
my $Digit = Property->new("Legacy_Perl_Decimal_Digit",
Default_Map => "",
Perl_Extension => 1,
File => 'Digit', # Trad. location
Directory => $map_directory,
UCD => 0,
Type => $STRING,
Range_Size_1 => 1,
Initialize => property_ref('Perl_Decimal_Digit'),
);
$Digit->add_comment(join_lines(<<END
This file gives the mapping of all code points which represent a single
decimal digit [0-9] to their respective digits. For example, the code point
U+0031 (an ASCII '1') is mapped to a numeric 1. These code points are those
that have Numeric_Type=Decimal; not special things, like subscripts nor Roman
numerals.
END
));
Property->new('Legacy_Case_Folding',
File => "Fold",
Directory => $map_directory,
Default_Map => $CODE_POINT,
UCD => 0,
Range_Size_1 => 1,
Type => $STRING,
Format => $HEX_FORMAT,
Initialize => property_ref('cf'),
);
# The Script_Extensions property started out as a clone of the Script
# property. But processing its data file caused some elements to be
# replaced with different data. (These elements were for the Common and

8
utf8.c
View File

@ -1375,14 +1375,14 @@ Perl__to_upper_title_latin1(pTHX_ const U8 c, U8* p, STRLEN *lenp, const char S_
* LENP will be set to the length in bytes of the string of changed characters
*
* The functions return the ordinal of the first character in the string of OUTP */
#define CALL_UPPER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_toupper, "ToUc", "utf8::ToSpecUpper")
#define CALL_TITLE_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_totitle, "ToTc", "utf8::ToSpecTitle")
#define CALL_LOWER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tolower, "ToLc", "utf8::ToSpecLower")
#define CALL_UPPER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_toupper, "ToLegacyUpperCaseMapping", "utf8::ToSpecUpper")
#define CALL_TITLE_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_totitle, "ToLegacyTitleCaseMapping", "utf8::ToSpecTitle")
#define CALL_LOWER_CASE(INP, OUTP, LENP) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tolower, "ToLegacyLowerCaseMapping", "utf8::ToSpecLower")
/* This additionally has the input parameter SPECIALS, which if non-zero will
* cause this to use the SPECIALS hash for folding (meaning get full case
* folding); otherwise, when zero, this implies a simple case fold */
#define CALL_FOLD_CASE(INP, OUTP, LENP, SPECIALS) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tofold, "ToCf", (SPECIALS) ? "utf8::ToSpecFold" : NULL)
#define CALL_FOLD_CASE(INP, OUTP, LENP, SPECIALS) Perl_to_utf8_case(aTHX_ INP, OUTP, LENP, &PL_utf8_tofold, "ToLegacyCaseFolding", (SPECIALS) ? "utf8::ToSpecFold" : NULL)
UV
Perl_to_uni_upper(pTHX_ UV c, U8* p, STRLEN *lenp)