mirror of
https://github.com/Perl/perl5.git
synced 2026-01-26 08:38:23 +00:00
Add class for underscore character to l1_char_class_tab.h
l1_char_class_tab.h categorizes characters in the Latin1 range into various classes, mostly into the POSIX classes like [:word:]. Each character has a bit set corresponding to every class it is a member of. These values are placed in a 256-element array, and the ordinal value of a character is used as an index into it to quickly determine whether the character is a member of a given class. Besides the POSIX classes, there are some classes that make our code more convenient and/or faster. For example, there is a class that allows us to quickly know whether a given character is one that quotemeta() needs to precede with a backslash. This commit adds a class for the single character underscore '_', and a macro that tests whether a character is either an underscore or a member of any other class, using a single conditional. This means code that checks whether character X is either an underscore or a member of class Y can change to eliminate one conditional. Thus the reason to do this is efficiency. Currently, the only places that do this explicitly are in non-hot code. But I have work in progress (WIP) containing hot code that could benefit from this. The only downside of doing this is that it uses up one bit of the 32 available (without shenanigans) for such classes, leaving 4 spare. But before this release, the last time any new bit had been used up was 5.32, so the rate of using up these spare bits is quite low. A bit could be reclaimed, because the IDFIRST class in the Latin1 range is identical to ALPHA plus the underscore, so IDFIRST could be rewritten as that combination and its bit freed up. However, this would require adding some macros that take two class parameters instead of one. I briefly thought about doing that now, but since we have spare bits and the rate of using them up is low, I didn't think it was worth it at this time. Similarly, \w in this range is ALPHANUMERIC plus underscore. 
But its use is more deeply embedded than that of IDFIRST, so an attempt to reclaim its bit would require more effort.
This commit is contained in:
parent
b116971bb0
commit
31c9996116
@ -492661,6 +492661,6 @@ static const U8 WB_dfa_table[] = {
|
||||
* 03640d8ad18fc65de766f2034a927f7442960e998d3243845ca9b9fe31bfe1ab lib/unicore/mktables
|
||||
* 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version
|
||||
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
|
||||
* 6c52efdee47313cfde75ff86376008ce53320ebc93176caab45c77ce086a256d regen/mk_PL_charclass.pl
|
||||
* 8abaee16c84c1a61a69a77b6e8963675d99f515f3c2a34e449faeb9bfdec861a regen/mk_PL_charclass.pl
|
||||
* 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl
|
||||
* ex: set ro ft=c: */
|
||||
|
||||
2
embed.h
2
embed.h
@ -281,6 +281,8 @@
|
||||
# undef case_98_SBOX32
|
||||
# undef case_99_SBOX32
|
||||
# undef case_9_SBOX32
|
||||
# undef CC_UNDERSCORE_
|
||||
# undef isFOO_or_UNDERSCORE_
|
||||
# undef IV_BITS
|
||||
# undef USE_STDIO
|
||||
# undef UV_BITS
|
||||
|
||||
7
handy.h
7
handy.h
@ -1550,8 +1550,9 @@ or casts
|
||||
# define CC_OCTDIGIT_ 24
|
||||
# define CC_MNEMONIC_CNTRL_ 25
|
||||
# define CC_MAGICAL_ 26
|
||||
# define CC_UNDERSCORE_ 27
|
||||
|
||||
/* Unused: 27-31
|
||||
/* Unused: 28-31
|
||||
* If more bits are needed, one could add a second word for non-64bit
|
||||
* QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd
|
||||
* word or not. The IS_IN_SOME_FOLD bit is the most easily expendable, as it
|
||||
@ -1660,6 +1661,10 @@ END_EXTERN_C
|
||||
# define isWORDCHAR_L1(c) generic_isCC_(c, CC_WORDCHAR_)
|
||||
# define isIDFIRST_L1(c) generic_isCC_(c, CC_IDFIRST_)
|
||||
|
||||
/* isFOO_or_UNDERSCORE_(c, lookup_bit): non-zero when 'c' passes
 * FITS_IN_8_BITS() and its PL_charclass[] entry has at least one of two bits
 * set: the one selected by 'lookup_bit' or the one selected by
 * CC_UNDERSCORE_.  Both masks are OR'ed together before the table word is
 * tested, so "is in class lookup_bit, or is an underscore" costs a single
 * AND rather than two separate class tests.
 *
 * 'lookup_bit' is presumably one of the CC_xxx_ class numbers accepted by
 * CC_mask_() -- confirm against its definition.
 *
 * NOTE: 'c' appears twice in the expansion, so avoid passing an argument
 * with side effects. */
#define isFOO_or_UNDERSCORE_(c, lookup_bit) \
|
||||
(FITS_IN_8_BITS(c) && ( PL_charclass[(U8) (c)] \
|
||||
& (CC_mask_(lookup_bit)|CC_mask_(CC_UNDERSCORE_))))
|
||||
|
||||
# ifdef EBCDIC
|
||||
# define isASCII(c) generic_isCC_(c, CC_ASCII_)
|
||||
# endif
|
||||
|
||||
@ -103,7 +103,7 @@
|
||||
/* U+5C '\' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* U+5D ']' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* U+5E '^' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* U+5F '_' */ (1U<<CC_ASCII_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_WORDCHAR_),
|
||||
/* U+5F '_' */ (1U<<CC_ASCII_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_UNDERSCORE_)|(1U<<CC_WORDCHAR_),
|
||||
/* U+60 '`' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* U+61 'a' */ (1U<<CC_ALPHA_)|(1U<<CC_ALPHANUMERIC_)|(1U<<CC_ASCII_)|(1U<<CC_CASED_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_IS_IN_SOME_FOLD_)|(1U<<CC_LOWER_)|(1U<<CC_MAGICAL_)|(1U<<CC_NONLATIN1_FOLD_)|(1U<<CC_NON_FINAL_FOLD_)|(1U<<CC_PRINT_)|(1U<<CC_WORDCHAR_)|(1U<<CC_XDIGIT_),
|
||||
/* U+62 'b' */ (1U<<CC_ALPHA_)|(1U<<CC_ALPHANUMERIC_)|(1U<<CC_ASCII_)|(1U<<CC_CASED_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_IS_IN_SOME_FOLD_)|(1U<<CC_LOWER_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_WORDCHAR_)|(1U<<CC_XDIGIT_),
|
||||
@ -380,7 +380,7 @@
|
||||
/* 0x6A U+A6 I8=BB BROKEN BAR */ (1U<<CC_GRAPH_)|(1U<<CC_PRINT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x6B U+2C ',' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x6C U+25 '%' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x6D U+5F '_' */ (1U<<CC_ASCII_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_WORDCHAR_),
|
||||
/* 0x6D U+5F '_' */ (1U<<CC_ASCII_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_UNDERSCORE_)|(1U<<CC_WORDCHAR_),
|
||||
/* 0x6E U+3E '>' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x6F U+3F '?' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x70 U+F8 I8=BC o with '/' */ (1U<<CC_ALPHA_)|(1U<<CC_ALPHANUMERIC_)|(1U<<CC_CASED_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_IS_IN_SOME_FOLD_)|(1U<<CC_LOWER_)|(1U<<CC_PRINT_)|(1U<<CC_WORDCHAR_),
|
||||
@ -643,7 +643,7 @@
|
||||
/* 0x6A U+A6 I8=BC BROKEN BAR */ (1U<<CC_GRAPH_)|(1U<<CC_PRINT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x6B U+2C ',' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x6C U+25 '%' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x6D U+5F '_' */ (1U<<CC_ASCII_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_WORDCHAR_),
|
||||
/* 0x6D U+5F '_' */ (1U<<CC_ASCII_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_UNDERSCORE_)|(1U<<CC_WORDCHAR_),
|
||||
/* 0x6E U+3E '>' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x6F U+3F '?' */ (1U<<CC_ASCII_)|(1U<<CC_GRAPH_)|(1U<<CC_MAGICAL_)|(1U<<CC_PRINT_)|(1U<<CC_PUNCT_)|(1U<<CC_QUOTEMETA_),
|
||||
/* 0x70 U+F8 I8=BD o with '/' */ (1U<<CC_ALPHA_)|(1U<<CC_ALPHANUMERIC_)|(1U<<CC_CASED_)|(1U<<CC_CHARNAME_CONT_)|(1U<<CC_GRAPH_)|(1U<<CC_IDFIRST_)|(1U<<CC_IS_IN_SOME_FOLD_)|(1U<<CC_LOWER_)|(1U<<CC_PRINT_)|(1U<<CC_WORDCHAR_),
|
||||
|
||||
2
lib/unicore/uni_keywords.pl
generated
2
lib/unicore/uni_keywords.pl
generated
@ -1427,6 +1427,6 @@
|
||||
# 03640d8ad18fc65de766f2034a927f7442960e998d3243845ca9b9fe31bfe1ab lib/unicore/mktables
|
||||
# 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version
|
||||
# 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
|
||||
# 6c52efdee47313cfde75ff86376008ce53320ebc93176caab45c77ce086a256d regen/mk_PL_charclass.pl
|
||||
# 8abaee16c84c1a61a69a77b6e8963675d99f515f3c2a34e449faeb9bfdec861a regen/mk_PL_charclass.pl
|
||||
# 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl
|
||||
# ex: set ro ft=perl:
|
||||
|
||||
@ -69,6 +69,7 @@ my %bit_names = (
|
||||
ord(']'), ord('^'), ord('`'), ord('|'), ord('~'),
|
||||
ord('$'),
|
||||
],
|
||||
UNDERSCORE => [ ord('_') ],
|
||||
);
|
||||
|
||||
sub uniques {
|
||||
|
||||
@ -82,6 +82,6 @@
|
||||
* 03640d8ad18fc65de766f2034a927f7442960e998d3243845ca9b9fe31bfe1ab lib/unicore/mktables
|
||||
* 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version
|
||||
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
|
||||
* 6c52efdee47313cfde75ff86376008ce53320ebc93176caab45c77ce086a256d regen/mk_PL_charclass.pl
|
||||
* 8abaee16c84c1a61a69a77b6e8963675d99f515f3c2a34e449faeb9bfdec861a regen/mk_PL_charclass.pl
|
||||
* 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl
|
||||
* ex: set ro ft=c: */
|
||||
|
||||
2
uni_keywords.h
generated
2
uni_keywords.h
generated
@ -8174,7 +8174,7 @@ match_uniprop( const unsigned char * const key, const U16 key_len ) {
|
||||
* 03640d8ad18fc65de766f2034a927f7442960e998d3243845ca9b9fe31bfe1ab lib/unicore/mktables
|
||||
* 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version
|
||||
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
|
||||
* 6c52efdee47313cfde75ff86376008ce53320ebc93176caab45c77ce086a256d regen/mk_PL_charclass.pl
|
||||
* 8abaee16c84c1a61a69a77b6e8963675d99f515f3c2a34e449faeb9bfdec861a regen/mk_PL_charclass.pl
|
||||
* 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl
|
||||
* d6987e01ad538d1567394851cf199f99815f7701bebd6092be4bc7a6d8f147c6 regen/mph.pl
|
||||
* ex: set ro ft=c: */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user