mirror of
https://github.com/Perl/perl5.git
synced 2026-01-26 08:38:23 +00:00
l1_char_class_tab.h categorizes characters in the Latin1 range into various classes, mostly into the POSIX classes like [:word:]. Each character has a bit set corresponding to every class it is a member of. These values are placed in a 256-element array, and the ordinal value of a character is used as an index into it to quickly determine whether the character is a member of a given class. Besides the POSIX classes, there are some classes that make it more convenient and/or faster for our code. For example, there is a class that allows us to quickly know if a given character is one that needs to be preceded by a backslash by quotemeta(). This commit adds a class for the single character underscore '_', and a macro that allows for seeing if a character is either an underscore or a member of any other class, using a single conditional. This means code that checks whether character X is either an underscore or a member of class Y can change to eliminate one conditional. Thus the reason to do this is efficiency. Currently, the only places that do this explicitly are in non-hot code. But I have work in progress (WIP) with hot code that could benefit from this. The only downside of doing this is that it uses up one bit of the 32 available (without shenanigans) for such classes, leaving 4 spare. But before this release, the last time any new bit had been used up was 5.32, so the rate of using up these spare bits is quite low. This bit could be reclaimed because the IDFIRST class in the Latin1 range is identical to ALPHA plus the underscore, so it could be rewritten as that combination and its bit freed up. However, this would require adding some macros that take two class parameters instead of one. I briefly thought about doing that now, but since we have spare bits and the rate of using them up is low, I didn't think it was worth it at this time. \w in this range is ALPHANUMERIC plus underscore.
But its use is more embedded than IDFIRST is, so an attempt to reclaim its bit would require more effort.
88 lines
7.0 KiB
C
88 lines
7.0 KiB
C
/* -*- mode: C; buffer-read-only: t -*-
|
|
* !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
|
|
* This file is built by regen/mk_invlists.pl from Unicode::UCD.
|
|
* Any changes made here will be lost!
|
|
*/
|
|
|
|
/* This gives the number of code points that can be in the bitmap of an ANYOF
|
|
* node. The shift number must currently be one of: 8..12. It can't be less
|
|
* than 8 (256) because some code relies on it being at least that. Above 12
|
|
* (4096), and you start running into warnings that some data structure widths
|
|
* have been exceeded, though the test suite as of this writing still passes
|
|
* for up through 16, which is as high as anyone would ever want to go,
|
|
* encompassing all of the Unicode BMP, and thus including all the economically
|
|
* important world scripts. At 12 most of them are: including Arabic,
|
|
* Cyrillic, Greek, Hebrew, Indian subcontinent, Latin, and Thai; but not Han,
|
|
* Japanese, nor Korean. (The regnode sizing data structure in regnodes.h currently
|
|
* uses a U8, and the trie types TRIEC and AHOCORASICKC are larger than U8 for
|
|
* shift values above 12.) Be sure to benchmark before changing, as larger sizes
|
|
* do significantly slow down the test suite. */
|
|
|
|
/* Number of code points representable in an ANYOF node's bitmap, written as
 * 1 << shift; the current shift of 8 yields 256. */
#define NUM_ANYOF_CODE_POINTS (1 << 8)
|
|
|
|
|
|
/* More than one code point may have the same code point as their fold. This
|
|
* gives the maximum number in the current Unicode release. (The folded-to
|
|
* code point is not included in this count.) For example, both 'S' and
|
|
* \x{17F} fold to 's', so the number for that fold is 2. Another way to
|
|
* look at it is the maximum length of all the IVCF_AUX_TABLE's */
|
|
/* Largest number of distinct code points that fold to one common code point
 * in the current Unicode release (the folded-to code point is not counted). */
#define MAX_FOLD_FROMS 3
|
|
|
|
/* Generated from:
|
|
* 198105a1b3637a4bc7240628bae2f65f6d0c31df2cee4f0242c2561421ea2e75 lib/Unicode/UCD.pm
|
|
* 39afa01e680e27d0fd10b67a9b27be13fbaa3d0efecfb5be45991de9a0d267d0 lib/unicore/ArabicShaping.txt
|
|
* dadbaf38a0d0246e5b805bf8725cb81b7c621f93d030595635f5ba2c2f179428 lib/unicore/BidiBrackets.txt
|
|
* a2f16fb873ab4fcdf3221cb1a8a85a134ddd6ed03603181823ff5206af3741ce lib/unicore/BidiMirroring.txt
|
|
* c0edefaf1a19771e830a82735472716af6bf3c3975f6c2a23ffbe2580fbbcb15 lib/unicore/Blocks.txt
|
|
* ff8d8fefbf123574205085d6714c36149eb946d717a0c585c27f0f4ef58c4183 lib/unicore/CaseFolding.txt
|
|
* 5e6e9c8f8e76561da04cb1703a9306c63707be2ed8ff2eb12cd3a942368a6f72 lib/unicore/CompositionExclusions.txt
|
|
* f8ecdf768bdc210f201abd271d9bc587825618a86a7046a8146cc816393f1998 lib/unicore/DAge.txt
|
|
* 24c7fed1195c482faaefd5c1e7eb821c5ee1fb6de07ecdbaa64b56a99da22c08 lib/unicore/DCoreProperties.txt
|
|
* 71fd6a206a2c0cdd41feb6b7f656aa31091db45e9cedc926985d718397f9e488 lib/unicore/DNormalizationProps.txt
|
|
* ea7ce50f3444a050333448dffef1cadd9325af55cbb764b4a2280faf52170a33 lib/unicore/EastAsianWidth.txt
|
|
* 38619c05a17e771554000fe604afee92e10eb49e0616ecf0c87af3c1eb0f4320 lib/unicore/EquivalentUnifiedIdeograph.txt
|
|
* 5a57450afde0d082bc5026f7458649eac3b615490cc7e3d916b0367f1593c0e3 lib/unicore/HangulSyllableType.txt
|
|
* 617228a16da13850bf8af28b6cd08f5e9b6595d2eb60404fe6eee2c85b4e4a35 lib/unicore/IdStatus.txt
|
|
* 924ac63faa97ed73420d6ac48d08279d90968c7da0502ab701e08bfbb9683c22 lib/unicore/IdType.txt
|
|
* 68cedc29a7e57f984d90fe2c7712f2e6d0c717e253db219607daea8997d6c480 lib/unicore/IndicPositionalCategory.txt
|
|
* 3fc122f4cf58b0c19268d5f810263b04ab4e1e67743386ec0e0ada9c76aec5be lib/unicore/IndicSyllabicCategory.txt
|
|
* 17bb07f5e37f995351ddcef393c04464a9e3891ed0c0bd56a03a5c5e400a6326 lib/unicore/Jamo.txt
|
|
* e6a18fa91f8f6a6f8e534b1d3f128c21ada45bfe152eb6b1bcc5e15fd8ac92e6 lib/unicore/LineBreak.txt
|
|
* 793f6f1e4d15fd90f05ae66460191dc4d75d1fea90136a25f30dd6a4cb950eac lib/unicore/NameAliases.txt
|
|
* 360dac27d5abafdcd8c03a8597f47acf92e8ebf7f6ee28020c173ed8b2ed0cc5 lib/unicore/NamedSequences.txt
|
|
* 5019ffd530751a741900c849c0e010332f142a3612234639bd200b82138a87db lib/unicore/NormTest.txt
|
|
* 130dcddcaadaf071008bdfce1e7743e04fdfbc910886f017d9f9ac931d8c64dd lib/unicore/PropList.txt
|
|
* 670d2bebb48649c04fabfbf033308073dcff47946324a8033237254c048b3b01 lib/unicore/PropValueAliases.txt
|
|
* 4441f573caf952ffece1d7c892e7715bd7136dfc26f96eb6f268bf1e474715fb lib/unicore/PropertyAliases.txt
|
|
* ec2107e58825a1586acee8e0911ce18260394ac8b87e535ca325f1ccbeb06bc6 lib/unicore/ScriptExtensions.txt
|
|
* 9f5e50d3abaee7d6ce09480f325c706f485ae3240912527e651954d2d6b035bf lib/unicore/Scripts.txt
|
|
* efc25faf19de21b92c1194c111c932e03d2a5eaf18194e33f1156e96de4c9588 lib/unicore/SpecialCasing.txt
|
|
* 2e1efc1dcb59c575eedf5ccae60f95229f706ee6d031835247d843c11d96470c lib/unicore/UnicodeData.txt
|
|
* 76a3081265e6eb673873f9c93d6f36062e82c7ed027c5c1a592accfbe48c20a5 lib/unicore/Unikemet.txt
|
|
* dcef09c3fb24d356b042569c328ec341efc5b53447700d799f2fb4834c3cd3cd lib/unicore/VerticalOrientation.txt
|
|
* e2d134d2c52919bace503ebb6a551c1855fe1a1faec18478c78fff254a1793ec lib/unicore/auxiliary/GCBTest.txt
|
|
* d6b51d1d2ae5c33b451b7ed994b48f1f4dc62b2272a5831e7fd418514a6bae89 lib/unicore/auxiliary/GraphemeBreakProperty.txt
|
|
* e69884e0dde6a8724873f885d68c52dc14518abf9ae4ca9e2283b8773db3b752 lib/unicore/auxiliary/LBTest.txt
|
|
* 12cb47d028ded0c1cb8a28558f95479cbcd24559c46977015c82f3b50a1cc6e4 lib/unicore/auxiliary/SBTest.txt
|
|
* 871c0c985ad95125e25b302414065a10839d068970bceb383ecec138f22a0a18 lib/unicore/auxiliary/SentenceBreakProperty.txt
|
|
* 1de23a75f37904abc7d206239ee8d34f8fdf0fb4ab32a7174dfbabbde25419b2 lib/unicore/auxiliary/WBTest.txt
|
|
* 72274cac1e6b919507db35655c3e175aa27274668a1ece95c28d2069f2ad9852 lib/unicore/auxiliary/WordBreakProperty.txt
|
|
* 2cb2bb9455cda83e8481541ecf5b6dfda66a3bb89efa3fa7c5297eccf607b72b lib/unicore/emoji/emoji.txt
|
|
* 4867b4b7f0731ed1bfcd34cc6251211ff1542541fce0734b6fbda139ee80b3a4 lib/unicore/extracted/DBidiClass.txt
|
|
* 13dd09d35a9377e33eb388a01e6581d4bfec6b2685316078c341982fa444071a lib/unicore/extracted/DBinaryProperties.txt
|
|
* 191463abfbd202703c6fd6776a92a23ac44ec65e0476a7f95aa91ca492cef29b lib/unicore/extracted/DCombiningClass.txt
|
|
* f44e5ceaf40edc1fe06ea0404e8bebc7d356dcc38aac076543b6874008a06e3e lib/unicore/extracted/DDecompositionType.txt
|
|
* 0b5523a2217cb318d20b329a05d31eec5af5686ba09d263b85bb75a28989a3a8 lib/unicore/extracted/DEastAsianWidth.txt
|
|
* d62e5bab70ca74f099343f71224fa051cb1fdd61a1ab45c0488c44cfc0b6102e lib/unicore/extracted/DGeneralCategory.txt
|
|
* bb67e0c00b88acfa5be633967b66b23326844a86e49c6fde7b57960d3af66cae lib/unicore/extracted/DJoinGroup.txt
|
|
* f39ebe974825d6736aee15582250307aa532b2cfab3caf3f86bd23fddc9c5c4d lib/unicore/extracted/DJoinType.txt
|
|
* dad3ef492d198d6f1dde4922b175f7371a27dfe62fce489f3e04807015a4c682 lib/unicore/extracted/DLineBreak.txt
|
|
* 7c83684d3336b698381745b78a971c3e1242cb3fcac58604469086c19b6edcee lib/unicore/extracted/DNumType.txt
|
|
* 139b976bdc288be01c80f018523da769cf2845109b5a7f0f8a432db64bfedcfa lib/unicore/extracted/DNumValues.txt
|
|
* 03640d8ad18fc65de766f2034a927f7442960e998d3243845ca9b9fe31bfe1ab lib/unicore/mktables
|
|
* 8c30575264b2772c7a69c5bb6069a28f0e0a7a0df735871bde2d99ee674316ac lib/unicore/version
|
|
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
|
|
* 8abaee16c84c1a61a69a77b6e8963675d99f515f3c2a34e449faeb9bfdec861a regen/mk_PL_charclass.pl
|
|
* 20a6e3d507a66f4594586485568134873485b08e23383f3dc4e6b3047569267b regen/mk_invlists.pl
|
|
* ex: set ro ft=c: */
|