don't install charclass_invlists.h

... by renaming it to charclass_invlists.inc, which no longer matches
the standard *.h pattern.

charclass_invlists.inc is a large (4.5MB) generated file that is only
needed at build time (specifically, by utf8.c as well as regcomp.c and
regexec.c). Skipping this single file makes a standard perl install
slightly smaller (from ~90MB to ~85MB in my tests).
This commit is contained in:
Lukas Mai 2025-03-01 13:34:24 +01:00 committed by mauke
parent 16cb53546f
commit 42e7ab19c7
14 changed files with 30 additions and 24 deletions

View File

@ -11,7 +11,7 @@ builtin.c Functions in the builtin:: namespace
caretx.c C file to create $^X
cflags.SH A script that emits C compilation flags per file
Changes Describe how to peruse changes between releases
charclass_invlists.h Compiled-in inversion lists
charclass_invlists.inc Generated compiled-in inversion lists
class.c Internals of the `use feature 'class'` object system
CODE_OF_CONDUCT.md Information on where to find the Standards of Conduct
config_h.SH Produces config.h
@ -5580,7 +5580,7 @@ lib/unicore/Scripts.txt Unicode character database
lib/unicore/SpecialCasing.txt Unicode character database
lib/unicore/StandardizedVariants.txt Unicode character database
lib/unicore/TestNorm.pl Unicode character database
lib/unicore/uni_keywords.pl Indices into array in charclass_invlists.h
lib/unicore/uni_keywords.pl Indices into array in charclass_invlists.inc
lib/unicore/UnicodeData.txt Unicode character database
lib/unicore/version The version of the Unicode
lib/unicore/VerticalOrientation.txt Unicode character database
@ -5956,7 +5956,7 @@ regen/lib_cleanup.pl Generate lib/.gitignore from MANIFEST
regen/locale.pl Program to write locale_table.h
regen/mg_vtable.pl generate mg_vtable.h
regen/miniperlmain.pl generate miniperlmain.c
regen/mk_invlists.pl Generates charclass_invlists.h
regen/mk_invlists.pl Generate charclass_invlists.inc, regexp_constants.h, uni_keywords.h
regen/mk_PL_charclass.pl Populate the PL_charclass table
regen/mph.pl Generate perfect hashes
regen/op_private Definitions of bits in an OP's op_private field

View File

@ -530,12 +530,11 @@ unidatadirs = lib/unicore/To lib/unicore/lib
h = \
EXTERN.h INTERN.h XSUB.h \
av.h charclass_invlists.h cop.h cv.h dosish.h embed.h form.h gv.h \
handy.h hv.h hv_func.h iperlsys.h keywords.h l1_char_class_tab.h \
mg.h mydtrace.h op.h op_reg_common.h opcode.h pad.h patchlevel.h \
perl.h perlapi.h perly.h pp.h proto.h regcomp.h regcomp_internal.h \
regexp.h regexp_constants.h scope.h sv.h thread.h unixish.h utf8.h \
util.h warnings.h \
av.h cop.h cv.h dosish.h embed.h form.h gv.h handy.h hv.h hv_func.h \
iperlsys.h keywords.h l1_char_class_tab.h mg.h mydtrace.h op.h \
op_reg_common.h opcode.h pad.h patchlevel.h perl.h perlapi.h perly.h pp.h \
proto.h regcomp.h regcomp_internal.h regexp.h regexp_constants.h scope.h \
sv.h thread.h unixish.h utf8.h util.h warnings.h \
$(CONFIGH)
c_base = \

View File

@ -436059,5 +436059,5 @@ static const U8 WB_table[23][23] = {
* 55d90fdc3f902e5c0b16b3378f9eaa36e970a1c09723c33de7d47d0370044012 lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl
* e7ba2c6c0577fbb8b767a1305dbebcfeec166d11aa010cfbad9001c5f5971ee6 regen/mk_invlists.pl
* 0c9dc59e0c05912141ae41bce0bc55e48cae956407b7a1f12807a3d7ffea3b19 regen/mk_invlists.pl
* ex: set ro ft=c: */

View File

@ -162,7 +162,7 @@ mv Unihan_Variants.txt UnihanVariants.txt
#
# perl regen/mk_PL_charclass.pl
#
# and, regen charclass_invlists.h by
# and, regen charclass_invlists.inc by
#
# perl regen/mk_invlists.pl
#

View File

@ -1335,5 +1335,5 @@
# 55d90fdc3f902e5c0b16b3378f9eaa36e970a1c09723c33de7d47d0370044012 lib/unicore/version
# 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
# c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl
# e7ba2c6c0577fbb8b767a1305dbebcfeec166d11aa010cfbad9001c5f5971ee6 regen/mk_invlists.pl
# 0c9dc59e0c05912141ae41bce0bc55e48cae956407b7a1f12807a3d7ffea3b19 regen/mk_invlists.pl
# ex: set ro ft=perl:

2
perl.h
View File

@ -3294,7 +3294,7 @@ typedef struct padname PADNAME;
#include "handy.h"
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
# include "charclass_invlists.h"
# include "charclass_invlists.inc"
#endif
#if defined(USE_LARGE_FILES) && !defined(NO_64_BIT_RAWIO)

View File

@ -14919,9 +14919,9 @@ S_parse_uniprop_string(pTHX_
Titlecase Mapping (both full and simple)
Uppercase Mapping (both full and simple)
* Handle these the same way Name is done, using say, _wild.pm, but
* having both loose and full, like in charclass_invlists.h.
* having both loose and full, like in charclass_invlists.inc.
* Perhaps move block and script to that as they are somewhat large
* in charclass_invlists.h.
* in charclass_invlists.inc.
* For properties where the default is the code point itself, such
* as any of the case changing mappings, the string would otherwise
* consist of all Unicode code points in UTF-8 strung together.
@ -14935,7 +14935,7 @@ S_parse_uniprop_string(pTHX_
} /* End of is a wildcard subpattern */
/* \p{name=...} is handled specially. Instead of using the normal
* mechanism involving charclass_invlists.h, it uses _charnames.pm
* mechanism involving charclass_invlists.inc, it uses _charnames.pm
* which has the necessary (huge) data accessible to it, and which
* doesn't get loaded unless necessary. The legal syntax for names is
* somewhat different than other properties due both to the vagaries of

View File

@ -19,7 +19,7 @@ use re "/aa";
print "Starting...\n" if DEBUG;
# This program outputs charclass_invlists.h, which contains various inversion
# This program outputs charclass_invlists.inc, which contains various inversion
# lists in the form of C arrays that are to be used as-is for inversion lists.
# Thus, the lists it contains are essentially pre-compiled, and need only a
# light-weight fast wrapper to make them usable at run-time.
@ -32,7 +32,7 @@ print "Starting...\n" if DEBUG;
my $VERSION_DATA_STRUCTURE_TYPE = 148565664;
# charclass_invlists.h now also contains inversion maps and enum definitions
# charclass_invlists.inc now also contains inversion maps and enum definitions
# for those maps that have a finite number of possible values
# integer or float (no exponent)
@ -54,7 +54,7 @@ my $table_name_prefix = "UNI_";
# ASCII \w
my $enum_name_re = qr / ^ [[:alpha:]] \w* $ /ax;
my $out_fh = open_new('charclass_invlists.h', '>',
my $out_fh = open_new('charclass_invlists.inc', '>',
{style => '*', by => 'regen/mk_invlists.pl',
from => "Unicode::UCD"});

View File

@ -11543,7 +11543,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
* pointers to auxiliary tables for each such situation. Each aux array
* lists all the scripts for the given situation. There is another,
* parallel, table that gives the number of entries in each aux table.
* These are all defined in charclass_invlists.h */
* These are all defined in charclass_invlists.inc */
/* XXX Here are the additional things UTS 39 says could be done:
*

View File

@ -82,5 +82,5 @@
* 55d90fdc3f902e5c0b16b3378f9eaa36e970a1c09723c33de7d47d0370044012 lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl
* e7ba2c6c0577fbb8b767a1305dbebcfeec166d11aa010cfbad9001c5f5971ee6 regen/mk_invlists.pl
* 0c9dc59e0c05912141ae41bce0bc55e48cae956407b7a1f12807a3d7ffea3b19 regen/mk_invlists.pl
* ex: set ro ft=c: */

View File

@ -31,7 +31,7 @@ my $tests = 28; # I can't see a clean way to calculate this automatically.
my %skip = ("regen_perly.pl" => [qw(perly.act perly.h perly.tab)],
"regen/keywords.pl" => [qw(keywords.c keywords.h)],
"regen/mk_invlists.pl" => [qw(charclass_invlists.h regexp_constants.h uni_keywords.h)],
"regen/mk_invlists.pl" => [qw(charclass_invlists.inc regexp_constants.h uni_keywords.h)],
"regen/regcharclass.pl" => [qw(regcharclass.h)],
);

4
uni_keywords.h generated
View File

@ -10,7 +10,7 @@
#define MPH_VALt I16
/*
generator script: ./regen/mk_invlists.pl
generator script: regen/mk_invlists.pl
split strategy: squeeze
srand: 1785235451
rows: 7498
@ -7760,6 +7760,6 @@ match_uniprop( const unsigned char * const key, const U16 key_len ) {
* 55d90fdc3f902e5c0b16b3378f9eaa36e970a1c09723c33de7d47d0370044012 lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl
* e7ba2c6c0577fbb8b767a1305dbebcfeec166d11aa010cfbad9001c5f5971ee6 regen/mk_invlists.pl
* 0c9dc59e0c05912141ae41bce0bc55e48cae956407b7a1f12807a3d7ffea3b19 regen/mk_invlists.pl
* d6987e01ad538d1567394851cf199f99815f7701bebd6092be4bc7a6d8f147c6 regen/mph.pl
* ex: set ro ft=c: */

View File

@ -1524,6 +1524,7 @@ MakePPPort : $(HAVEMINIPERL) $(CONFIGPM)
$(XCOPY) *.h $(COREDIR)\\*.*
$(RCOPY) include $(COREDIR)\\*.*
$(XCOPY) ..\\*.h $(COREDIR)\\*.*
$(XCOPY) ..\\charclass_invlists.inc $(COREDIR)\\*.*
rem. > $@
perlmain$(o) : runperl.c $(CONFIGPM)

View File

@ -896,6 +896,7 @@ regen_config_h:
$(CONFIGPM) : $(MINIPERL) ..\config.sh config_h.PL ..\git_version.h
$(MINIPERL) -I..\lib ..\configpm --chdir=..
$(XCOPY) ..\*.h $(COREDIR)\*.*
$(XCOPY) ..\charclass_invlists.inc $(COREDIR)\*.*
$(XCOPY) *.h $(COREDIR)\*.*
$(RCOPY) include $(COREDIR)\*.*
if not exist "$(FULLDIR)" mkdir "$(FULLDIR)"
@ -1095,23 +1096,28 @@ MakePPPort: $(MINIPERL) $(CONFIGPM) Extensions_nonxs
# DynaLoader.pm, so this will have to do
Extensions: ..\make_ext.pl ..\lib\buildcustomize.pl $(PERLDEP) $(CONFIGPM) $(DYNALOADER)
$(XCOPY) ..\*.h $(COREDIR)\*.*
$(XCOPY) ..\charclass_invlists.inc $(COREDIR)\*.*
$(MINIPERL) -I..\lib ..\make_ext.pl "MAKE=$(MAKE)" --dir=$(CPANDIR) --dir=$(DISTDIR) --dir=$(EXTDIR) --dynamic
Extensions_reonly: ..\make_ext.pl ..\lib\buildcustomize.pl $(PERLDEP) $(CONFIGPM) $(DYNALOADER)
$(XCOPY) ..\*.h $(COREDIR)\*.*
$(XCOPY) ..\charclass_invlists.inc $(COREDIR)\*.*
$(MINIPERL) -I..\lib ..\make_ext.pl "MAKE=$(MAKE)" --dir=$(CPANDIR) --dir=$(DISTDIR) --dir=$(EXTDIR) --dynamic +re
Extensions_static : ..\make_ext.pl ..\lib\buildcustomize.pl list_static_libs.pl $(PERLDEP) $(CONFIGPM) Extensions_nonxs
$(XCOPY) ..\*.h $(COREDIR)\*.*
$(XCOPY) ..\charclass_invlists.inc $(COREDIR)\*.*
$(MINIPERL) -I..\lib ..\make_ext.pl "MAKE=$(MAKE)" --dir=$(CPANDIR) --dir=$(DISTDIR) --dir=$(EXTDIR) --static
$(MINIPERL) -I..\lib list_static_libs.pl -o Extensions_static
Extensions_nonxs: ..\make_ext.pl ..\lib\buildcustomize.pl $(PERLDEP) $(CONFIGPM) ..\pod\perlfunc.pod
$(XCOPY) ..\*.h $(COREDIR)\*.*
$(XCOPY) ..\charclass_invlists.inc $(COREDIR)\*.*
$(MINIPERL) -I..\lib ..\make_ext.pl "MAKE=$(MAKE)" --dir=$(CPANDIR) --dir=$(DISTDIR) --dir=$(EXTDIR) --nonxs
$(DYNALOADER) : ..\make_ext.pl ..\lib\buildcustomize.pl $(PERLDEP) $(CONFIGPM) Extensions_nonxs
$(XCOPY) ..\*.h $(COREDIR)\*.*
$(XCOPY) ..\charclass_invlists.inc $(COREDIR)\*.*
$(MINIPERL) -I..\lib ..\make_ext.pl "MAKE=$(MAKE)" --dir=$(EXTDIR) --dynaloader
Extensions_clean: