From 3158c7f0721f8558a1acdcf5c2dad12be0fa0e2e Mon Sep 17 00:00:00 2001 From: "Eric S. Raymond" Date: Sat, 10 Oct 2020 19:25:32 -0400 Subject: [PATCH] Implement and document %option yylmax to replace #define YYLMAX. Test in tests/test-yylmax. --- doc/flex.texi | 38 ++++++++++++++++++++------------------ src/cpp-flex.skl | 5 ++++- src/flexdef.h | 4 +++- src/main.c | 6 +++++- src/parse.y | 4 +++- src/scan.l | 2 ++ tests/test-yylmax | 14 ++++++++++++++ 7 files changed, 51 insertions(+), 22 deletions(-) create mode 100755 tests/test-yylmax diff --git a/doc/flex.texi b/doc/flex.texi index 5c6c04e8..836018a5 100644 --- a/doc/flex.texi +++ b/doc/flex.texi @@ -1137,16 +1137,16 @@ for @code{%array}. The @code{%array} declaration defines @code{yytext} to be an array of @code{YYLMAX} characters, which defaults to a fairly large value. You -can change the size by simply #define'ing @code{YYLMAX} to a different -value in the first section of your @code{flex} input. As mentioned -above, with @code{%pointer} yytext grows dynamically to accommodate -large tokens. While this means your @code{%pointer} scanner can -accommodate very large tokens (such as matching entire blocks of -comments), bear in mind that each time the scanner must resize -@code{yytext} it also must rescan the entire token from the beginning, -so matching such tokens can prove slow. @code{yytext} presently does -@emph{not} dynamically grow if a call to @code{unput()} results in too -much text being pushed back; instead, a run-time error results. +can change the size to a different value with @code{%option yylmax += NNN}. As mentioned above, with @code{%pointer} yytext grows +dynamically to accommodate large tokens. While this means your +@code{%pointer} scanner can accommodate very large tokens (such as +matching entire blocks of comments), bear in mind that each time the +scanner must resize @code{yytext} it also must rescan the entire token +from the beginning, so matching such tokens can prove slow. 
+@code{yytext} presently does @emph{not} dynamically grow if a call to +@code{unput()} results in too much text being pushed back; instead, a +run-time error results. @cindex %array, with C++ Also note that you cannot use @code{%array} with C++ scanner classes @@ -2398,11 +2398,11 @@ lengthened (you cannot append characters to the end). @cindex yytext, default array size @cindex array, default size for yytext -@vindex YYLMAX +@vindex yylmax If the special directive @code{%array} appears in the first section of the scanner description, then @code{yytext} is instead declared -@code{char yytext[YYLMAX]}, where @code{YYLMAX} is a macro definition -that you can redefine in the first section if you don't like the default +to be an array of @code{YYLMAX} characters, where @code{YYLMAX} is a parameter +that you can redefine with @code{%option yylmax} if you don't like the default value (generally 8KB). Using @code{%array} results in somewhat slower scanners, but the value of @code{yytext} becomes immune to calls to @code{unput()}, which potentially destroy its value when @code{yytext} is @@ -5292,9 +5292,8 @@ its rules. This error can also occur due to internal problems. @item @samp{token too large, exceeds YYLMAX}. your scanner uses @code{%array} and one of its rules matched a string longer than the @code{YYLMAX} -constant (8K bytes by default). You can increase the value by -#define'ing @code{YYLMAX} in the definitions section of your @code{flex} -input. +constant (8K bytes by default). You can increase the value with +@code{%option yylmax}. @item @samp{scanner requires -8 flag to use the character 'x'}. Your scanner @@ -8721,10 +8720,13 @@ BEGIN: Replaced by yybegin() ECHO: Replaced by yyecho() @item -REJECT: Replaced by yyreject() +#define REJECT: Replaced by yyreject() @item -YY_DECL: Replaced by the %yydecl directive. +#define YY_DECL: Replaced by the %yydecl directive. + +@item +#define YYLMAX: Replaced by %option yylmax. 
@end itemize @node Indices, , Appendices, Top diff --git a/src/cpp-flex.skl b/src/cpp-flex.skl index 59a717ea..c6b4c9a8 100644 --- a/src/cpp-flex.skl +++ b/src/cpp-flex.skl @@ -89,7 +89,10 @@ m4_define([[M4_HOOK_EOF_STATE_CASE_TERMINATE]], [[ yyterminate(); m4_define([[M4_HOOK_CONST_DEFINE]], [[#define $1 $2 ]]) -m4_define([[M4_HOOK_SET_YY_DECL]], [[#define YY_DECL $1]]) +m4_define([[M4_HOOK_SET_YY_DECL]], [[#define YY_DECL $1 +]]) +m4_define([[M4_HOOK_SET_YYLMAX]], [[#define YYLMAX $1 +]]) %% [0.0] Make hook macros available to Flex diff --git a/src/flexdef.h b/src/flexdef.h index 10bc90f5..a5f98b43 100644 --- a/src/flexdef.h +++ b/src/flexdef.h @@ -352,6 +352,7 @@ struct ctrl_bundle_t { // otherwise, use fread(). char *yyclass; // yyFlexLexer subclass to use for YY_DECL char *yydecl; // user-specfied prototype for yylex. + int yylmax; // Maximum buffer length if %array bool yytext_is_array; // if true (i.e., %array directive), then declare // yytext as array instead of a character pointer. // Nice and inefficient. 
@@ -669,6 +670,7 @@ extern unsigned char *ccltbl; /* Variables for miscellaneous information: * nmstr - last NAME scanned by the scanner + * nmval - last numeric scanned by the scanner * sectnum - section number currently being parsed * nummt - number of empty nxt/chk table entries * hshcol - number of hash collisions detected by snstods @@ -688,7 +690,7 @@ extern unsigned char *ccltbl; */ extern char nmstr[MAXLINE]; -extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs, nmval; extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; extern int num_backing_up, bol_needed; diff --git a/src/main.c b/src/main.c index 2122bae6..391d5a12 100644 --- a/src/main.c +++ b/src/main.c @@ -86,7 +86,7 @@ int lastccl, *cclmap, *ccllen, *cclng, cclreuse; int current_maxccls, current_max_ccl_tbl_size; unsigned char *ccltbl; char nmstr[MAXLINE]; -int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs; +int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs, nmval; int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; int num_backing_up, bol_needed; int end_of_buffer_state; @@ -1288,6 +1288,10 @@ void readin (void) out_str ("M4_HOOK_SET_YY_DECL(%s)\n", cp); } + if (ctrl.yylmax != 0) { + out_dec ("M4_HOOK_SET_YYLMAX(%d)\n", ctrl.yylmax); + } + /* Dump the user defined preproc directives. 
*/ if (userdef_buf.elts) outn ((char *) (userdef_buf.elts)); diff --git a/src/parse.y b/src/parse.y index 1a7e1bbe..f13a36d9 100644 --- a/src/parse.y +++ b/src/parse.y @@ -2,7 +2,7 @@ %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP %token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE -%token TOK_TABLES_FILE +%token TOK_TABLES_FILE TOK_YYLMAX TOK_NUMERIC %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT @@ -206,6 +206,8 @@ option : TOK_OUTFILE '=' NAME { ctrl.yyclass = xstrdup(nmstr); } | TOK_HEADER_FILE '=' NAME { env.headerfilename = xstrdup(nmstr); } + | TOK_YYLMAX '=' TOK_NUMERIC + { ctrl.yylmax = nmval; } | TOK_TABLES_FILE '=' NAME { tablesext = true; tablesfilename = xstrdup(nmstr); } ; diff --git a/src/scan.l b/src/scan.l index c24c6802..975ba414 100644 --- a/src/scan.l +++ b/src/scan.l @@ -351,6 +351,7 @@ M4QEND "]""]" {WS} option_sense = true; "=" return '='; + [[:digit:]]+ {nmval = atoi(yytext); return TOK_NUMERIC;} no option_sense = ! option_sense; @@ -450,6 +451,7 @@ M4QEND "]""]" outfile return TOK_OUTFILE; prefix return TOK_PREFIX; yyclass return TOK_YYCLASS; + yylmax return TOK_YYLMAX; header(-file)? return TOK_HEADER_FILE; tables-file return TOK_TABLES_FILE; tables-verify { diff --git a/tests/test-yylmax b/tests/test-yylmax new file mode 100755 index 00000000..a89fea9d --- /dev/null +++ b/tests/test-yylmax @@ -0,0 +1,14 @@ +#! /bin/sh +# Test %yylmax option of flex in any cpp-based back end + +trap 'rm /tmp/td$$' EXIT HUP INT QUIT TERM +cat >/tmp/td$$ </dev/null ) || (echo "%yylmax test failed." >&2; exit 1) + + +