Implement and document %option yylmax to replace #define YYLMAX.

Test in tests/test-yylmax.
This commit is contained in:
Eric S. Raymond 2020-10-10 19:25:32 -04:00
parent 0d1959c595
commit 3158c7f072
7 changed files with 51 additions and 22 deletions

View File

@ -1137,16 +1137,16 @@ for @code{%array}.
The @code{%array} declaration defines @code{yytext} to be an array of
@code{YYLMAX} characters, which defaults to a fairly large value. You
can change the size by simply #define'ing @code{YYLMAX} to a different
value in the first section of your @code{flex} input. As mentioned
above, with @code{%pointer} yytext grows dynamically to accommodate
large tokens. While this means your @code{%pointer} scanner can
accommodate very large tokens (such as matching entire blocks of
comments), bear in mind that each time the scanner must resize
@code{yytext} it also must rescan the entire token from the beginning,
so matching such tokens can prove slow. @code{yytext} presently does
@emph{not} dynamically grow if a call to @code{unput()} results in too
much text being pushed back; instead, a run-time error results.
can change the size to a different value with @code{%option yylmax
= NNN}. As mentioned above, with @code{%pointer} yytext grows
dynamically to accommodate large tokens. While this means your
@code{%pointer} scanner can accommodate very large tokens (such as
matching entire blocks of comments), bear in mind that each time the
scanner must resize @code{yytext} it also must rescan the entire token
from the beginning, so matching such tokens can prove slow.
@code{yytext} presently does @emph{not} dynamically grow if a call to
@code{unput()} results in too much text being pushed back; instead, a
run-time error results.
@cindex %array, with C++
Also note that you cannot use @code{%array} with C++ scanner classes
@ -2398,11 +2398,11 @@ lengthened (you cannot append characters to the end).
@cindex yytext, default array size
@cindex array, default size for yytext
@vindex YYLMAX
@vindex yylmax
If the special directive @code{%array} appears in the first section of
the scanner description, then @code{yytext} is instead declared
@code{char yytext[YYLMAX]}, where @code{YYLMAX} is a macro definition
that you can redefine in the first section if you don't like the default
to be an array of @code{YYLMAX} characters, where @code{YYLMAX} is a parameter
that you can set with @code{%option yylmax} if you don't like the default
value (generally 8KB). Using @code{%array} results in somewhat slower
scanners, but the value of @code{yytext} becomes immune to calls to
@code{unput()}, which potentially destroy its value when @code{yytext} is
@ -5292,9 +5292,8 @@ its rules. This error can also occur due to internal problems.
@item
@samp{token too large, exceeds YYLMAX}. Your scanner uses @code{%array}
and one of its rules matched a string longer than the @code{YYLMAX}
constant (8K bytes by default). You can increase the value by
#define'ing @code{YYLMAX} in the definitions section of your @code{flex}
input.
constant (8K bytes by default). You can increase the value with the
@code{%option yylmax} option.
@item
@samp{scanner requires -8 flag to use the character 'x'}. Your scanner
@ -8721,10 +8720,13 @@ BEGIN: Replaced by yybegin()
ECHO: Replaced by yyecho()
@item
REJECT: Replaced by yyreject()
#define REJECT: Replaced by yyreject()
@item
YY_DECL: Replaced by the %yydecl directive.
#define YY_DECL: Replaced by the %yydecl directive.
@item
#define YYLMAX: Replaced by the %option yylmax option.
@end itemize
@node Indices, , Appendices, Top

View File

@ -89,7 +89,10 @@ m4_define([[M4_HOOK_EOF_STATE_CASE_TERMINATE]], [[ yyterminate();
m4_define([[M4_HOOK_CONST_DEFINE]], [[#define $1 $2
]])
m4_define([[M4_HOOK_SET_YY_DECL]], [[#define YY_DECL $1]])
m4_define([[M4_HOOK_SET_YY_DECL]], [[#define YY_DECL $1
]])
m4_define([[M4_HOOK_SET_YYLMAX]], [[#define YYLMAX $1
]])
%% [0.0] Make hook macros available to Flex

View File

@ -352,6 +352,7 @@ struct ctrl_bundle_t {
// otherwise, use fread().
char *yyclass; // yyFlexLexer subclass to use for YY_DECL
char *yydecl; // user-specified prototype for yylex.
int yylmax; // Maximum buffer length if %array (set by %option yylmax); when 0, no YYLMAX #define is emitted
bool yytext_is_array; // if true (i.e., %array directive), then declare
// yytext as array instead of a character pointer.
// Nice and inefficient.
@ -669,6 +670,7 @@ extern unsigned char *ccltbl;
/* Variables for miscellaneous information:
* nmstr - last NAME scanned by the scanner
* nmval - last numeric scanned by the scanner
* sectnum - section number currently being parsed
* nummt - number of empty nxt/chk table entries
* hshcol - number of hash collisions detected by snstods
@ -688,7 +690,7 @@ extern unsigned char *ccltbl;
*/
extern char nmstr[MAXLINE];
extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
extern int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs, nmval;
extern int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
extern int num_backing_up, bol_needed;

View File

@ -86,7 +86,7 @@ int lastccl, *cclmap, *ccllen, *cclng, cclreuse;
int current_maxccls, current_max_ccl_tbl_size;
unsigned char *ccltbl;
char nmstr[MAXLINE];
int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs, nmval;
int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
int num_backing_up, bol_needed;
int end_of_buffer_state;
@ -1288,6 +1288,10 @@ void readin (void)
out_str ("M4_HOOK_SET_YY_DECL(%s)\n", cp);
}
if (ctrl.yylmax != 0) {
out_dec ("M4_HOOK_SET_YYLMAX(%d)\n", ctrl.yylmax);
}
/* Dump the user defined preproc directives. */
if (userdef_buf.elts)
outn ((char *) (userdef_buf.elts));

View File

@ -2,7 +2,7 @@
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
%token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
%token TOK_TABLES_FILE
%token TOK_TABLES_FILE TOK_YYLMAX TOK_NUMERIC
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
@ -206,6 +206,8 @@ option : TOK_OUTFILE '=' NAME
{ ctrl.yyclass = xstrdup(nmstr); }
| TOK_HEADER_FILE '=' NAME
{ env.headerfilename = xstrdup(nmstr); }
| TOK_YYLMAX '=' TOK_NUMERIC
{ ctrl.yylmax = nmval; }
| TOK_TABLES_FILE '=' NAME
{ tablesext = true; tablesfilename = xstrdup(nmstr); }
;

View File

@ -351,6 +351,7 @@ M4QEND "]""]"
{WS} option_sense = true;
"=" return '=';
[[:digit:]]+ {nmval = atoi(yytext); return TOK_NUMERIC;}
no option_sense = ! option_sense;
@ -450,6 +451,7 @@ M4QEND "]""]"
outfile return TOK_OUTFILE;
prefix return TOK_PREFIX;
yyclass return TOK_YYCLASS;
yylmax return TOK_YYLMAX;
header(-file)? return TOK_HEADER_FILE;
tables-file return TOK_TABLES_FILE;
tables-verify {

14
tests/test-yylmax Executable file
View File

@ -0,0 +1,14 @@
#! /bin/sh
# Test the "%option yylmax=N" option of flex in any cpp-based back end.
#
# Feeds flex a minimal scanner that sets yylmax to 333 and checks that the
# generated source contains the matching "#define YYLMAX 333" line.

# Create the scratch input with mktemp instead of a predictable /tmp/td$$
# name, so a pre-existing file or symlink cannot be clobbered.
td=$(mktemp "${TMPDIR:-/tmp}/test-yylmax.XXXXXX") || exit 1

# Clean up on exit; -f keeps the cleanup quiet if the file is already gone.
# On a signal, exit (which fires the EXIT trap) instead of carrying on.
trap 'rm -f "$td"' EXIT
trap 'exit 1' HUP INT QUIT TERM

cat >"$td" <<EOF
%option yylmax=333
%%
%%
EOF

( ../src/flex -t "$td" | grep "#define YYLMAX 333" >/dev/null ) || (echo "%yylmax test failed." >&2; exit 1)