adding the rest of vern's files

This commit is contained in:
Will Estes 2001-05-01 20:47:11 +00:00
parent 26e78464e7
commit 2eae880030
52 changed files with 19026 additions and 0 deletions

15
examples/README Normal file
View File

@ -0,0 +1,15 @@
This directory contains some examples of what you can do with
flex. These files are not tested regularly so you might have to tinker
a bit before they work for you. Updates, new files and patches are welcome.
- debflex.awk, an awk script for annotating flex debug output.
It presently only works with gawk and mawk, not with "old"
or "new" awk.
- testxxLexer.l, a sample C++ program that uses flex's scanner
class option ("-+").
- fastwc/, a subdirectory containing examples of how to use flex
to write progressively higher-performance versions of the Unix
"wc" utility. This certainly should work with 2.5, but hasn't
been tested.

119
examples/debflex.awk Normal file
View File

@ -0,0 +1,119 @@
# Clarify the flex debug trace by substituting first line of each rule.
# Francois Pinard <pinard@iro.umontreal.ca>, July 1990.
#
# Rewritten to process correctly \n's in scanner input.
# BEGIN section modified to correct a collection of rules.
# Michal Jaegermann <michal@phys.ualberta.ca>, December 1993
#
# Sample usage:
# flex -d PROGRAM.l
# gcc -o PROGRAM PROGRAM.c -lfl
# PROGRAM 2>&1 | gawk -f debflex.awk PROGRAM.l
#
# (VP's note: this script presently does not work with either "old" or
# "new" awk; fixes so it does will be welcome)
# Read the flex source named on the command line and remember the first
# line of every rule (section 2 lines that do not start with a blank or
# tab), keyed by line number.  process() later uses this table to
# annotate the "accepting rule at line N" messages of the debug trace,
# which is read from standard input.
BEGIN {
    # Ensure proper usage.
    if (ARGC != 2) {
        print "usage: gawk -f debflex.awk FLEX_SOURCE <DEBUG_OUTPUT";
        exit (1);
    }
    # Remove and save the name of flex source.
    source = ARGV[1];
    ARGC--;
    # Swallow the flex source file.
    line = 0;
    section = 1;
    # getline yields 1 for a record, 0 at end of file and -1 on error.
    # -1 counts as "true", so the result must be compared against 0;
    # otherwise an unreadable source file makes this loop spin forever.
    while ((status = (getline < source)) > 0) {
        # Count the lines.
        line++;
        # Count the sections.  When encountering section 3,
        # break out of the awk BEGIN block.
        if (match ($0, /^%%/)) {
            section++;
            if (section == 3) {
                break;
            }
        }
        else {
            # Only the lines in section 2 which do not begin in a
            # tab or space might be referred to by the flex debug
            # trace.  Save only those lines.
            if (section == 2 && match ($0, /^[^ \t]/)) {
                rules[line] = $0;
            }
        }
    }
    if (status < 0) {
        print "debflex.awk: cannot read " source > "/dev/stderr";
        exit (1);
    }
    close(source);
    dashes = "-----------------------------------------------------------";
    collect = "";
    line = 0;
}
# collect complete rule output from a scanner
$0 !~ /^--/ {
collect = collect "\n" $0;
next;
}
# otherwise we have a new rule - process what we got so far
{
process();
}
# and the same thing if we hit EOF
END {
process();
}
# Emit the trace text gathered in `collect` and start a new collection
# with the current input record.  A trace message naming a rule line is
# replaced by "SOURCE:LINE: matched-text -- first line of that rule";
# buffer reload messages become a row of dashes; anything else is
# echoed with a leading "--".
# Uses the globals collect, line, rules, source and dashes; the
# variables n, i, field and text are not declared local, so they are
# global as well.
function process() {
# splitting this way we lose some double dashes and
# left parentheses from echoed input - a small price to pay
n = split(collect, field, "\n--|[(]");
# this loop kicks in only when we already collected something
for (i = 1; i <= n; i++) {
# `line` is non-zero only after an "accepting rule at line N" field
# was seen, so this field holds the text that rule matched.
if (0 != line) {
# we do not care for traces of newlines.
if (0 == match(field[i], /\"\n+\"[)]/)) {
if (rules[line]) {
# glue the remaining fields back together: the matched text may
# itself have been split on "(" or "\n--"
text = field[i];
while ( ++i <= n) {
text = text field[i];
}
printf("%s:%d: %-8s -- %s\n",
source, line, text, rules[line]);
}
else {
# no rule was saved for this line number: show the current input
# record as it is and report the problem
print;
printf "%s:%d: *** No such rule.\n", source, line;
}
}
# the rule reference has been consumed; drop the rest of this trace
line = 0;
break;
}
if ("" != field[i]) {
if ("end of buffer or a NUL)" == field[i]) {
print dashes; # Simplify trace of buffer reloads
continue;
}
if (match(field[i], /accepting rule at line /)) {
# force interpretation of line as a number
# (the substring starts at the blank that ends the matched prefix)
line = 0 + substr(field[i], RLENGTH);
continue;
}
# echo everything else
printf("--%s\n", field[i]);
}
}
collect = "\n" $0; # ... and start next trace
}

24
examples/manual/ChangeLog Normal file
View File

@ -0,0 +1,24 @@
Tue Oct 5 21:51:59 1993 Vern Paxson
* Removed FILTER/ subdirectory.
* Removed alloca.c.
* Changed Makefile definition of CC to just "gcc -g", removed
assumption of alloca being present.
* Added pointer to MISC/fastwc/ to wc.lex.
Tue Jun 8 15:47:39 1993 Gavin Thomas Nicol (nick at sillybugs)
* Changed main() in wc.lex extensively. The old version would not
work correctly without the YY_NEW_FILE. (lex handles the older
version OK though).
* Added a rule to expr.lex to handle whitespace. The old version
reported an illegal character.
* Added -traditional to the gcc flags because the flex definition
for free() clashes with some systems that have old header files.

88
examples/manual/Makefile Normal file
View File

@ -0,0 +1,88 @@
#############################################################
#
# Makefile : Makefile for Flex examples.
# Author : G.T.Nicol
# Last Updated : 1993/10/05
#
# If you use bison, you may have to supply an alloca
#
#############################################################
CC = gcc -g
LEX = flex -i -I
YACC = bison -d -y
ALLOCA =
############################################################
#
# DO NOT CHANGE ANYTHING FROM HERE ON !!!!!!!!!
#
############################################################
all: expr front myname eof wc replace user_act string1\
string2 yymore numbers dates cat
expr: expr.y expr.lex
$(YACC) expr.y
$(LEX) expr.lex
$(CC) -o expr lex.yy.c y.tab.c $(ALLOCA) -ll -lm
front: front.y front.lex
$(YACC) front.y
$(LEX) front.lex
$(CC) -o front lex.yy.c y.tab.c $(ALLOCA) -ll -lm
numbers: numbers.lex
$(LEX) numbers.lex
$(CC) lex.yy.c -o numbers
dates: dates.lex
$(LEX) dates.lex
$(CC) lex.yy.c -o dates -ll
yymore: yymore.lex
$(LEX) yymore.lex
$(CC) lex.yy.c -o yymore -ll
string1: string1.lex
$(LEX) string1.lex
$(CC) lex.yy.c -o string1 -ll
string2: string2.lex
$(LEX) string2.lex
$(CC) lex.yy.c -o string2 -ll
myname: myname.lex
$(LEX) myname.lex
$(CC) lex.yy.c -o myname -ll
myname2: myname2.lex
$(LEX) myname2.lex
$(CC) lex.yy.c -o myname2 -ll
eof: eof_rules.lex
$(LEX) eof_rules.lex
$(CC) lex.yy.c -o eof -ll
wc: wc.lex
$(LEX) wc.lex
$(CC) lex.yy.c -o wc -ll
cat: cat.lex
$(LEX) cat.lex
$(CC) lex.yy.c -o cat -ll
replace: replace.lex
$(LEX) replace.lex
$(CC) lex.yy.c -o replace -ll
# user_act is a stand-alone scanner: it is generated from user_act.lex
# only, so that file (not expr.y/expr.lex) is its prerequisite.
user_act: user_act.lex
	$(LEX) user_act.lex
	$(CC) -o user_act lex.yy.c -ll
clean:
rm -f *.BAK *.o core *~* *.a
rm -f *.tab.h *.tab.c
rm -f myname expr lex.yy.c *.out eof wc yymore
rm -f replace front user_act string1 string2
rm -f dates numbers cat

17
examples/manual/README Normal file
View File

@ -0,0 +1,17 @@
This directory contains the example programs from the manual, and a few
other things as well. To make all the programs, simply type "make",
and assuming you have flex and gcc, all will be well.
To build the programs individually, type
make program_name
For example:
make expr
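The subdirectory FILTER contained a collection of the silly filters
that have appeared on the Internet over the years. The author of the
flex manual included them for fun, but did not guarantee they would
work with flex, or even work at all. (According to the ChangeLog in
this directory, FILTER/ was removed on 5 October 1993, so it is no
longer part of this distribution.)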

45
examples/manual/cat.lex Normal file
View File

@ -0,0 +1,45 @@
/*
* cat.lex: A demonstration of YY_NEW_FILE.
*/
%{
#include <stdio.h>
char **names = NULL;
int current = 1;
%}
%%
<<EOF>> {
    /*
     * The current file is exhausted.  Close it so that its handle is
     * not leaked, then continue with the next name on the command
     * line.  names is main()'s argv, whose last element is NULL, so
     * a NULL entry means there are no more files.
     */
    fclose(yyin);
    current += 1;
    if(names[current] != NULL){
        yyin = fopen(names[current],"r");
        if(yyin == NULL){
            fprintf(stderr,"cat: unable to open %s\n",
                    names[current]);
            yyterminate();
        }
        /* tell the scanner that yyin now refers to a fresh file */
        YY_NEW_FILE;
    } else {
        yyterminate();
    }
}
%%
/*
 * Concatenate the files named on the command line to the scanner's
 * output.  The first file is opened here; the <<EOF>> rule opens the
 * remaining ones.
 * Returns 0 on success and 1 when no file was given or the first file
 * cannot be opened.
 */
int main(int argc, char **argv)
{
    if(argc < 2){
        fprintf(stderr,"Usage: cat files....\n");
        return 1;
    }
    names = argv;
    yyin = fopen(names[current],"r");
    if(yyin == NULL){
        fprintf(stderr,"cat: unable to open %s\n",
                names[current]);
        /* report the failure through the exit status; yyterminate()
         * would have returned 0 from main() here */
        return 1;
    }
    yylex();
    return 0;
}

106
examples/manual/dates.lex Normal file
View File

@ -0,0 +1,106 @@
/*
* dates.lex: An example of using start states to
* distinguish between different date formats.
*/
%{
#include <ctype.h>
char month[20],dow[20],day[20],year[20];
%}
skip of|the|[ \t,]*
mon (mon(day)?)
tue (tue(sday)?)
wed (wed(nesday)?)
thu (thu(rsday)?)
fri (fri(day)?)
sat (sat(urday)?)
sun (sun(day)?)
day_of_the_week ({mon}|{tue}|{wed}|{thu}|{fri}|{sat}|{sun})
jan (jan(uary)?)
feb (feb(ruary)?)
mar (mar(ch)?)
apr (apr(il)?)
may (may)
jun (jun(e)?)
jul (jul(y)?)
aug (aug(ust)?)
sep (sep(tember)?)
oct (oct(ober)?)
nov (nov(ember)?)
dec (dec(ember)?)
first_half ({jan}|{feb}|{mar}|{apr}|{may}|{jun})
second_half ({jul}|{aug}|{sep}|{oct}|{nov}|{dec})
month {first_half}|{second_half}
nday [1-9]|[1-2][0-9]|3[0-1]
nmonth [1-9]|1[0-2]
nyear [0-9]{1,4}
year_ext (ad|AD|bc|BC)?
day_ext (st|nd|rd|th)?
%s LONG SHORT
%s DAY DAY_FIRST YEAR_FIRST YEAR_LAST YFMONTH YLMONTH
%%
/* the default is month-day-year */
<LONG>{day_of_the_week} strcpy(dow,yytext);
<LONG>{month} strcpy(month,yytext); BEGIN(DAY);
/* handle the form: day-month-year */
<LONG>{nday}{day_ext} strcpy(day,yytext); BEGIN(DAY_FIRST);
<DAY_FIRST>{month} strcpy(month,yytext); BEGIN(LONG);
<DAY>{nday}{day_ext} strcpy(day,yytext); BEGIN(LONG);
<LONG>{nyear}{year_ext} {
printf("Long:\n");
printf(" DOW : %s \n",dow);
printf(" Day : %s \n",day);
printf(" Month : %s \n",month);
printf(" Year : %s \n",yytext);
strcpy(dow,"");
strcpy(day,"");
strcpy(month,"");
}
/* handle dates of the form: day-month-year */
<SHORT>{nday} strcpy(day,yytext); BEGIN(YEAR_LAST);
<YEAR_LAST>{nmonth} strcpy(month,yytext);BEGIN(YLMONTH);
<YLMONTH>{nyear} strcpy(year,yytext); BEGIN(SHORT);
/* handle dates of the form: year-month-day */
<SHORT>{nyear} strcpy(year,yytext); BEGIN(YEAR_FIRST);
<YEAR_FIRST>{nmonth} strcpy(month,yytext);BEGIN(YFMONTH);
<YFMONTH>{nday} strcpy(day,yytext); BEGIN(SHORT);
<SHORT>\n {
printf("Short:\n");
printf(" Day : %s \n",day);
printf(" Month : %s \n",month);
printf(" Year : %s \n",year);
strcpy(year,"");
strcpy(day,"");
strcpy(month,"");
}
long\n BEGIN(LONG);
short\n BEGIN(SHORT);
{skip}*
\n
.

View File

@ -0,0 +1,28 @@
short
1989:12:23
1989:11:12
23:12:1989
11:12:1989
1989/12/23
1989/11/12
23/12/1989
11/12/1989
1989-12-23
1989-11-12
23-12-1989
11-12-1989
long
Friday the 5th of January, 1989
Friday, 5th of January, 1989
Friday, January 5th, 1989
Fri, January 5th, 1989
Fri, Jan 5th, 1989
Fri, Jan 5, 1989
FriJan 5, 1989
FriJan5, 1989
FriJan51989
Jan51989

View File

@ -0,0 +1,65 @@
/*
 * eof_rules.lex : An example of using multiple buffers
 *                 EOF rules, and start states
 */
%{
#define MAX_NEST 10

/*
 * Buffers of the files that are suspended while an included file is
 * being scanned.  include_count is the index of the most recently
 * saved buffer; -1 means that no file is suspended.
 */
YY_BUFFER_STATE include_stack[MAX_NEST];
int include_count = -1;
%}
%x INCLUDE
%%
^"#include"[ \t]*\"   BEGIN(INCLUDE);
<INCLUDE>\"           BEGIN(INITIAL);
<INCLUDE>[^\"]+       { /* get the include file name */
    /* the stack is full once its last slot (MAX_NEST - 1) is in use */
    if ( include_count >= MAX_NEST - 1 ){
        fprintf( stderr, "Too many include files" );
        exit( 1 );
    }
    include_stack[++include_count] = YY_CURRENT_BUFFER;
    yyin = fopen( yytext, "r" );
    if ( ! yyin ){
        fprintf( stderr, "Unable to open \"%s\"\n",yytext);
        exit( 1 );
    }
    yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
    BEGIN(INITIAL);
}
<INCLUDE><<EOF>>      {
    fprintf( stderr, "EOF in include" );
    yyterminate();
}
<<EOF>>               {
    if ( include_count < 0 ){
        /* no suspended file left: this was the outermost input */
        yyterminate();
    } else {
        /* discard the buffer of the file that just ended and resume
         * the file that included it */
        yy_delete_buffer( YY_CURRENT_BUFFER );
        yy_switch_to_buffer(include_stack[include_count--] );
        /* the closing quote of the #include line is still unread in
         * the resumed file; the INCLUDE state consumes it */
        BEGIN(INCLUDE);
    }
}
[a-z]+                ECHO;
.|\n                  ECHO;

View File

@ -0,0 +1,17 @@
This is test file #1
-------------------------------------------------
We will now include test #2 in a standard way.
#include "eof_test02.txt"
-------------------------------------------------
And now we will include test # 2 with a different
format
#include "eof_test02.txt"
-------------------------------------------------
and this is the end of the test.

View File

@ -0,0 +1,8 @@
INCLUDE #2
This is the second file that will
be included.
>>> Foo are GNU?
#include "eof_test03.txt"

View File

@ -0,0 +1,7 @@
INCLUDE #3
This is the third file that will
be included.
>>> echo "I am `whoami`!!"

35
examples/manual/expr.lex Normal file
View File

@ -0,0 +1,35 @@
/*
* expr.lex : Scanner for a simple
* expression parser.
*/
%{
#include "y.tab.h"
%}
%%
[0-9]+ { yylval.val = atol(yytext);
return(NUMBER);
}
[0-9]+\.[0-9]+ {
sscanf(yytext,"%f",&yylval.val);
return(NUMBER);
}
"+" return(PLUS);
"-" return(MINUS);
"*" return(MULT);
"/" return(DIV);
"^" return(EXPON);
"(" return(LB);
")" return(RB);
\n return(EOL);
[\t ]* /* throw away whitespace */
. { yyerror("Illegal character");
return(EOL);
}
%%

64
examples/manual/expr.y Normal file
View File

@ -0,0 +1,64 @@
/*
* expr.y : A simple yacc expression parser
* Based on the Bison manual example.
*/
%{
#include <stdio.h>
#include <math.h>
%}
%union {
float val;
}
%token NUMBER
%token PLUS MINUS MULT DIV EXPON
%token EOL
%token LB RB
%left MINUS PLUS
%left MULT DIV
%right EXPON
%type <val> exp NUMBER
%%
input :
| input line
;
line : EOL
| exp EOL { printf("%g\n",$1);}
exp : NUMBER { $$ = $1; }
| exp PLUS exp { $$ = $1 + $3; }
| exp MINUS exp { $$ = $1 - $3; }
| exp MULT exp { $$ = $1 * $3; }
| exp DIV exp { $$ = $1 / $3; }
| MINUS exp %prec MINUS { $$ = -$2; }
| exp EXPON exp { $$ = pow($1,$3);}
| LB exp RB { $$ = $2; }
;
%%
/*
 * Error callback invoked by the generated parser: print `message` on
 * standard output, followed by a newline.  Always returns 0.
 */
int yyerror(char *message)
{
    printf("%s\n",message);
    return(0);
}
int main(int argc, char *argv[])
{
yyparse();
return(0);
}

40
examples/manual/front.lex Normal file
View File

@ -0,0 +1,40 @@
%{
#include <stdio.h>
#include <string.h>
#include "y.tab.h" /* this comes from bison */
#define TRUE 1
#define FALSE 0
#define copy_and_return(token_type) { strcpy(yylval.name,yytext); \
return(token_type); }
int yylexlinenum = 0; /* so we can count lines */
%}
%%
/* Lexical scanning rules begin from here. */
MEN|WOMEN|STOCKS|TREES copy_and_return(NOUN)
MISTAKES|GNUS|EMPLOYEES copy_and_return(NOUN)
LOSERS|USERS|CARS|WINDOWS copy_and_return(NOUN)
DATABASE|NETWORK|FSF|GNU copy_and_return(PROPER_NOUN)
COMPANY|HOUSE|OFFICE|LPF copy_and_return(PROPER_NOUN)
THE|THIS|THAT|THOSE copy_and_return(DECLARATIVE)
ALL|FIRST|LAST copy_and_return(CONDITIONAL)
FIND|SEARCH|SORT|ERASE|KILL copy_and_return(VERB)
ADD|REMOVE|DELETE|PRINT copy_and_return(VERB)
QUICKLY|SLOWLY|CAREFULLY copy_and_return(ADVERB)
IN|AT|ON|AROUND|INSIDE|ON copy_and_return(POSITIONAL)
"." return(PERIOD);
"\n" yylexlinenum++; return(NEWLINE);
.
%%

118
examples/manual/front.y Normal file
View File

@ -0,0 +1,118 @@
/* C code supplied at the beginning of the file. */
%{
#include <stdio.h>
#include <string.h>
extern int yylexlinenum; /* these are in YYlex */
extern char *yytext; /* current token */
%}
/* Keywords and reserved words begin here. */
%union{ /* this is the data union */
char name[128]; /* names */
}
/*-------------------- the reserved words -----------------------------*/
%token PERIOD
%token NEWLINE
%token POSITIONAL
%token VERB
%token ADVERB
%token PROPER_NOUN
%token NOUN
%token DECLARATIVE
%token CONDITIONAL
%type <name> declarative
%type <name> verb_phrase
%type <name> noun_phrase
%type <name> position_phrase
%type <name> adverb
%type <name> POSITIONAL VERB ADVERB PROPER_NOUN
%type <name> NOUN DECLARATIVE CONDITIONAL
%%
sentence_list : sentence
| sentence_list NEWLINE sentence
;
sentence : verb_phrase noun_phrase position_phrase adverb period
{
printf("I understand that sentence.\n");
printf("VP = %s \n",$1);
printf("NP = %s \n",$2);
printf("PP = %s \n",$3);
printf("AD = %s \n",$4);
}
| { yyerror("That's a strange sentence !!"); }
;
position_phrase : POSITIONAL declarative PROPER_NOUN
{
sprintf($$,"%s %s %s",$1,$2,$3);
}
| /* empty */ { strcpy($$,""); }
;
verb_phrase : VERB { strcpy($$,$1); strcat($$," "); }
| adverb VERB
{
sprintf($$,"%s %s",$1,$2);
}
;
adverb : ADVERB { strcpy($$,$1); }
| /* empty */ { strcpy($$,""); }
;
noun_phrase : DECLARATIVE NOUN
{
sprintf($$,"%s %s",$1,$2);
}
| CONDITIONAL declarative NOUN
{
sprintf($$,"%s %s %s",$1,$2,$3);
}
| NOUN { strcpy($$,$1); strcat($$," "); }
;
declarative : DECLARATIVE { strcpy($$,$1); }
| /* empty */ { strcpy($$,""); }
;
period : /* empty */
| PERIOD
;
%%
/* Supplied main() and yyerror() functions. */
int main(int argc, char *argv[])
{
yyparse(); /* parse the file */
return(0);
}
/*
 * Error callback: write `message` together with the scanner's current
 * line counter (yylexlinenum) to the scanner output stream yyout.
 * Always returns 0.
 */
int yyerror(char *message)
{
    extern FILE *yyout;

    fprintf(yyout,"\nError at line %5d. (%s) \n",
            yylexlinenum,message);
    return(0);
}

442
examples/manual/j2t.lex Normal file
View File

@ -0,0 +1,442 @@
/*
* j2t.lex : An example of the use (possibly abuse!)
* of start states.
*/
%{
#define MAX_STATES 1024
#define TRUE 1
#define FALSE 0
#define CHAPTER "@chapter"
#define SECTION "@section"
#define SSECTION "@subsection"
#define SSSECTION "@subsubsection"
int states[MAX_STATES];
int statep = 0;
int need_closing = FALSE;
char buffer[YY_BUF_SIZE];
extern char *yytext;
/*
* set up the head of the *.texinfo file the program
* will produce. This is a standard texinfo header.
*/
void print_header(void)
{
printf("\\input texinfo @c -*-texinfo-*-\n");
printf("@c %c**start of header\n",'%');
printf("@setfilename jargon.info\n");
printf("@settitle The New Hackers Dictionary\n");
printf("@synindex fn cp\n");
printf("@synindex vr cp\n");
printf("@c %c**end of header\n",'%');
printf("@setchapternewpage odd\n");
printf("@finalout\n");
printf("@c @smallbook\n");
printf("\n");
printf("@c ==========================================================\n\n");
printf("@c This file was produced by j2t. Any mistakes are *not* the\n");
printf("@c fault of the jargon file editors. \n");
printf("@c ==========================================================\n\n");
printf("@titlepage\n");
printf("@title The New Hackers Dictionary\n");
printf("@subtitle Version 2.9.10\n");
printf("@subtitle Generated by j2t\n");
printf("@author Eric S. Raymond, Guy L. Steel, Mark Crispin et al.\n");
printf("@end titlepage\n");
printf("@page\n");
printf("\n@c ==========================================================\n");
printf("\n\n");
printf("@unnumbered Preface\n");
printf("@c *******\n");
}
/*
* create the tail of the texinfo file produced.
*/
void print_trailer(void)
{
printf("\n@c ==========================================================\n");
printf("@contents\n"); /* print the table of contents */
printf("@bye\n\n");
}
/*
 * Emit a texinfo comment line that underlines a heading: "@c ",
 * padding blanks up to column `space`, then `len` copies of `ch`,
 * followed by an empty line.
 */
void write_underline(int len, int space, char ch)
{
    /* three columns are already taken by the "@c " prefix */
    int pad = (space > 3) ? space - 3 : 0;

    printf("@c %*s", pad, "");
    for(; len != 0; len--){
        putchar(ch);
    }
    fputs("\n\n", stdout);
}
/*
 * check for texinfo special characters
 * and escape them
 *
 * Copies `string` into the file-scope `buffer`, putting an '@' in
 * front of every '@', '{' and '}'.  Returns `buffer`, so the result is
 * overwritten by the next call.  Escaping can double the length of
 * the text; input that does not fit is truncated instead of being
 * written past the end of `buffer`.
 */
char *check_and_convert(char *string)
{
    size_t out = 0;
    const char *in;

    for(in = string; *in != '\0'; in++){
        int special = (*in == '@' || *in == '{' || *in == '}');

        /* room is needed for the character itself, the optional
         * escape and the terminating NUL */
        if(out + (special ? 2 : 1) >= sizeof(buffer))
            break;
        if(special)
            buffer[out++] = '@';
        buffer[out++] = *in;
    }
    buffer[out] = '\0';
    return(buffer);
}
/*
 * write out a chapter,section, or subsection
 * header
 *
 * `type` is the texinfo command to emit (e.g. CHAPTER or SECTION).
 * The title is the first line of the current match (yytext), escaped
 * by check_and_convert(); an underline comment of the same length is
 * written below it.
 */
void write_block_header(char *type)
{
    int loop;

    (void)check_and_convert(yytext);
    /* cut the title at its first newline; also stop at the terminator
     * so a match without a newline cannot be scanned past its end */
    for(loop=0; buffer[loop] != '\n' && buffer[loop] != '\0'; loop++)
        ;
    buffer[loop] = '\0';
    printf("%s %s\n",type,buffer);
    write_underline(strlen(buffer),strlen(type)+1,'*');
}
%}
/*
* the flex description starts here
*/
%x HEADING EXAMPLE ENUM EXAMPLE2
%x BITEM BITEM_ITEM
%s LITEM LITEM2
%%
^#[^#]*"#" /* skip the header & trailer */
/* chapters have asterisks under them
* and are terminated by a colon
*/
^[^\n:]+\n[*]+\n write_block_header(CHAPTER);
^"= "[A-Z]" ="\n"="* { /* we create a seciton for each category */
if(need_closing == TRUE){
printf("@end table\n\n\n");
}
need_closing = TRUE;
write_block_header(SECTION);
printf("\n\n@table @b\n");
}
"Examples:"[^\.]+ ECHO;
"*"[^*\n]+"*" { /* @emph{}(emphasized) text */
yytext[yyleng-1] = '\0';
(void)check_and_convert(&yytext[1]);
printf("@i{%s}",buffer);
}
"{{"[^}]+"}}" { /* special emphasis */
yytext[yyleng-2] = '\0';
(void)check_and_convert(&yytext[2]);
printf("@b{%s}",buffer);
}
"{"[^}]+"}" { /* special emphasis */
yytext[yyleng-1] = '\0';
(void)check_and_convert(&yytext[1]);
printf("@b{%s}",buffer);
}
/* escape some special texinfo characters */
<INITIAL,LITEM,LITEM2,BITEM,ENUM,EXAMPLE,EXAMPLE2>"@" printf("@@");
<INITIAL,LITEM,LITEM2,BITEM,ENUM,EXAMPLE,EXAMPLE2>"{" printf("@{");
<INITIAL,LITEM,LITEM2,BITEM,ENUM,EXAMPLE,EXAMPLE2>"}" printf("@}");
/*
* reproduce @example code
*/
":"\n+[^\n0-9*]+\n" "[^ ] {
int loop;
int len;
int cnt;
printf(":\n\n@example \n");
strcpy(buffer,yytext);
len = strlen(buffer);
cnt = 0;
for(loop=len; loop > 0;loop--){
if(buffer[loop] == '\n')
cnt++;
if(cnt == 2)
break;
}
yyless(loop+1);
statep++;
states[statep] = EXAMPLE2;
BEGIN(EXAMPLE2);
}
<EXAMPLE,EXAMPLE2>^\n {
printf("@end example\n\n");
statep--;
BEGIN(states[statep]);
}
/*
* repoduce @enumerate lists
*/
":"\n+[ \t]*[0-9]+"." {
int loop;
int len;
printf(":\n\n@enumerate \n");
strcpy(buffer,yytext);
len = strlen(buffer);
for(loop=len; loop > 0;loop--){
if(buffer[loop] == '\n')
break;
}
yyless(loop);
statep++;
states[statep] = ENUM;
BEGIN(ENUM);
}
<ENUM>"@" printf("@@");
<ENUM>":"\n+" "[^0-9] {
printf(":\n\n@example\n");
statep++;
states[statep] = EXAMPLE;
BEGIN(EXAMPLE);
}
<ENUM>\n[ \t]+[0-9]+"." {
printf("\n\n@item ");
}
<ENUM>^[^ ] |
<ENUM>\n\n\n[ \t]+[^0-9] {
printf("\n\n@end enumerate\n\n");
statep--;
BEGIN(states[statep]);
}
/*
* reproduce one kind of @itemize list
*/
":"\n+":" {
int loop;
int len;
printf(":\n\n@itemize @bullet \n");
yyless(2);
statep++;
states[statep] = LITEM2;
BEGIN(LITEM2);
}
<LITEM2>^":".+":" {
(void)check_and_convert(&yytext[1]);
buffer[strlen(buffer)-1]='\0';
printf("@item @b{%s:}\n",buffer);
}
<LITEM2>\n\n\n+[^:\n] {
printf("\n\n@end itemize\n\n");
ECHO;
statep--;
BEGIN(states[statep]);
}
/*
* create a list out of the revision history part.
* We need the "Version" for this because it
* clashes with other rules otherwise.
*/
:[\n]+"Version"[^:\n*]+":" {
int loop;
int len;
printf(":\n\n@itemize @bullet \n");
strcpy(buffer,yytext);
len = strlen(buffer);
for(loop=len; loop > 0;loop--){
if(buffer[loop] == '\n')
break;
}
yyless(loop);
statep++;
states[statep] = LITEM;
BEGIN(LITEM);
}
<LITEM>^.+":" {
(void)check_and_convert(yytext);
buffer[strlen(buffer)-1]='\0';
printf("@item @b{%s}\n\n",buffer);
}
<LITEM>^[^:\n]+\n\n[^:\n]+\n {
int loop;
strcpy(buffer,yytext);
for(loop=0; buffer[loop] != '\n'; loop++);
buffer[loop] = '\0';
printf("%s\n",buffer);
printf("@end itemize\n\n");
printf("%s",&buffer[loop+1]);
statep--;
BEGIN(states[statep]);
}
/*
* reproduce @itemize @bullet lists
*/
":"\n[ ]*"*" {
/* A colon, a newline, optional blanks and a '*' open a bullet list:
 * print the @itemize header, push BITEM on the state stack and hand
 * part of the match back to the scanner with yyless(). */
int loop;
int len;
printf(":\n\n@itemize @bullet \n");
/* NOTE(review): unlike the other list rules in this file, this one
 * does not copy yytext into buffer before measuring it, so len and
 * loop describe whatever an earlier action left in buffer, and the
 * value given to yyless() below depends on that stale text.
 * Presumably a strcpy(buffer,yytext) was intended - confirm before
 * relying on this rule. */
len = strlen(buffer);
for(loop=0; loop < len;loop++){
if(buffer[loop] == '\n')
break;
}
yyless((len-loop)+2);
/* remember the new state so that the closing rule can pop it */
statep++;
states[statep] = BITEM;
BEGIN(BITEM);
}
<BITEM>^" "*"*" {
printf("@item");
statep++;
states[statep] = BITEM_ITEM;
BEGIN(BITEM_ITEM);
}
<BITEM>"@" printf("@@");
<BITEM>^\n {
printf("@end itemize\n\n");
statep--;
BEGIN(states[statep]);
}
<BITEM_ITEM>[^\:]* {
printf(" @b{%s}\n\n",check_and_convert(yytext));
}
<BITEM_ITEM>":" {
statep--;
BEGIN(states[statep]);
}
/*
* recreate @chapter, @section etc.
*/
^:[^:]* {
(void)check_and_convert(&yytext[1]);
statep++;
states[statep] = HEADING;
BEGIN(HEADING);
}
<HEADING>:[^\n] {
printf("@item @b{%s}\n",buffer);
write_underline(strlen(buffer),6,'~');
statep--;
BEGIN(states[statep]);
}
<HEADING>:\n"*"* {
if(need_closing == TRUE){
printf("@end table\n\n\n");
need_closing = FALSE;
}
printf("@chapter %s\n",buffer);
write_underline(strlen(buffer),9,'*');
statep--;
BEGIN(states[statep]);
}
<HEADING>:\n"="* {
if(need_closing == TRUE){
printf("@end table\n\n\n");
need_closing = FALSE;
}
printf("@section %s\n",buffer);
write_underline(strlen(buffer),9,'=');
statep--;
BEGIN(states[statep]);
}
<HEADING>"@" printf("@@");
<HEADING>:\n"-"* {
if(need_closing == TRUE){
printf("@end table\n\n\n");
need_closing = FALSE;
}
printf("@subsection %s\n",buffer);
write_underline(strlen(buffer),12,'-');
statep--;
BEGIN(states[statep]);
}
/*
* recreate @example text
*/
^" " {
printf("@example\n");
statep++;
states[statep] = EXAMPLE;
BEGIN(EXAMPLE);
}
<EXAMPLE>^" "
. ECHO;
%%
/*
* initialise and go.
*/
int main(int argc, char *argv[])
{
states[0] = INITIAL;
statep = 0;
print_header();
yylex();
print_trailer();
return(0);
}

View File

@ -0,0 +1,15 @@
/*
 *
 * myname.lex : A sample Flex program
 *              that does token replacement.
 */
%{
#include <stdio.h>
#include <stdlib.h>

/*
 * Print the value of the environment variable `name` in place of the
 * matched placeholder.  getenv() returns NULL for an unset variable
 * and NULL must not be passed to "%s", so in that case the
 * placeholder text itself is printed unchanged.
 */
#define PRINT_ENV(name) \
    do { \
        const char *env_value = getenv(name); \
        printf("%s", env_value != NULL ? env_value : yytext); \
    } while (0)
%}
%%
%NAME     { PRINT_ENV("LOGNAME");  }
%HOST     { PRINT_ENV("HOST");     }
%HOSTTYPE { PRINT_ENV("HOSTTYPE"); }
%HOME     { PRINT_ENV("HOME");     }
%%
%%

View File

@ -0,0 +1,6 @@
Hello, my name name is %NAME. Actually
"%NAME" isn't my real name, it is the
alias I use when I'm on %HOST, which
is the %HOSTTYPE I use. My HOME
directory is %HOME.

View File

@ -0,0 +1,19 @@
/*
 * myname2.lex : A sample Flex program
 *               that does token replacement.
 *               Text inside double quotes is copied unchanged.
 */
%{
#include <stdio.h>
#include <stdlib.h>

/*
 * Print the value of the environment variable `name` in place of the
 * matched placeholder.  getenv() returns NULL for an unset variable
 * and NULL must not be passed to "%s", so in that case the
 * placeholder text itself is printed unchanged.
 */
#define PRINT_ENV(name) \
    do { \
        const char *env_value = getenv(name); \
        printf("%s", env_value != NULL ? env_value : yytext); \
    } while (0)
%}
%x STRING
%%
\"               ECHO; BEGIN(STRING);
<STRING>[^\"\n]* ECHO;
<STRING>\"       ECHO; BEGIN(INITIAL);
%NAME            { PRINT_ENV("LOGNAME");  }
%HOST            { PRINT_ENV("HOST");     }
%HOSTTYPE        { PRINT_ENV("HOSTTYPE"); }
%HOME            { PRINT_ENV("HOME");     }

145
examples/manual/numbers.lex Normal file
View File

@ -0,0 +1,145 @@
/*
* numbers.lex : An example of the definitions and techniques
* for scanning numbers
*/
%{
#include <stdio.h>
#define UNSIGNED_LONG_SYM 1
#define SIGNED_LONG_SYM 2
#define UNSIGNED_SYM 3
#define SIGNED_SYM 4
#define LONG_DOUBLE_SYM 5
#define FLOAT_SYM 6
union _yylval {
long double ylong_double;
float yfloat;
unsigned long yunsigned_long;
unsigned yunsigned;
long ysigned_long;
int ysigned;
} yylval;
%}
digit [0-9]
hex_digit [0-9a-fA-F]
oct_digit [0-7]
exponent [eE][+-]?{digit}+
i {digit}+
float_constant ({i}\.{i}?|{i}?\.{i}){exponent}?
hex_constant 0[xX]{hex_digit}+
oct_constant 0{oct_digit}*
int_constant {digit}+
long_ext [lL]
unsigned_ext [uU]
float_ext [fF]
ulong_ext {long_ext}{unsigned_ext}|{unsigned_ext}{long_ext}
%%
{hex_constant}{ulong_ext} { /* we need to skip the "0x" part */
sscanf(&yytext[2],"%lx",&yylval.yunsigned_long);
return(UNSIGNED_LONG_SYM);
}
{hex_constant}{long_ext} {
sscanf(&yytext[2],"%lx",&yylval.ysigned_long);
return(SIGNED_LONG_SYM);
}
{hex_constant}{unsigned_ext} {
sscanf(&yytext[2],"%x",&yylval.yunsigned);
return(UNSIGNED_SYM);
}
{hex_constant} { /* use %lx to protect against overflow */
sscanf(&yytext[2],"%lx",&yylval.ysigned_long);
return(SIGNED_LONG_SYM);
}
{oct_constant}{ulong_ext} {
sscanf(yytext,"%lo",&yylval.yunsigned_long);
return(UNSIGNED_LONG_SYM);
}
{oct_constant}{long_ext} {
sscanf(yytext,"%lo",&yylval.ysigned_long);
return(SIGNED_LONG_SYM);
}
{oct_constant}{unsigned_ext} {
sscanf(yytext,"%o",&yylval.yunsigned);
return(UNSIGNED_SYM);
}
{oct_constant} { /* use %lo to protect against overflow */
sscanf(yytext,"%lo",&yylval.ysigned_long);
return(SIGNED_LONG_SYM);
}
{int_constant}{ulong_ext} {
    /* decimal constants: each conversion must match the type of the
     * yylval member it stores into (%lu for unsigned long) */
    sscanf(yytext,"%lu",&yylval.yunsigned_long);
    return(UNSIGNED_LONG_SYM);
}
{int_constant}{long_ext} {
    sscanf(yytext,"%ld",&yylval.ysigned_long);
    return(SIGNED_LONG_SYM);
}
{int_constant}{unsigned_ext} {
    /* %u, because yunsigned is an unsigned int */
    sscanf(yytext,"%u",&yylval.yunsigned);
    return(UNSIGNED_SYM);
}
{int_constant} { /* use %ld to protect against overflow */
    sscanf(yytext,"%ld",&yylval.ysigned_long);
    return(SIGNED_LONG_SYM);
}
{float_constant}{long_ext} {
    /* ylong_double is a long double, which needs %Lf; %lf would make
     * sscanf store a double into it */
    sscanf(yytext,"%Lf",&yylval.ylong_double);
    return(LONG_DOUBLE_SYM);
}
{float_constant}{float_ext} {
    sscanf(yytext,"%f",&yylval.yfloat);
    return(FLOAT_SYM);
}
{float_constant} { /* use %Lf (long double) to protect against overflow */
    sscanf(yytext,"%Lf",&yylval.ylong_double);
    return(LONG_DOUBLE_SYM);
}
%%
/*
 * Scan the input and, for every number token returned by yylex(),
 * print its text, its type and its value as stored in yylval.
 * The loop ends when yylex() returns 0.  Always returns 0.
 */
int main(void)
{
    int code;

    while((code = yylex())){
        printf("yytext : %s\n",yytext);
        switch(code){
        case UNSIGNED_LONG_SYM:
            printf("Type of number : UNSIGNED LONG\n");
            printf("Value of number : %lu\n",yylval.yunsigned_long);
            break;
        case SIGNED_LONG_SYM:
            printf("Type of number : SIGNED LONG\n");
            printf("Value of number : %ld\n",yylval.ysigned_long);
            break;
        case UNSIGNED_SYM:
            printf("Type of number : UNSIGNED\n");
            printf("Value of number : %u\n",yylval.yunsigned);
            break;
        case SIGNED_SYM:
            printf("Type of number : SIGNED\n");
            printf("Value of number : %d\n",yylval.ysigned);
            break;
        case LONG_DOUBLE_SYM:
            printf("Type of number : LONG DOUBLE\n");
            /* a long double argument requires the L length modifier */
            printf("Value of number : %Lf\n",yylval.ylong_double);
            break;
        case FLOAT_SYM:
            printf("Type of number : FLOAT\n");
            /* the float is promoted to double when passed to printf */
            printf("Value of number : %f\n",yylval.yfloat);
            break;
        default:
            printf("Type of number : UNDEFINED\n");
            printf("Value of number : UNDEFINED\n");
            break;
        }
    }
    return(0);
}

View File

@ -0,0 +1,78 @@
/*
 * eof_rules.lex : An example of using multiple buffers
 *                 EOF rules, and start states
 *
 * This variant recognises "$include(file)" directives that appear
 * inside "{ ... }" comments.
 */
%{
#define MAX_NEST 10

/*
 * Buffers of the files that are suspended while an included file is
 * being scanned.  include_count is the index of the most recently
 * saved buffer; -1 means that no file is suspended.
 */
YY_BUFFER_STATE include_stack[MAX_NEST];
int include_count = -1;
%}
%x INCLUDE
%x COMMENT
%%
"{"                            BEGIN(COMMENT);
<COMMENT>"}"                   BEGIN(INITIAL);
<COMMENT>"$include"[ \t]*"("   BEGIN(INCLUDE);
<COMMENT>[ \t]*                /* skip whitespace */
<INCLUDE>")"                   BEGIN(COMMENT);
<INCLUDE>[ \t]*                /* skip whitespace */
<INCLUDE>[^ \t\n() ]+          { /* get the include file name */
    /* the stack is full once its last slot (MAX_NEST - 1) is in use */
    if ( include_count >= MAX_NEST - 1 ){
        fprintf( stderr, "Too many include files" );
        exit( 1 );
    }
    include_stack[++include_count] = YY_CURRENT_BUFFER;
    yyin = fopen( yytext, "r" );
    if ( ! yyin ){
        fprintf( stderr, "Unable to open %s",yytext);
        exit( 1 );
    }
    yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
    BEGIN(INITIAL);
}
<INCLUDE><<EOF>>               {
    fprintf( stderr, "EOF in include" );
    yyterminate();
}
<COMMENT><<EOF>>               {
    fprintf( stderr, "EOF in comment" );
    yyterminate();
}
<<EOF>>                        {
    if ( include_count < 0 ){
        /* no suspended file left: this was the outermost input */
        yyterminate();
    } else {
        /* discard the buffer of the file that just ended and resume
         * the file that included it */
        yy_delete_buffer( YY_CURRENT_BUFFER );
        yy_switch_to_buffer(include_stack[include_count--] );
        /* the ")" that closes the directive is still unread in the
         * resumed file; the INCLUDE state consumes it */
        BEGIN(INCLUDE);
    }
}
[a-z]+                         ECHO;
.|\n                           ECHO;

120
examples/manual/pascal.lex Normal file
View File

@ -0,0 +1,120 @@
/*
* pascal.lex: An example PASCAL scanner
*
*/
%{
#include <stdio.h>
#include "y.tab.h"
int line_number = 0;
void yyerror(char *message);
%}
%x COMMENT1 COMMENT2
white_space [ \t]*
digit [0-9]
alpha [A-Za-z_]
alpha_num ({alpha}|{digit})
hex_digit [0-9A-F]
identifier {alpha}{alpha_num}*
unsigned_integer {digit}+
hex_integer ${hex_digit}{hex_digit}*
exponent e[+-]?{digit}+
i {unsigned_integer}
real ({i}\.{i}?|{i}?\.{i}){exponent}?
string \'([^'\n]|\'\')+\'
bad_string \'([^'\n]|\'\')+
%%
"{" BEGIN(COMMENT1);
<COMMENT1>[^}\n]+
<COMMENT1>\n ++line_number;
<COMMENT1><<EOF>> yyerror("EOF in comment");
<COMMENT1>"}" BEGIN(INITIAL);
"(*" BEGIN(COMMENT2);
<COMMENT2>[^)*\n]+
<COMMENT2>\n ++line_number;
<COMMENT2><<EOF>> yyerror("EOF in comment");
<COMMENT2>"*)" BEGIN(INITIAL);
<COMMENT2>[*)]
/* note that FILE and BEGIN are already
* defined in FLEX or C so they can't
* be used. This can be overcome in
* a cleaner way by defining all the
* tokens to start with TOK_ or some
* other prefix.
*/
and return(AND);
array return(ARRAY);
begin return(_BEGIN);
case return(CASE);
const return(CONST);
div return(DIV);
do return(DO);
downto return(DOWNTO);
else return(ELSE);
end return(END);
file return(_FILE);
for return(FOR);
function return(FUNCTION);
goto return(GOTO);
if return(IF);
in return(IN);
label return(LABEL);
mod return(MOD);
nil return(NIL);
not return(NOT);
of return(OF);
packed return(PACKED);
procedure return(PROCEDURE);
program return(PROGRAM);
record return(RECORD);
repeat return(REPEAT);
set return(SET);
then return(THEN);
to return(TO);
type return(TYPE);
until return(UNTIL);
var return(VAR);
while return(WHILE);
with                 return(WITH);
    /* operators, literals, identifiers and punctuation.  Token names
     * come from y.tab.h; single-character punctuation is returned as
     * its own character code. */
"<="|"=<"            return(LEQ);
"=>"|">="            return(GEQ);
"<>"                 return(NEQ);
"="                  return(EQ);
".."                 return(DOUBLEDOT);
{unsigned_integer}   return(UNSIGNED_INTEGER);
{real}               return(REAL);
{hex_integer}        return(HEX_INTEGER);
{string}             return(STRING);
{bad_string}         yyerror("Unterminated string");
{identifier}         return(IDENTIFIER);
[*/+\-,^.;:()\[\]]   return(yytext[0]);
{white_space}        /* do nothing */
\n                   line_number += 1;
.                    yyerror("Illegal input");
%%
void yyerror(char *message)
{
fprintf(stderr,"Error: \"%s\" in line %d. Token = %s\n",
message,line_number,yytext);
exit(1);
}

View File

@ -0,0 +1,12 @@
/*
* reject.lex: An example of REJECT and unput()
* misuse.
*/
%%
UNIX {
unput('U'); unput('N'); unput('G'); unput('\0');
REJECT;
}
GNU printf("GNU is Not Unix!\n");
%%

View File

@ -0,0 +1,33 @@
/*
* replace.lex : A simple filter for renaming
* parts of flex of bison generated
* scanners or parsers.
*/
%{
#include <stdio.h>
char lower_replace[1024];
char upper_replace[1024];
%}
%%
"yy" printf("%s",lower_replace);
"YY" printf("%s",upper_replace);
, ECHO;
%%
/*
 * Usage: replace lower UPPER
 * Filters the input, writing `lower` for every "yy" and `UPPER` for
 * every "YY".  Returns 1 when either replacement is missing, 0
 * otherwise.
 */
int main(int argc, char *argv[])
{
    /* both replacement strings are required: argv[1] and argv[2] */
    if(argc < 3){
        printf("Usage %s lower UPPER\n",argv[0]);
        return(1);
    }
    /* bounded copies: an over-long argument is truncated instead of
     * overflowing the fixed-size replacement buffers */
    snprintf(lower_replace,sizeof(lower_replace),"%s",argv[1]);
    snprintf(upper_replace,sizeof(upper_replace),"%s",argv[2]);
    yylex();
    return(0);
}

View File

@ -0,0 +1,98 @@
/*
* string1.lex: Handling strings by using input()
*/
%{
#include <ctype.h>
#include <malloc.h>
#define ALLOC_SIZE 32 /* for (re)allocating the buffer */
#define isodigit(x) ((x) >= '0' && (x) <= '7')
#define hextoint(x) (isdigit((x)) ? (x) - '0' : ((x) - 'A') + 10)
/*
 * yyerror: report a scanner problem on stdout.  The output is a newline,
 * the text "Error: ", the caller's message and a closing newline.
 */
void yyerror(char *message)
{
    fprintf(stdout, "\nError: %s\n", message);
}
%}
%%
\" {
    /*
     * Opening quote seen: read the rest of the literal by hand with
     * input(), decoding backslash escapes into a heap buffer that grows
     * ALLOC_SIZE bytes at a time.  The decoded text is printed and the
     * buffer released before the action ends.  If memory cannot be
     * obtained the problem is reported and the scanner terminates.
     */
    int inch,count,max_size;
    char *buffer,*grown;
    int temp;

    max_size = ALLOC_SIZE;
    buffer = malloc(max_size);
    if(buffer == NULL){
        yyerror("Out of memory.");
        yyterminate();
    }
    count = 0;
    inch = input();
    while(inch != EOF && inch != '"' && inch != '\n'){
        if(inch == '\\'){
            inch = input();
            if(inch == EOF){
                /* input ran out right after the backslash */
                break;
            }
            if(inch == '\n'){
                /*
                 * Backslash-newline is a line continuation: drop both
                 * characters and re-test the next one, so that a closing
                 * quote, another escape or the end of input directly
                 * after the continuation is still recognised.
                 */
                inch = input();
                continue;
            }
            switch(inch){
            case 'b' : inch = '\b'; break;
            case 't' : inch = '\t'; break;
            case 'n' : inch = '\n'; break;
            case 'v' : inch = '\v'; break;
            case 'f' : inch = '\f'; break;
            case 'r' : inch = '\r'; break;
            case 'X' :
            case 'x' :
                /* one or two hex digits */
                inch = input();
                if(isxdigit(inch)){
                    temp = hextoint(toupper(inch));
                    inch = input();
                    if(isxdigit(inch)){
                        temp = (temp << 4) + hextoint(toupper(inch));
                    } else {
                        unput(inch);
                    }
                    inch = temp;
                } else {
                    /* no hex digit follows: keep a plain x */
                    unput(inch);
                    inch = 'x';
                }
                break;
            default:
                /* one to three octal digits; any other character stands for itself */
                if(isodigit(inch)){
                    temp = inch - '0';
                    inch = input();
                    if(isodigit(inch)){
                        temp = (temp << 3) + (inch - '0');
                        inch = input();
                        if(isodigit(inch)){
                            temp = (temp << 3) + (inch - '0');
                        } else {
                            unput(inch);
                        }
                    } else {
                        unput(inch);
                    }
                    inch = temp;
                }
                break;
            }
        }
        buffer[count++] = inch;
        if(count >= max_size){
            /* keep the old pointer until the larger block is known to exist */
            grown = realloc(buffer,max_size + ALLOC_SIZE);
            if(grown == NULL){
                free(buffer);
                yyerror("Out of memory.");
                yyterminate();
            }
            buffer = grown;
            max_size += ALLOC_SIZE;
        }
        inch = input();
    }
    if(inch == EOF || inch == '\n'){
        yyerror("Unterminated string.");
    }
    /* the growth check above always leaves room for the terminator */
    buffer[count] = '\0';
    printf("String = \"%s\"\n",buffer);
    free(buffer);
}
.
\n
%%

View File

@ -0,0 +1,94 @@
/*
* string2.lex: An example of scanning strings
* by using start states.
*/
%{
#include <ctype.h>
#include <malloc.h>
#define isodigit(x) ((x) >= '0' && (x) <= '7')
#define hextoint(x) (isdigit((x)) ? (x) - '0' : ((x) - 'A') + 10)
char *buffer = NULL;
int buffer_size = 0;
/*
 * yyerror: report a scanner problem on stdout.  The output is a newline,
 * the text "Error: ", the caller's message and a closing newline.
 */
void yyerror(char *message)
{
    fprintf(stdout, "\nError: %s\n", message);
}
%}
%x STRING
hex (x|X)[0-9a-fA-F]{1,2}
oct [0-7]{1,3}
%%
\" {
/* Opening quote: start with a one-byte buffer holding only the
 * terminator, then switch to the STRING start condition.
 * buffer_size counts the bytes in use, terminator included. */
buffer = malloc(1);
buffer_size = 1; strcpy(buffer,"");
BEGIN(STRING);
}
<STRING>\n {
/* Raw newline inside the literal: report it, discard the partial
 * string and go back to the INITIAL start condition. */
yyerror("Unterminated string");
free(buffer);
BEGIN(INITIAL);
}
<STRING><<EOF>> {
/* Input ended inside the literal: same clean-up as for a raw newline. */
yyerror("EOF in string");
free(buffer);
BEGIN(INITIAL);
}
<STRING>[^\\\n"] {
/* Ordinary character: make room for yyleng more bytes and append the
 * matched text to the buffer. */
buffer = realloc(buffer,buffer_size+yyleng+1);
buffer_size += yyleng;
strcat(buffer,yytext);
}
<STRING>\\\n /* ignore this */
<STRING>\\{hex} {
/* Hex escape: yytext is a backslash, x or X, then one or two hex
 * digits.  loop counts down from yyleng-2, so yytext[yyleng-loop]
 * visits the digits from left to right. */
int temp =0,loop = 0;
for(loop=yyleng-2; loop>0; loop--){
temp <<= 4;
temp += hextoint(toupper(yytext[yyleng-loop]));
}
/* grow by one byte: the value replaces the old terminator and a new
 * terminator is written after it */
buffer = realloc(buffer,buffer_size+1);
buffer[buffer_size-1] = temp;
buffer[buffer_size] = '\0';
buffer_size += 1;
}
<STRING>\\{oct} {
/* Octal escape: a backslash then one to three octal digits, visited
 * from left to right in the same way as the hex digits. */
int temp =0,loop = 0;
for(loop=yyleng-1; loop>0; loop--){
temp <<= 3;
temp += (yytext[yyleng-loop] - '0');
}
buffer = realloc(buffer,buffer_size+1);
buffer[buffer_size-1] = temp;
buffer[buffer_size] = '\0';
buffer_size += 1;
}
<STRING>\\[^\n] {
/* Any other escaped character: the letters listed below map to their
 * control characters, everything else stands for itself. */
buffer = realloc(buffer,buffer_size+1);
switch(yytext[yyleng-1]){
case 'b' : buffer[buffer_size-1] = '\b'; break;
case 't' : buffer[buffer_size-1] = '\t'; break;
case 'n' : buffer[buffer_size-1] = '\n'; break;
case 'v' : buffer[buffer_size-1] = '\v'; break;
case 'f' : buffer[buffer_size-1] = '\f'; break;
case 'r' : buffer[buffer_size-1] = '\r'; break;
default : buffer[buffer_size-1] = yytext[yyleng-1];
}
buffer[buffer_size] = '\0';
buffer_size += 1;
}
<STRING>\" {
/* Closing quote: print the collected string, release the buffer and
 * go back to the INITIAL start condition. */
printf("string = \"%s\"",buffer);
free(buffer);
BEGIN(INITIAL);
}
%%

View File

@ -0,0 +1,21 @@
"This is a string"
"The next string will be empty"
""
"This is a string with a \b(\\b) in it"
"This is a string with a \t(\\t) in it"
"This is a string with a \n(\\n) in it"
"This is a string with a \v(\\v) in it"
"This is a string with a \f(\\f) in it"
"This is a string with a \r(\\r) in it"
"This is a string with a \"(\\\") in it"
"This is a string with a \z(\\z) in it"
"This is a string with a \X4a(\\X4a) in it"
"This is a string with a \x4a(\\x4a) in it"
"This is a string with a \x7(\\x7) in it"
"This is a string with a \112(\\112) in it"
"This is a string with a \043(\\043) in it"
"This is a string with a \7(\\7) in it"
"This is a multi-line \
string"
"This is an unterminated string
"This is an unterminated string too

32
examples/manual/unput.lex Normal file
View File

@ -0,0 +1,32 @@
/*
* unput.lex : An example of what *not*
* to do with unput().
*/
%{
#include <stdio.h>
void putback_yytext(void);
%}
%%
foobar putback_yytext();
raboof putback_yytext();
%%
/*
 * putback_yytext: print the text just matched, then hand a saved copy of
 * it back to the scanner with unput(), one character at a time, walking
 * the copy from its first character to its last.
 *
 * This is the demonstration the file header warns about.
 * NOTE(review): presumably the mistake being shown is the front-to-back
 * push order, which makes the scanner read the text back reversed --
 * confirm against the flex manual.
 */
void putback_yytext(void)
{
    int len = strlen(yytext);
    char saved[YY_BUF_SIZE];
    int pos = 0;

    /* work from a private copy, taken before the first unput() call */
    strcpy(saved,yytext);
    printf("Got: %s\n",yytext);
    while(pos < len){
        unput(saved[pos]);
        pos++;
    }
}

View File

@ -0,0 +1,31 @@
%{
#include <ctype.h>
void user_action(void);
#define YY_USER_ACTION user_action();
%}
%%
.* ECHO;
\n ECHO;
%%
/*
 * user_action: convert the current token to upper case in place.
 *
 * Walks the yyleng characters of yytext and replaces each one with its
 * upper-case form; characters without one are left unchanged.  The
 * YY_USER_ACTION definition at the top of this file expands to a call of
 * this function.
 */
void user_action(void)
{
    int loop;

    for(loop=0; loop<yyleng; loop++){
        /*
         * The <ctype.h> functions require a value representable as
         * unsigned char (or EOF); a plain char may be negative, so
         * convert before the call.
         */
        yytext[loop] = (char) toupper((unsigned char) yytext[loop]);
    }
}

View File

@ -0,0 +1,30 @@
%{
#define YY_USER_INIT open_input_file()
extern FILE *yyin;
/*
 * open_input_file: ask on stdout for the name of the file to scan, read
 * the answer from stdin and open it for reading as yyin.  The question is
 * repeated until a file can be opened.  If no line can be read from stdin
 * (end of file or read error), "stdin" is printed and yyin is set to stdin.
 */
void open_input_file(void)
{
    char *file_name,buffer[1024];

    yyin = NULL;
    while(yyin == NULL){
        printf("Input file: ");
        fflush(stdout);  /* show the prompt before waiting for the answer */
        file_name = fgets(buffer,sizeof buffer,stdin);
        if(file_name){
            /*
             * Cut the line at its newline, if it has one.  Unlike
             * indexing with strlen()-1 this is safe for an empty read
             * and does not eat a character of a line without a newline.
             */
            file_name[strcspn(file_name,"\n")] = '\0';
            yyin = fopen(file_name,"r");
            if(yyin == NULL){
                printf("Unable to open \"%s\"\n",file_name);
            }
        } else {
            printf("stdin\n");
            yyin = stdin;  /* non-NULL, so the loop ends here */
        }
    }
}
%}
%%

122
examples/manual/wc.lex Normal file
View File

@ -0,0 +1,122 @@
%{
/*
* wc.lex : A simple example of using FLEX
* to create a wc-like utility.
*
* See MISC/fastwc/ in the flex distribution for examples
* of how to write this scanner for maximum performance.
*/
int numchars = 0;
int numwords = 0;
int numlines = 0;
int totchars = 0;
int totwords = 0;
int totlines = 0;
/*
* rules start from here
*/
%}
%%
[\n] { /* newline: one more character and one more line */ numchars++; numlines++; }
[\r] { /* carriage return: counted as a character only */ numchars++; }
[^ \t\n]+ { /* run without blank, tab or newline: one word of yyleng characters */ numwords++; numchars += yyleng; }
. { /* anything else, one character at a time */ numchars++; }
%%
/*
* additional C code starts from here. This supplies
* all the argument processing etc.
*/
int main(int argc, char *argv[])
{
int loop,first=1;
int lflag = 0; /* 1 if we count # of lines */
int wflag = 0; /* 1 if we count # of words */
int cflag = 0; /* 1 if we count # of characters */
int fflag = 0; /* 1 if we have a file name */
for(loop=1; loop<argc; loop++){
if(argv[loop][0] == '-'){
switch(argv[loop][1]){
case 'l':
lflag = 1;
break;
case 'w':
wflag = 1;
break;
case 'c':
cflag = 1;
break;
default:
fprintf(stderr,"unknown option -%c\n",
argv[loop][1]);
}
}
}
if(lflag == 0 && wflag == 0 && cflag == 0){
lflag = wflag = cflag = 1; /* default to all on */
}
for(loop=1; loop<argc; loop++){
if(argv[loop][0] != '-'){
fflag = 1;
numlines = numchars = numwords = 0;
if((yyin = fopen(argv[loop],"rb")) != NULL){
if(first){
first = 0;
} else {
YY_NEW_FILE;
}
(void) yylex();
fclose(yyin);
totwords += numwords;
totchars += numchars;
totlines += numlines;
printf("file : %25s :",argv[loop]) ;
if(lflag){
fprintf(stdout,"lines %5d ",numlines);
}
if(cflag){
fprintf(stdout,"characters %5d ",numchars);
}
if(wflag){
fprintf(stdout,"words %5d ",numwords);
}
fprintf(stdout,"\n");
}else{
fprintf(stderr,"wc : file not found %s\n",argv[loop]);
}
}
}
if(!fflag){
fprintf(stderr,"usage : wc [-l -w -c] file [file...]\n");
fprintf(stderr,"-l = count lines\n");
fprintf(stderr,"-c = count characters\n");
fprintf(stderr,"-w = count words\n");
exit(1);
}
for(loop=0;loop<79; loop++){
fprintf(stdout,"-");
}
fprintf(stdout,"\n");
fprintf(stdout,"total : %25s ","") ;
if(lflag){
fprintf(stdout,"lines %5d ",totlines);
}
if(cflag){
fprintf(stdout,"characters %5d ",totchars);
}
if(wflag){
fprintf(stdout,"words %5d ",totwords);
}
fprintf(stdout,"\n");
return(0);
}

View File

@ -0,0 +1,29 @@
/*
* yymore.lex: An example of using yymore()
* to good effect.
*/
%{
#include <memory.h>
/*
 * yyerror: report a scanner problem on stdout as one line made of the
 * text "Error: " followed by the caller's message.
 */
void yyerror(char *message)
{
    fprintf(stdout, "Error: %s\n", message);
}
%}
%x STRING
%%
\" BEGIN(STRING); /* opening quote: enter the STRING start condition */
<STRING>[^\\\n"]* yymore(); /* run of ordinary characters: keep it so the next match is appended to yytext */
<STRING><<EOF>> yyerror("EOF in string."); BEGIN(INITIAL); /* input ended inside the literal */
<STRING>\n yyerror("Unterminated string."); BEGIN(INITIAL); /* raw newline inside the literal */
<STRING>\\\n yymore(); /* backslash-newline: kept in yytext, keep accumulating */
<STRING>\" {
/* closing quote: overwrite it with a terminator, print the accumulated
 * text and go back to the INITIAL start condition */
yytext[yyleng-1] = '\0';
printf("string = \"%s\"",yytext); BEGIN(INITIAL);
}
%%

View File

@ -0,0 +1,33 @@
/*
* yymore.lex: An example of using yymore()
* to good effect.
*/
%{
#include <memory.h>
/*
 * yyerror: report a scanner problem on stdout as one line made of the
 * text "Error: " followed by the caller's message.
 */
void yyerror(char *message)
{
    fprintf(stdout, "Error: %s\n", message);
}
%}
%x STRING
%%
\" BEGIN(STRING); /* opening quote: enter the STRING start condition */
<STRING>[^\\\n"]* yymore(); /* run of ordinary characters: keep it so the next match is appended to yytext */
<STRING><<EOF>> yyerror("EOF in string."); BEGIN(INITIAL); /* input ended inside the literal */
<STRING>\n yyerror("Unterminated string."); BEGIN(INITIAL); /* raw newline inside the literal */
<STRING>\\\n {
/* Backslash-newline: copy the first yyleng-2 bytes of the token two
 * positions forward, over the backslash and the newline, then advance
 * yytext and shrink yyleng by two so that the pair drops out of the
 * token before more text is appended.
 * NOTE(review): rewriting yytext and yyleng by hand is presumably not
 * sanctioned usage -- confirm against the flex manual. */
bcopy(yytext,yytext+2,yyleng-2);
yytext += 2; yyleng -= 2;
yymore();
}
<STRING>\" {
/* closing quote: shorten the token by one character, terminate it
 * there, print it and go back to the INITIAL start condition */
yyleng -= 1; yytext[yyleng] = '\0';
printf("string = \"%s\"",yytext); BEGIN(INITIAL);
}
%%

View File

@ -0,0 +1,7 @@
"This is a test \
of multi-line string \
scanning in flex. \
This may be breaking some law \
of usage though..."

58
examples/testxxLexer.l Normal file
View File

@ -0,0 +1,58 @@
// An example of using the flex C++ scanner class.
%option C++ noyywrap
%{
int mylineno = 0;
%}
string \"[^\n"]+\"
ws [ \t]+
alpha [A-Za-z]
dig [0-9]
name ({alpha}|{dig}|\$)({alpha}|{dig}|\_|\.|\-|\/|\$)*
num1 [-+]?{dig}+\.?([eE][-+]?{dig}+)?
num2 [-+]?{dig}*\.{dig}+([eE][-+]?{dig}+)?
number {num1}|{num2}
%%
{ws} /* skip blanks and tabs */
"/*" {
    /*
     * Skip a C-style comment by hand, counting the newlines inside it
     * in mylineno.  The loop ends at the closing star-slash pair or at
     * the end of the input, which yyinput() may report either as 0 or
     * as EOF; testing for both keeps an unterminated comment from
     * looping forever.
     */
    int c;

    while((c = yyinput()) != 0 && c != EOF)
        {
        if(c == '\n')
            ++mylineno;
        else if(c == '*')
            {
            c = yyinput();
            if(c == '/' || c == 0 || c == EOF)
                break;
            /* not the end of the comment: look at this character again */
            unput(c);
            }
        }
    }
{number} cout << "number " << YYText() << '\n';
\n mylineno++;
{name} cout << "name " << YYText() << '\n';
{string} cout << "string " << YYText() << '\n';
%%
int main( int /* argc */, char** /* argv */ )
{
FlexLexer* lexer = new yyFlexLexer;
while(lexer->yylex() != 0)
;
return 0;
}

46
to.do/README Normal file
View File

@ -0,0 +1,46 @@
The contents of this directory are:
Wilhelms.todo
This guy Gerhard Wilhelms did an exhaustive line-by-line
study of flex, finding a large number of glitches; it's
not clear how significant they are (some have subsequently
been fixed).
Wish-List
A long list of cryptic one-line descriptions of various
features people have asked for, or I've thought would be
handy. If you have questions about particular ones, let
me know.
reentrant
A set of mods contributed by John Bossom for adding
an option to flex to make reentrant scanners.
rflex-2.5.4-diffs
A set of mods contributed by Chris Appleton for
the same purpose.
streams
email from David Madden about coping with non-blocking
I/O.
unicode
patches to support Unicode scanners, contributed
by James A. Lauth.
Of these, the ones that people frequently ask about are support for reentrant
scanners and support for Unicode. I haven't played with the reentrant stuff
so don't know how solid / well designed it is. I've sent out the Unicode
stuff to numerous people and haven't received any complaints, so I imagine
it works well.
Another thing people ask for fairly often is removing the limit on size
of the scanners. There's some mail in the faqs/ directory about ways to
do this by cranking some #define's in flexdef.h, though of course the best
solution would be dynamic memory/resizing.
By the way, I have translated the core flex algorithms into a C++ regular
expression class (but one that doesn't support the uglier stuff like
yymore()/yyless(), trailing context, etc.). If you ever wind up wanting
it, just let me know.

711
to.do/Wilhelms.todo Normal file
View File

@ -0,0 +1,711 @@
PARSE.Y 344: ('rule'-rule " | re '$' ": There are some errors concerning
trailing context. First of all the rule " re '$' " implies that this is
no variable_trail_rule because the tail of it ( '$' ) has a fixed length
of 1. The only possible reason for making this rule variable is when
'previous_continued_action' is true. In this case 'variable_trail_rule'
must be set and the beginning of the trailing part must be marked.
However the variables 'varlength' and 'headcnt' have not the same meaning
as in the rule " re2 re ". Here ( in the rule " re '$' " ) 'varlength'
is true if the head ( 're' ) of the rule has variable length, and
'headcnt' is still 0 because it isn't set during reduction of 're'.
Therefore the test for a variable trailing rule
" if ( ! varlength || headcnt != 0 ) "
is wrong and should be removed.
Also it is not necessary to set 'varlength' or 'headcnt' if you set
" trailcnt = 1; ". If this rule is made variable then 'variable_trail_rule'
is set and neither 'headcnt' nor 'trailcnt' are used in 'finish_rule()'.
And if this rule is normal then the head may be variable or not, but in
'finish_rule()' code is generated to reduce 'yy_cp' by 1.
Finally I found no reason to create an epsilon-state and insert it in
front of mkstate( '\n' ) instead of adding it behind. This epsilon-state
should be marked as STATE_TRAILING_CONTEXT. Otherwise you get no warning
of dangerous trailing context if you have a rule " x\n*$ " which was made
variable with '|'.)
| re '$'
{
/* if ( trlcontxt )
{
synerr( "trailing context used twice" );
$$ = mkstate( SYM_EPSILON );
}
else */ if ( previous_continued_action )
{
/* see the comment in the rule for "re2 re"
* above
*/
/* if ( ! varlength || headcnt != 0 ) */
{
fprintf( stderr,
"%s: warning - trailing context rule at line %d made variable because\n",
program_name, linenum );
fprintf( stderr,
" of preceding '|' action\n" );
}
/* mark as variable */
/* varlength = true;
headcnt = 0; */
add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK )
;
variable_trail_rule = true;
}
/* trlcontxt = true;
if ( ! varlength )
headcnt = rulelen;
++rulelen; */
trailcnt = 1;
current_state_type = STATE_TRAILING_CONTEXT;
eps = mkstate( SYM_EPSILON );
current_state_type = STATE_NORMAL;
$$ = link_machines( $1,
link_machines( mkstate( '\n' ), eps ) );
}
DFA.C 618: (ntod(): The arrays 'targstate[]' and 'targfreq[]' can be
maintained in a better way. Up to now it is possible that states are added
to 'targstate[]' more than once, because the state 'newds' from the call
to snstods() creates a new entry in 'targstate[]'. But 'newds' may already
exist in 'targstate[]' !
Another point is that 'targfreq[]' is not updated if "caseins && ! useecs"
is true.
My algorithm should solve these problems. However it could be simplified
by replacing 'newds' by 'targ' and removing the statement "targ = newds;".
Remark to the second point: I decremented the targfreq-counter if 'sym'
was an uppercase letter and incremented it if 'sym' was a lowercase
letter. The index 'i' of 'targfreq[i]' points to the correct position in
'targstate[]' even if a new state was added.)
for ( sym = 1; sym <= numecs; ++sym )
{
if ( symlist[sym] )
{
symlist[sym] = 0;
if ( duplist[sym] == NIL )
{ /* symbol has unique out-transitions */
numstates = symfollowset( dset, dsize, sym, nset );
nset = epsclosure( nset, &numstates, accset,
&nacc, &hashval );
if ( snstods( nset, numstates, accset,
nacc, hashval, &newds ) )
{
totnst = totnst + numstates;
++todo_next;
numas += nacc;
if ( variable_trailing_context_rules && nacc > 0 )
check_trailing_context( nset, numstates,
accset, nacc );
}
targ = newds;
}
else
{
/* sym's equivalence class has the same transitions
* as duplist(sym)'s equivalence class
*/
targ = state[duplist[sym]];
}
state[sym] = targ;
if ( trace )
fprintf( stderr, "\t%d\t%d\n", sym, targ );
/* update frequency count for destination state */
for ( i = 1; i <= targptr; ++i )
if ( targstate[i] == targ )
break;
if ( i <= targptr )
{
++targfreq[i];
++numdup;
}
else
{
targfreq[++targptr] = 1;
targstate[targptr] = targ;
++numuniq;
}
if ( caseins && ! useecs )
{
if ( sym >= 'A' && sym <= 'Z' )
{
--targfreq[i];
--totaltrans;
}
else if ( sym >= 'a' && sym <= 'z' )
{
++targfreq[i];
++totaltrans;
}
}
++totaltrans;
duplist[sym] = NIL;
}
}
GEN.C 438: (gen_next_compressed_state(): I have rewritten the function
'yy_try_NUL_trans()' so it really just tries to find out whether a
transition on the NUL character goes to the jamstate or not. ( That means
I removed each creation of backtracking information and the saving of the
new state on the 'yy_state_buf[]'. )
Therefore I removed the call for 'gen_backtracking()' here, because the
function 'gen_next_compressed_state()' is also used in 'gen_NUL_trans()'.)
/* gen_backtracking(); */
GEN.C 587ff: (gen_next_state(): Since the backtracking information is not
created in 'gen_next_compressed_state()' any more, it is done here
before the next state is computed ( for "compressed" tables ). This
removes the bug that the backtracking information is created twice if
'nultrans' is not NULL and 'gen_next_compressed_state()' is called.
Finally I had to insert the creation of a "{" and a "}", because there
is a local variable created in 'gen_next_compressed_state()'. ( These are
needed only when backtracking information is really created.) )
if ( ! fulltbl && ! fullspd )
gen_backtracking();
if ( worry_about_NULs && nultrans )
{
indent_puts( "if ( *yy_cp )" );
indent_up();
indent_puts( "{" );
}
else if ( ! fulltbl && ! fullspd && ! reject && num_backtracking > 0 )
indent_puts( "{" );
if ( fulltbl )
indent_put2s( "yy_current_state = yy_nxt[yy_current_state][%s];",
char_map );
else if ( fullspd )
indent_put2s( "yy_current_state += yy_current_state[%s].yy_nxt;",
char_map );
else
gen_next_compressed_state( char_map );
if ( worry_about_NULs && nultrans )
{
indent_puts( "}" );
indent_down();
indent_puts( "else" );
indent_up();
indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" );
indent_down();
}
else if ( ! fulltbl && ! fullspd && ! reject && num_backtracking > 0 )
indent_puts( "}" );
if ( fullspd || fulltbl )
gen_backtracking();
if ( reject )
indent_puts( "*yy_state_ptr++ = yy_current_state;" );
}
GEN.C 553: (gen_next_match(): There is a problem if 'interactive' is true. In
this case the scanner jams if the next state is the jamstate ( i.e.
yy_base[yy_current_state] == jambase ). However the scanner reaches also
the jamstate if the transition character is the NUL-character or if the
end of the buffer is reached. Then in the EOB-action is decided whether
this was really a NUL character or the end-of-buffer. ( If it was a NUL,
scanning will be resumed. If it was the end-of-buffer, the buffer will be
filled first, before scanning will be resumed. )
These actions are not done if you use an 'interactive' scanner, because
the EOB-action is not executed. Therefore you have to continue scanning,
if you have just matched a NUL character ( i.e. *yy_cp == '\0' and
yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) and if you are not
already in the jamstate ( i.e. yy_current_state != jamstate ).
Note that the '<' in " yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars] "
implies that the EOB action is *not* executed if the last match before the
end-of-buffer was maximal.
The following change in the algorithm results in a minor performance
penalty because the additional conditions are tested only if you have
reached the end of the match or if you are using NUL characters in your
patterns.)
if ( interactive )
{
printf( "while ( yy_base[yy_current_state] != %d\n", jambase );
set_indent( 4 );
indent_puts( "|| ( *yy_cp == '\\0'" );
indent_up();
indent_puts(
" && yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars]" );
do_indent();
printf( " && yy_current_state != %d ) );\n", jamstate );
set_indent( 2 );
}
else
printf( "while ( yy_current_state != %d );\n", jamstate );
GEN.C 341: (gen_find_action(): Question: The variables 'yy_full_match',
'yy_full_state' and 'yy_full_lp' are used only in the REJECT macro. Why
do you not also test here on 'real_reject' before you create code to set
these variables ( like you did in line 327ff for the action of the case
" ( yy_act & YY_TRAILING_MASK ) " ) ?)
New code beginning at line 338 to show the context:
indent_puts( "else" );
indent_up();
indent_puts( "{" );
if ( real_reject )
{
/* remember matched text in case we back up due to REJECT */
indent_puts( "yy_full_match = yy_cp;" );
indent_puts( "yy_full_state = yy_state_ptr;" );
indent_puts( "yy_full_lp = yy_lp;" );
}
indent_puts( "break;" );
indent_puts( "}" );
indent_down();
indent_puts( "++yy_lp;" );
indent_puts( "goto find_rule;" );
}
FLEX.SKEL 364,379: (YY_END_OF_BUFFER action: If it was really a NUL character
which started this action, then 'yy_bp' points still at the beginning of
the current run and 'yy_c_buf_p' points behind the NUL character.
Contrast this with the situation after the call of 'yy_get_next_buffer()'!
Therefore I removed the statement " yy_bp = yytext + YY_MORE_ADJ; "
( line 379 ) and replaced the statement
" yy_c_buf_p = yytext + yy_amount_of_matched_text; " ( line 364 ) by the
easier one " yy_c_buf_p = --yy_cp; ". Here 'yy_cp' is also adjusted.
This guarantees that both 'yy_c_buf_p' and 'yy_cp' point at the NUL
character. Therefore 'yy_cp' will have the correct value when it is needed
after the call to 'yy_try_NUL_trans()' ( when we know whether we make a
transition or not ).
line 364:
yy_c_buf_p = --yy_cp;
line 379:
/* yy_bp = yytext + YY_MORE_ADJ; */
GEN.C 632: (gen_NUL_trans(): I have rewritten 'yy_try_NUL_trans()'. The new
version just finds out whether a transition on the NUL character goes to
the jamstate or not. See also my remarks to 'gen_next_compressed_state()'.
Note that the test " yy_is_jam = (yy_current_state == jamstate); " is
also used, if 'interactive' is true. Otherwise 'yy_try_NUL_trans()' would
return 0, if the NUL character was the last character of a pattern
( e.g. "x\0" ), and we therefore would not reach the last state.
Remark: Change also the comment in FLEX.SKEL for this function.)
FLEX.SKEL, line 583:
%% code to find the next state goes here
GEN.C, line 632ff:
/* int need_backtracking = (num_backtracking > 0 && ! reject);
if ( need_backtracking )
/ * we'll need yy_cp lying around for the gen_backtracking() * /
indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); */
GEN.C, line 674ff:
/* if ( reject )
indent_puts( "*yy_state_ptr++ = yy_current_state;" ); */
do_indent();
/* if ( interactive )
printf( "yy_is_jam = (yy_base[yy_current_state] == %d);\n",
jambase );
else */
printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate );
}
/* if we've entered an accepting state, backtrack; note that
* compressed tables have *already* done such backtracking, so
* we needn't bother with it again
*/
/* if ( need_backtracking && (fullspd || fulltbl) )
{
putchar( '\n' );
indent_puts( "if ( ! yy_is_jam )" );
indent_up();
indent_puts( "{" );
gen_backtracking();
indent_puts( "}" );
indent_down();
} */
}
GEN.C 1293: (make_tables(): The changed functionality of 'yy_try_NUL_trans()'
implies changes in the EOB action. If the next state 'yy_next_state' is 0
( i.e. the jamstate ), you can immediately jump to 'yy_find_action'.
Remember that 'yy_cp' was already adjusted to point at the NUL !
Also you must not use the backtracking information because the actual
state 'yy_current_state' may be an accepting state.
If 'yy_next_state' is not the jamstate, we make a transition on the NUL.
This requires the following actions:
- Create backtracking information for compressed tables *before* we make
the transition on NUL.
- Now increment 'yy_cp' and set 'yy_current_state' to 'yy_next_state'.
( Note that 'yy_cp' points at the NUL up to now. )
- Save the new state on the stack 'yy_state_buf[]' if 'reject' is true.
- Create backtracking information *after* the transition, if 'fulltbl'
or 'fullspd' is true.
- Finally decide, if 'interactive' is true, whether scanning should be
resumed at 'yy_match' or whether we have reached a final state and
should jump to 'yy_find_action'. (Condition like in 'gen_next_match()'.)
If 'interactive' is false, just resume scanning.)
Corresponding code in FLEX.SKEL beginning at line 381:
if ( yy_next_state )
{
/* consume the NUL */
%% code to do backtracking for compressed tables and set up yy_cp goes here
}
else
goto yy_find_action;
Code in GEN.C beginning at line 1293:
/* first, deal with backtracking and setting up yy_cp if the scanner
* finds that it should JAM on the NUL
*/
skelout();
set_indent( 6 );
if ( ! fulltbl && ! fullspd )
gen_backtracking();
indent_puts( "++yy_cp;" );
indent_puts( "yy_current_state = yy_next_state;" );
if ( reject )
indent_puts( "*yy_state_ptr++ = yy_current_state;" );
if ( fulltbl || fullspd )
gen_backtracking();
if ( interactive )
{
do_indent();
printf( "if ( yy_base[yy_current_state] != %d\n", jambase );
indent_up();
indent_puts( "|| ( *yy_cp == '\\0'" );
indent_puts( "&& yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars]" );
do_indent();
printf( "&& yy_current_state != %d ) )\n", jamstate );
indent_puts( "goto yy_match;" );
indent_down();
indent_puts( "else" );
indent_up();
indent_puts( "goto yy_find_action;" );
indent_down();
}
else
indent_puts( "goto yy_match;" );
/* if ( fullspd || fulltbl )
indent_puts( "yy_cp = yy_c_buf_p;" );
else
{ / * compressed table * /
if ( ! reject && ! interactive )
{
/ * do the guaranteed-needed backtrack to figure out the match * /
indent_puts( "yy_cp = yy_last_accepting_cpos;" );
indent_puts( "yy_current_state = yy_last_accepting_state;" );
}
} */
FLEX.SKEL 513: (yy_get_next_buffer(): Here is an error if 'yymore()' is active
in the last match (i.e. yy_doing_yy_more == 1 and yy_more_len > 0). Then
'number_to_move' will be (1 + yy_more_len), i.e. the previous character
plus the additional characters for using 'yymore()'.)
if ( number_to_move == 1 + YY_MORE_ADJ )
{
ret_val = EOB_ACT_END_OF_FILE;
yy_current_buffer->yy_eof_status = EOF_DONE;
}
else
{
ret_val = EOB_ACT_LAST_MATCH;
yy_current_buffer->yy_eof_status = EOF_PENDING;
}
}
GEN.C 1317: (make_tables(): In the generation of 'yy_get_previous_state()' the
variable 'yy_bp' must be set to 'yytext + YY_MORE_ADJ' if 'bol_needed' is
true. Otherwise 'yy_bp' points eventually at the beginning of the
yymore-prefix instead of the current run.)
if ( bol_needed )
indent_puts( "register YY_CHAR *yy_bp = yytext + YY_MORE_ADJ;\n" );
FLEX.SKEL 589ff: (yyunput(): The function 'yyunput()' should be rewritten.
First of all the example for 'unput()' in file flexdoc doesn't work:
{
int i;
unput( ')' );
for ( i = yyleng - 1; i >= 0; --i )
unput( yytext[i] );
unput( '(' );
}
The actual version of 'yyunput()' modifies 'yyleng'. Therefore 'yyleng' is
decremented by " unput( ')' ) " and the pattern to be pushed back has lost
its last character. To avoid this just copy the 'yytext'-string and
'yyleng' before you call 'unput()'.
Another point is that 'yytext' and 'yyleng' could be maintained in a
better way. ( Up to now 'yyleng' can become negative ! )
I think it's better to say that the pushed back pattern should fulfill
the beginning-of-line-condition if and only if the old pattern does
( excluding a possibly existing 'yymore'-prefix ! ).
Up to now you have problems if there is a 'yymore'-prefix, because
'yytext' will be corrupted by YY_DO_BEFORE_ACTION. ( This macro sets
'yytext' to 'yy_bp - yy_more_len', but our 'yy_bp' points already at the
beginning of the 'yymore'-prefix. )
My version of 'yyunput()' reduces the 'yytext'-string by 1 for every
pushed back character and decrements 'yyleng' until 'yytext' is the empty
string. The beginning-of-line-condition is preserved when 'bol_needed' is
true. ( Then the character before the current run is copied in front of
the pushed back character. ) If there is a 'yymore'-prefix, 'yy_more_len'
will be decremented if 'yy_cp' reaches the beginning of the current run.
Remark: The parameter 'yytext' in " yyunput( c, yytext ) " is not really
necessary since 'yytext' is a global variable. You could also set
" register YY_CHAR *yy_bp = yytext; " at the beginning of 'yyunput()'.)
Replace lines 622 - 623 in FLEX.SKEL:
if ( yy_cp > yy_bp && yy_cp[-1] == '\n' )
yy_cp[-2] = '\n';
by
%% code to adjust yy_bp and yy_more_len goes here
Add in GEN.C a function 'gen_yyunput()':
/* generate code to adjust yy_bp and yy_more_len in yyunput
*/
void gen_yyunput()
{
if ( yymore_used )
indent_puts( "yy_bp += YY_MORE_ADJ;\n" );
if ( bol_needed )
indent_puts( "yy_cp[-2] = yy_bp[-1];\n" );
if ( yymore_used )
{
indent_puts( "if ( (yy_cp == yy_bp) && YY_MORE_ADJ )" );
indent_up();
indent_puts( "--yy_more_len;" );
indent_down();
indent_puts( "else" );
indent_up();
indent_puts( "--yy_bp;" );
indent_down();
}
else
indent_puts( "--yy_bp;" );
}
Finally add in the function 'make_tables()' behind the call of
'gen_NUL_trans()' in line 1328:
skelout();
gen_yyunput();
FLEX.SKEL 642,658: (input(): There is an error in 'input()' if the end of
'yy_current_buffer' is reached and 'yymore' is active. Then
'yy_get_next_buffer()' is called which function assumes that 'yytext'
points at the beginning of the 'yymore'-prefix. This function can't
recognize the end of the input stream correctly and therefore returns
EOB_ACT_LAST_MATCH instead of EOB_ACT_END_OF_FILE. Also if the end of
the input file isn't reached yet (EOB_ACT_CONTINUE_SCAN) at least one
character will be lost.
To avoid this error just turn off 'yy_doing_yy_more'. Then you need
not to adjust with YY_MORE_ADJ in lines 667 and 682. However you have to
use a function 'gen_input()', because 'yy_doing_yy_more' does not exist
if 'yymore_used' is false.
( Another solution is to adjust 'yytext':
" yytext = yy_c_buf_p - YY_MORE_ADJ; ", line 658. )
I think the trick with "yy_did_buffer_switch_on_eof" should be done here
the same way as in the YY_END_OF_BUFFER action.
Finally I removed the variable 'yy_cp' and used 'yy_c_buf_p' instead.)
#ifdef __cplusplus
static int yyinput()
#else
static int input()
#endif
{
int c;
*yy_c_buf_p = yy_hold_char; /* yy_cp not needed */
if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
{
/* yy_c_buf_p now points to the character we want to return.
* If this occurs *before* the EOB characters, then it's a
* valid NUL; if not, then we've hit the end of the buffer.
*/
if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
/* this was really a NUL */
*yy_c_buf_p = '\0';
else
{ /* need more input */
%% code to turn off yy_doing_yy_more and yy_more_len goes here
yytext = yy_c_buf_p;
++yy_c_buf_p;
switch ( yy_get_next_buffer() )
{
case EOB_ACT_END_OF_FILE:
{
yy_did_buffer_switch_on_eof = 0;
if ( yywrap() )
{
yy_c_buf_p = yytext; /* + YY_MORE_ADJ not needed */
return ( EOF );
}
else
{
if ( ! yy_did_buffer_switch_on_eof )
YY_NEW_FILE;
}
#ifdef __cplusplus
return ( yyinput() );
#else
return ( input() );
#endif
}
break;
case EOB_ACT_CONTINUE_SCAN:
yy_c_buf_p = yytext; /* + YY_MORE_ADJ not needed */
break;
case EOB_ACT_LAST_MATCH:
#ifdef __cplusplus
YY_FATAL_ERROR( "unexpected last match in yyinput()" );
#else
YY_FATAL_ERROR( "unexpected last match in input()" );
#endif
}
}
}
c = *yy_c_buf_p;
yy_hold_char = *++yy_c_buf_p;
return ( c );
}
Add in GEN.C a function 'gen_input()':
/* generate code to turn off yy_doing_yy_more and yy_more_len in input
*/
void gen_input()
{
if ( yymore_used )
indent_puts( "yy_doing_yy_more = yy_more_len = 0;" );
}
Finally add in the function 'make_tables()' behind the call of
'gen_yyunput()':
set_indent( 3 );
skelout();
gen_input();
PARSE.Y 54: ( 'goal'-rule: If there is no rule in the input file, the end of
the prolog is not marked yet, because 'flexscan()' is still in the start
condition <SECT2PROLOG> and the rule <SECT2PROLOG><<EOF>> is not done up
to now. Therefore mark the end of prolog here, before you add the default
rule. I test here on " num_rules == 1 ", because the 'initforrule'-rule
increments 'num_rules' before this action is executed.)
if ( num_rules == 1 )
fprintf( temp_action_file, "%%%% end of prolog\n" )
;
SCAN.L 255: ( '<SECT2PROLOG><<EOF>>'-rule: If there are no rules at all in
the input file, then this rule will be executed at the end of
'make_tables()'. At this point 'temp_action_file' was closed for writing
and has been reopened for reading. The macro MARK_END_OF_PROLOG will
therefore lead to a write-error.
To avoid this error add the condition " if ( num_rules == 0 ) ". If this
rule is executed at the end of 'make_tables()' there will be at least the
default rule, i.e. 'num_rules' will be greater than 0.
Remark: This correction together with the one before will allow an input
file which just consists of "%%". ( Copy 'stdin' to 'stdout'. ))
<SECT2PROLOG><<EOF>> {
if ( num_rules == 0 )
MARK_END_OF_PROLOG;
yyterminate();
}
MISC.C 376: ( flexfatal(): The call of 'flexend( 1 )' will lead to an
infinite loop if 'flexfatal()' is called from 'flexend()'. I therefore
introduced the flag 'doing_flexend' to prevent 'flexend()' to be called
more than once.)
Replace the function call 'flexend( 1 );' in MISC.C, line 376, by
if ( ! doing_flexend )
flexend( 1 );
Set 'doing_flexend' at the beginning of 'flexend()' in MAIN.C, line 195:
doing_flexend = true;
Add in FLEXDEF.H, line 381, the declaration of 'doing_flexend':
extern int yymore_used, reject, real_reject, continued_action, doing_flexend;
Add in FLEXDEF.H, line 376, a comment for this variable:
* doing_flexend - true if flexend() has been started
Initialize 'doing_flexend' in 'flexinit()' in MAIN.C, line 401:
yymore_used = continued_action = reject = doing_flexend = false;
FLEX.SKEL 94: ( 'YY_INPUT()'-macro: I have problems with 'fileno()' and
'read()'.
I used the C Compiler of the BORLAND C++ Compiler and compiled the created
scanner with the option 'ANSI keywords'.
In this compiler the prototype of the function 'read(...)' is declared in
the header file 'io.h' and not in 'stdio.h'. Therefore I get a warning.
Real trouble caused 'fileno' which is defined as macro in 'stdio.h':
#define fileno(f) ((f)->fd)
However this macro does not belong to the 'ANSI keywords' because it is
define'd under the condition " #if !__STDC__ ". Therefore I get a warning
and a linker error that the function 'fileno()' does not exist.
(I can avoid this problem by adding the above define-macro in the *.l file
or by replacing the option 'ANSI keywords' by 'Borland C++ keywords'.))

123
to.do/Wish-List Normal file
View File

@ -0,0 +1,123 @@
start conditions given own name space by making them structure fields
#define BEGIN(x) yy_start_state = yy_states->x
reentrant/
streams/
yylineno maintained per input buffer
use yyconst instead of const, to fix __STDC__ == 0 problem
scan input for unput()
-CF/-Cf support interactive scanners
reject_really_used -> maintain_backup_tables
full library encapsulation: flex'ing on the fly
fix MAX_MNS/MARKER_DIFFERENCE to not be a hard limit
Two flags to warn when something is seen that lex or posix might interpret
differently; this should be quite doable as -l already exists. Proposed
names: -Wl, -Wp.
reentrant C scanners
yy_fseek() for positioning in input file
set-able "at beginning of line" , no more unput() trashes yytext?
yy_unput_string(); unput() shifts yytext to preserve it, grows buffer as needed
yy_malloc_type as void* so can be easily switched to char* for poor
hopeless bastards running SunSoft stuff?
public "TODO" file, requesting help?
test -P to make sure it's not broken now due to e.g. yy_scan_string
%option
hook for treating input interactively even if not isatty()
scan.l:22:error message :-( (see flex.todo)
document yy_fill_buffer
lint, gcc-lint
-lfl removed from flex.1
merge 2.4.6, e.g., NEWS
'|' action copies action instead of omitting break
if yy_current_buffer defined on entry to yylex(), don't promote nil yyin
to stdin, etc.
multibyte character flex
ANSI only
multiple inclusion of <stdlib.h>?
[=...=] POSIX stuff
+flex.todo
yylineno, yycol by checking for whether rules can match embedded newlines,
only trailing newlines, always trailing newlines, or no newlines
compute transition path to each DFA state, to aid in backtracking
for each state, store pointer to predecessor, character for xtion
merge flex.1, flexdoc.1?
bison++ interface
YYLEXER_NAME
out-line FlexLexer destructors
GNU readline contrib?
isatty() decl?
#ifdef chud for unput() etc. not being used?
"../scan.l", line 207: warning: ::yy_did_buffer_switch_on_eof defined but not used
cc -c -g scan.c
"scan.cc", line 1752: warning: statement not reached
alloca.c removed from Makefile
// comments
output partitioning for e.g., scanning tables, actions, etc.
497 09/11 14:17-PDT 3450 To:t_bonner@oscar Re: Modifying yytext in an actio
MISC stuff non-writeable
texinfo version of manual
ALSO: document how to do so (including no need to redefine unput()),
whether feature added or not
example of "error" backtracking rules as opposed to "catch-all"
get rid of get_previous_state via accepting #'s tied to previous state #'s
-p tells something about backtracking
easy way to scan strings instead of files
input() across buffer boundaries, buffer overflow; unput() fix
start state stack
NLSTATE - sets "in newline" state; also mechanism to clear "in newline" state
checks for bogus backtrack rules ... - rule shadowing
document incompatibility with lex when unput()'ing a newline
after a newline has been read
document that comments are not allowed on definition lines
foo bar /* the "foo" definition ... */
perhaps indented code in section 2 leads to warnings?
#line directives for code at beginning of scanner routine
nuke %used etc.
hooks for direct access to the buffer, e.g. for flushing it
options in .l file as well as on command line; particularly the rename-prefix
option
clarify "eat up * not followed by /" in <comment> example; move it to
performance, offer simpler version for start states
hook for finding out how much text can be safely pushed back
the .backtrack code knows how to identify characters that cause transitions
(you wanted this for some clearer error messages for the
"default rule can be matched")
yy_switch_to_buffer sets yy_init to 0?
handy library routines, such as yy_C_comment(), yy_C_string(),
obey #line directives in input; first, get rid of # comments ...
flex.h header for declarations of e.g., yymore(), yytext?
but what about %array making the yytext definition out of date?
merge w/ okeeffe code
rearrange the Performance Considerations section so that the easy
fixes come first
copyright notice in manuals?
input() updates yytext and yyleng; perhaps unput too???;
right now it trashes them (doesn't restore '\0')
document that yyleng can now be modified
except if yymore() used?
anchoring allowed inside ()'s - (^abc|def$)
unput() propagates non-newline state too?
complain about invalid anchoring - foo(^abc), (^abc)+
library in its own directory
yylineno
example in flexdoc on YY_INPUT reading from input()
redesign for retargetability (i.e., use w/ other languages ...)
clean up escape expansion
bison @N
example for doc. on scanning strings w/ escapes in them:
POSIX/
get rid of duplicated code between "re2 re" rule and "re '$'" rule
preformatted man pages for VMS sites, possibly using col -b to get rid
of backspaces ...
slurp entire input file into mega-buffer; allows pointers to in-place
identifiers
lex compatibility flag
update flags in docs
-n removed from POSIX?
"MAKE = ..." shouldn't be commented out, or else bigtest can fail
BSD man macros

3182
to.do/flex.rmail Normal file

File diff suppressed because it is too large Load Diff

195
to.do/unicode/FlexLexer.h Normal file
View File

@ -0,0 +1,195 @@
// $Header$
// FlexLexer.h -- define interfaces for lexical analyzer classes generated
// by flex
// Copyright (c) 1993 The Regents of the University of California.
// All rights reserved.
//
// This code is derived from software contributed to Berkeley by
// Kent Williams and Tom Epperly.
//
// Redistribution and use in source and binary forms are permitted provided
// that: (1) source distributions retain this entire copyright notice and
// comment, and (2) distributions including binaries display the following
// acknowledgement: ``This product includes software developed by the
// University of California, Berkeley and its contributors'' in the
// documentation or other materials provided with the distribution and in
// all advertising materials mentioning features or use of this software.
// Neither the name of the University nor the names of its contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
// This file defines FlexLexer, an abstract class which specifies the
// external interface provided to flex C++ lexer objects, and yyFlexLexer,
// which defines a particular lexer class.
//
// If you want to create multiple lexer classes, you use the -P flag
// to rename each yyFlexLexer to some other xxFlexLexer. You then
// include <FlexLexer.h> in your other sources once per lexer class:
//
// #undef yyFlexLexer
// #define yyFlexLexer xxFlexLexer
// #include <FlexLexer.h>
//
// #undef yyFlexLexer
// #define yyFlexLexer zzFlexLexer
// #include <FlexLexer.h>
// ...
//
// Since this header is generic for all sizes of flex scanners, you must
// define the type YY_CHAR before including it:
//
// typedef xxx YY_CHAR;
// #include <FlexLexer.h>
// ...
//
// where xxx = char for 7-bit scanners, unsigned char for 8-bit, and
// wchar_t for 16-bit.
#ifndef __FLEX_LEXER_H
// Never included before - need to define base class.
#define __FLEX_LEXER_H
#include <iostream.h>
extern "C++" {
struct yy_buffer_state;
typedef int yy_state_type;
// FlexLexer - abstract base class for flex C++ lexer objects.
//
// It declares the buffer-handling and scanning operations as pure
// virtuals, and holds the yytext / yyleng / yylineno / yy_flex_debug
// members together with the inline accessors that expose them.
class FlexLexer {
public:
virtual ~FlexLexer() { }
// Accessors for the yytext pointer and the yyleng count stored below.
const YY_CHAR* YYText() { return yytext; }
int YYLeng() { return yyleng; }
// Buffer management.  All of these are pure virtual, so a concrete
// scanner class has to supply them.
virtual void
yy_switch_to_buffer( struct yy_buffer_state* new_buffer ) = 0;
virtual struct yy_buffer_state*
yy_create_buffer( istream* s, int size ) = 0;
virtual void yy_delete_buffer( struct yy_buffer_state* b ) = 0;
virtual void yyrestart( istream* s ) = 0;
// The scanning routine itself; also pure virtual.
virtual int yylex() = 0;
// Call yylex with new input/output sources.
int yylex( istream* new_in, ostream* new_out = 0 )
{
switch_streams( new_in, new_out );
return yylex();
}
// Switch to new input/output streams. A nil stream pointer
// indicates "keep the current one".
virtual void switch_streams( istream* new_in = 0,
ostream* new_out = 0 ) = 0;
// Read access to yylineno, and read/write access to yy_flex_debug.
int lineno() const { return yylineno; }
int debug() const { return yy_flex_debug; }
void set_debug( int flag ) { yy_flex_debug = flag; }
protected:
// State returned by the accessors above.
YY_CHAR* yytext;
int yyleng;
int yylineno; // only maintained if you use %option yylineno
int yy_flex_debug; // only has effect with -d or "%option debug"
};
}
#endif
#if defined(yyFlexLexer) || ! defined(yyFlexLexerOnce)
// Either this is the first time through (yyFlexLexerOnce not defined),
// or this is a repeated include to define a different flavor of
// yyFlexLexer, as discussed in the flex man page.
#define yyFlexLexerOnce
// yyFlexLexer - a particular lexer class derived from FlexLexer.
//
// It declares overrides for every pure virtual of the base class and adds
// the protected I/O hooks and bookkeeping members listed below.  Only
// declarations appear here; the member functions are defined elsewhere.
class yyFlexLexer : public FlexLexer {
public:
// arg_yyin and arg_yyout default to the cin and cout, but we
// only make that assignment when initializing in yylex().
yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 );
virtual ~yyFlexLexer();
// Declarations matching the pure virtuals of FlexLexer.
void yy_switch_to_buffer( struct yy_buffer_state* new_buffer );
struct yy_buffer_state* yy_create_buffer( istream* s, int size );
void yy_delete_buffer( struct yy_buffer_state* b );
void yyrestart( istream* s );
virtual int yylex();
virtual void switch_streams( istream* new_in, ostream* new_out );
protected:
// Virtual hooks for input, output and error reporting; a subclass
// may override them.
virtual int LexerInput( YY_CHAR* buf, int max_size );
virtual void LexerOutput( const YY_CHAR* buf, int size );
virtual void LexerError( const char* msg );
void yyunput( int c, YY_CHAR* buf_ptr );
int yyinput();
void yy_load_buffer_state();
void yy_init_buffer( struct yy_buffer_state* b, istream* s );
void yy_flush_buffer( struct yy_buffer_state* b );
// Storage used by yy_push_state() / yy_pop_state() / yy_top_state()
// (presumably a stack of start states, going by the names -- confirm
// against the skeleton).
int yy_start_stack_ptr;
int yy_start_stack_depth;
int* yy_start_stack;
void yy_push_state( int new_state );
void yy_pop_state();
int yy_top_state();
yy_state_type yy_get_previous_state();
yy_state_type yy_try_NUL_trans( yy_state_type current_state );
int yy_get_next_buffer();
istream* yyin; // input source for default LexerInput
ostream* yyout; // output sink for default LexerOutput
struct yy_buffer_state* yy_current_buffer;
// yy_hold_char holds the character lost when yytext is formed.
YY_CHAR yy_hold_char;
// Number of characters read into yy_ch_buf.
int yy_n_chars;
// Points to current character in buffer.
YY_CHAR* yy_c_buf_p;
int yy_init; // whether we need to initialize
int yy_start; // start state number
// Flag which is used to allow yywrap()'s to do buffer switches
// instead of setting up a fresh yyin. A bit of a hack ...
int yy_did_buffer_switch_on_eof;
// The following are not always needed, but may be depending
// on use of certain flex features (like REJECT or yymore()).
yy_state_type yy_last_accepting_state;
YY_CHAR* yy_last_accepting_cpos;
yy_state_type* yy_state_buf;
yy_state_type* yy_state_ptr;
YY_CHAR* yy_full_match;
int* yy_full_state;
int yy_full_lp;
int yy_lp;
int yy_looking_for_trail_begin;
int yy_more_flag;
int yy_more_len;
int yy_more_offset;
int yy_prev_more_offset;
};
#endif

149
to.do/unicode/ccl.c Normal file
View File

@ -0,0 +1,149 @@
/* ccl - routines for character classes */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header$ */
#include "flexdef.h"
/* ccladd - append character `ch' to character class `cclp'
 *
 * The character is first validated with check_char().  If it is already
 * a member of the class nothing changes; otherwise it is stored just past
 * the class's current last member, growing ccltbl when that slot lies at
 * or beyond the table's current capacity.
 */
void ccladd( cclp, ch )
int cclp;
int ch;
{
	int base, count, slot, k;

	check_char( ch );

	base = cclmap[cclp];
	count = ccllen[cclp];

	/* Nothing to do when the character is already a member. */
	k = 0;
	while ( k < count )
	{
		if ( ccltbl[base + k] == ch )
			return;

		++k;
	}

	slot = base + count;

	if ( slot >= current_max_ccl_tbl_size )
	{
		/* Out of room: enlarge the shared ccl table. */
		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;

		++num_reallocs;

		ccltbl = reallocate_wchar_array( ccltbl,
						current_max_ccl_tbl_size );
	}

	ccltbl[slot] = ch;
	ccllen[cclp] = count + 1;
}
/* cclinit - create a new, empty ccl and return its index
 *
 * Bumps lastccl, enlarging the per-ccl arrays (cclmap, ccllen, cclng)
 * when the new index no longer fits, and records where in the ccl table
 * the new class's members will start.
 */
int cclinit()
{
	int fresh = ++lastccl;

	if ( fresh >= current_maxccls )
	{
		current_maxccls += MAX_CCLS_INCREMENT;

		++num_reallocs;

		cclmap = reallocate_integer_array( cclmap, current_maxccls );
		ccllen = reallocate_integer_array( ccllen, current_maxccls );
		cclng = reallocate_integer_array( cclng, current_maxccls );
	}

	/* The very first ccl starts at the beginning of the table.  Every
	 * later one starts right after its predecessor: cclmap holds the
	 * index of a ccl's first character, so adding the predecessor's
	 * length yields the first free slot.
	 */
	cclmap[fresh] = ( fresh == 1 )
		? 0
		: cclmap[fresh - 1] + ccllen[fresh - 1];

	ccllen[fresh] = 0;
	cclng[fresh] = 0;	/* a new ccl is not negated */

	return fresh;
}
/* cclnegate - flag the given ccl as negated
 *
 * Only the cclng entry for `cclp' is set; the member list is untouched.
 */
void cclnegate( cclp )
int cclp;
{
	cclng[cclp] = 1;
}
/* list_character_set - print a character set in CCL notation
 *
 * cset[] has one entry per character code below csize; a non-zero entry
 * means the character belongs to the set.  The set is written to `file'
 * between '[' and ']'.  Each maximal run of consecutive members is shown
 * as its first character, followed by "-last" when the run holds more
 * than one character.  Characters are rendered with readable_form().
 */
void list_character_set( file, cset )
FILE *file;
int cset[];
{
	int pos = 0;

	putc( '[', file );

	while ( pos < csize )
	{
		int first, last;

		if ( ! cset[pos] )
		{
			++pos;
			continue;
		}

		first = pos;

		putc( ' ', file );

		fputs( readable_form( first ), file );

		/* Extend the run as far as it goes. */
		last = first;
		while ( last + 1 < csize && cset[last + 1] )
			++last;

		if ( last > first )
			/* more than one member: show the upper end too */
			fprintf( file, "-%s", readable_form( last ) );

		putc( ' ', file );

		/* last + 1 is either csize or a known non-member, so
		 * scanning resumes one past it.
		 */
		pos = last + 2;
	}

	putc( ']', file );
}

102
to.do/unicode/changes.txt Normal file
View File

@ -0,0 +1,102 @@
Summary of changes for flex Unicode support
- ccl.c
- ccladd()
- changed call to reallocate_Character_array to reallocate_wchar_array
- ecs.c
- mkeccl()
- changed type of ccls from Char to wchar_t
- flex.1
- added description of -U option
- added extra qualifier to -Ca option regarding usage with -U
- modified -C, -Cf, and -CF options regarding usage with -U
- flex.skl
- changed all references of char (except error messages) to YY_CHAR
- added new insertion point for defining YY_CHAR and YY_SC_TO_UI()
- yy_scan_bytes()
- renamed to yy_scan_chars to avoid confusion with 2-byte chars
- renamed param bytes to chars
- ECHO
- redefined C version as fwrite(yytext, sizeof(YY_CHAR), yyleng, yyout)
- YY_INPUT
- removed char* cast on param buf of C++ version
- yyFlexLexer::LexerInput()
- changed get() call to read((unsigned char *) buf, sizeof(YY_CHAR))
- changed read() call to read((unsigned char *) buf, max_size *
sizeof(YY_CHAR))
- changed gcount() call to gcount() / sizeof(YY_CHAR)
- yyFlexLexer::LexerOutput()
- changed write() call to write((unsigned char *) buf, size *
sizeof(YY_CHAR))
- yy_get_next_buffer()
- yy_flex_realloc() call
- changed param b->yy_buf_size + 2 to
(b->yy_buf_size + 2) * sizeof(YY_CHAR)
- input() and yyFlexLexer::yyinput()
- changed line c = *(unsigned char *) yy_c_buf_p; to
c = YY_SC_TO_UI(*yy_c_buf_p);
- flexdef.h
- defined CSIZE as 65536
- changed myesc() proto to return int
- changed type of ccltbl from Char * to wchar_t *
- added allocate_wchar_array() and reallocate_wchar_array() macros
- changed mkeccl() proto's first param to wchar_t[]
- changed cshell() proto's first param to wchar_t[]
- FlexLexer.h
- changed all references of char (except error messages) to YY_CHAR
- added description about typedef'ing YY_CHAR before inclusion
- gen.c
- changed appropriate references of char in output strings to YY_CHAR
- added C_uchar_decl and C_ushort_decl for 16-bit yy_ec type bump-down
- genecs()
- added code for 16-bit yy_ec type bump; -Ca bumps type to long
- make_tables()
- YY_INPUT
- read redefined as read(..., max_size * sizeof(YY_CHAR))
- added code to use getwc() and WEOF for 16-bit interactive
- fread redefined as fread(buf, sizeof(YY_CHAR), max_size, yyin)
- main.c
- changed type of ccltbl from Char * to wchar_t *
- check_options()
- changed default csize from CSIZE to 256 due to redef of CSIZE
- added code to check for options incompatible with -U
- added code to output typedef of YY_CHAR to skeleton, plus extra call to
skelout() to get down to original insertion point
- flexend()
- added code to print "U" when printing stats
- flexinit()
- added code to set csize for option -U
- changed assignment of csize in option -8 from CSIZE to 256 due to redef
of CSIZE
- readin()
- changed appropriate references of char in output strings to YY_CHAR
- removed output of YY_CHAR typedef; now located in check_options()
- usage()
- added fprintf for -U usage
- misc.c
- check_char()
- added code to distinguish chars needing -8 and -U flags
- cshell()
- changed type of v from Char to wchar_t
- changed type of k from Char to wchar_t
- myesc()
- now returns an int to handle 16-bit escape sequences
- changed esc_char from Char to unsigned int as per htoi() and otoi()
- scan.l
- changed ESCSEQ to accept 6 digit octal escapes and 4 digit hex escapes
- removed myesc() and ndlookup() protos
- added option "16bit"
- tblcmp.c
- mktemplate
- changed type of transset from Char to wchar_t

225
to.do/unicode/ecs.c Normal file
View File

@ -0,0 +1,225 @@
/* ecs - equivalence class routines */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header$ */
#include "flexdef.h"
/* ccl2ecl - convert character classes to set of equivalence classes */
void ccl2ecl()
{
int i, ich, newlen, cclp, ccls, cclmec;
for ( i = 1; i <= lastccl; ++i )
{
/* We loop through each character class, and for each character
* in the class, add the character's equivalence class to the
* new "character" class we are creating. Thus when we are all
* done, character classes will really consist of collections
* of equivalence classes
*/
newlen = 0;
cclp = cclmap[i];
for ( ccls = 0; ccls < ccllen[i]; ++ccls )
{
ich = ccltbl[cclp + ccls];
cclmec = ecgroup[ich];
if ( cclmec > 0 )
{
ccltbl[cclp + newlen] = cclmec;
++newlen;
}
}
ccllen[i] = newlen;
}
}
/* cre8ecs - number the equivalence classes
 *
 * fwd[] and bck[] are the forward and backward linked lists of class
 * members, both indexed 1..num.  Every element whose bck entry is NIL is
 * taken as the head of a class: it receives the next class number in
 * bck[], and each element reached from it through fwd[] receives the
 * negated number.  Afterwards ABS( bck[x] ) is the class number of x,
 * and a positive bck[x] marks x as the representative of its class.
 *
 * Returns the number of classes created.
 */
int cre8ecs( fwd, bck, num )
int fwd[], bck[], num;
{
	int classes = 0;
	int head;

	for ( head = 1; head <= num; ++head )
	{
		int member;

		if ( bck[head] != NIL )
			continue;	/* not the start of a chain */

		++classes;
		bck[head] = classes;

		member = fwd[head];
		while ( member != NIL )
		{
			bck[member] = -classes;
			member = fwd[member];
		}
	}

	return classes;
}
/* mkeccl - update equivalence classes based on character class xtions
 *
 * synopsis
 *    wchar_t ccls[];
 *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
 *    void mkeccl( wchar_t ccls[], int lenccl, int fwd[llsiz],
 *			int bck[llsiz], int llsiz, int NUL_mapping );
 *
 * ccls contains the elements of the character class, lenccl is the
 * number of elements in the ccl, fwd is the forward link-list of equivalent
 * characters, bck is the backward link-list, and llsiz size of the link-list.
 *
 * NUL_mapping is the value which NUL (0) should be mapped to.
 *
 * cclflags[] is static scratch space indexed by position within ccls.  A
 * flag is set once the member at that position has been linked into a new
 * class, and is cleared again when the scan for the next member to
 * process steps over it.
 */
void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
wchar_t ccls[];
int lenccl, fwd[], bck[], llsiz, NUL_mapping;
{
	int cclp, oldec, newec;
	int cclm, i, j;
	static unsigned char cclflags[CSIZE];	/* initialized to all '\0' */

	/* Note that it doesn't matter whether or not the character class is
	 * negated.  The same results will be obtained in either case.
	 */

	cclp = 0;

	while ( cclp < lenccl )
	{
		cclm = ccls[cclp];

		if ( NUL_mapping && cclm == 0 )
			cclm = NUL_mapping;

		oldec = bck[cclm];
		newec = cclm;

		j = cclp + 1;

		for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
		{ /* look for the symbol in the character class */
			for ( ; j < lenccl; ++j )
			{
				register int ccl_char;

				if ( NUL_mapping && ccls[j] == 0 )
					ccl_char = NUL_mapping;
				else
					ccl_char = ccls[j];

				if ( ccl_char > i )
					break;

				if ( ccl_char == i && ! cclflags[j] )
				{
					/* We found an old companion of cclm
					 * in the ccl.  Link it into the new
					 * equivalence class and flag it as
					 * having been processed.
					 */

					bck[i] = newec;
					fwd[newec] = i;
					newec = i;
					/* Set flag so we don't reprocess. */
					cclflags[j] = 1;

					/* Get next equivalence class member. */
					/* continue 2 */
					goto next_pt;
				}
			}

			/* Symbol isn't in character class.  Put it in the old
			 * equivalence class.
			 */

			bck[i] = oldec;

			if ( oldec != NIL )
				fwd[oldec] = i;

			oldec = i;

			next_pt: ;
		}

		if ( bck[cclm] != NIL || oldec != bck[cclm] )
		{
			bck[cclm] = NIL;
			fwd[oldec] = NIL;
		}

		fwd[newec] = NIL;

		/* Find next ccl member to process.  The bound is tested
		 * before the flag is read: with lenccl == CSIZE the reverse
		 * order would read cclflags[CSIZE], one element past the
		 * end of the array.
		 */

		for ( ++cclp; cclp < lenccl && cclflags[cclp]; ++cclp )
		{
			/* Reset "doesn't need processing" flag. */
			cclflags[cclp] = 0;
		}
	}
}
/* mkechar - give a single character an equivalence class of its own
 *
 * If tch is currently linked to neighbours through fwd[] / bck[], those
 * neighbours are joined to each other, and tch's own links are then set
 * to NIL.
 */
void mkechar( tch, fwd, bck )
int tch, fwd[], bck[];
{
	int next = fwd[tch];
	int prev = bck[tch];

	/* Unlink tch by splicing its neighbours together. */
	if ( next != NIL )
		bck[next] = prev;

	if ( prev != NIL )
		fwd[prev] = next;

	bck[tch] = NIL;
	fwd[tch] = NIL;
}

4099
to.do/unicode/flex.1 Normal file

File diff suppressed because it is too large Load Diff

1542
to.do/unicode/flex.skl Normal file

File diff suppressed because it is too large Load Diff

1062
to.do/unicode/flexdef.h Normal file

File diff suppressed because it is too large Load Diff

1650
to.do/unicode/gen.c Normal file

File diff suppressed because it is too large Load Diff

1228
to.do/unicode/main.c Normal file

File diff suppressed because it is too large Load Diff

894
to.do/unicode/misc.c Normal file
View File

@ -0,0 +1,894 @@
/* misc - miscellaneous flex routines */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header$ */
#include "flexdef.h"
/* action_define - append "#define defname value" to the action text
 *
 * Names longer than MAXLINE / 2 are rejected with a pinpointed message
 * and nothing is added.
 */
void action_define( defname, value )
char *defname;
int value;
{
	char buf[MAXLINE];

	if ( (int) strlen( defname ) <= MAXLINE / 2 )
	{
		sprintf( buf, "#define %s %d\n", defname, value );
		add_action( buf );
	}
	else
		format_pinpoint_message(
			_( "name \"%s\" ridiculously long" ), defname );
}
/* add_action - append text to the accumulated action array
 *
 * The array is enlarged until the new text fits with a little slop to
 * spare, the text is copied to the current end position, and
 * action_index is advanced past it.
 */
void add_action( new_text )
char *new_text;
{
	int len = strlen( new_text );

	while ( len + action_index >= action_size - 10 /* slop */ )
	{
		int doubled = action_size * 2;

		/* Doubling is preferred.  If the doubled size is not
		 * positive, grow by an eighth instead, to try to avoid
		 * overflow on 16-bit machines.
		 */
		if ( doubled > 0 )
			action_size = doubled;
		else
			action_size += action_size / 8;

		action_array =
			reallocate_character_array( action_array, action_size );
	}

	strcpy( action_array + action_index, new_text );

	action_index += len;
}
/* allocate_array - allocate memory for `size' elements of `element_size'
 *                  bytes each
 *
 * The storage comes from flex_alloc(); a nil result is reported through
 * flexfatal().
 */
void *allocate_array( size, element_size )
int size;
size_t element_size;
{
	register void *mem = flex_alloc( element_size * size );

	if ( ! mem )
		flexfatal(
			_( "memory allocation failed in allocate_array()" ) );

	return mem;
}
/* all_lower - return 1 if every character of str is an ASCII lower-case
 *             letter, 0 otherwise (an empty string yields 1)
 */
int all_lower( str )
register char *str;
{
	for ( ; *str; ++str )
		if ( ! ( isascii( (Char) *str ) && islower( *str ) ) )
			return 0;

	return 1;
}
/* all_upper - return 1 if every character of str is an ASCII upper-case
 *             letter, 0 otherwise (an empty string yields 1)
 */
int all_upper( str )
register char *str;
{
	for ( ; *str; ++str )
		if ( ! ( isascii( (Char) *str ) && isupper( *str ) ) )
			return 0;

	return 1;
}
/* bubble - bubble sort an integer array into increasing order
 *
 * synopsis
 *   int v[n], n;
 *   void bubble( v, n );
 *
 * description
 *   Sorts elements v[1] through v[n] in place; v[0] is never examined
 *   or modified.
 *
 * passed
 *   v - the array to be sorted
 *   n - the number of elements of 'v' to be sorted
 */
void bubble( v, n )
int v[], n;
{
	int last, pos;

	/* After each outer pass the largest remaining value has settled
	 * at v[last].
	 */
	for ( last = n; last > 1; --last )
	{
		for ( pos = 1; pos < last; ++pos )
		{
			int left = v[pos];
			int right = v[pos + 1];

			if ( left > right )
			{
				/* out of order: swap the neighbours */
				v[pos] = right;
				v[pos + 1] = left;
			}
		}
	}
}
/* check_char - verify that a character code is one we can handle
 *
 * Codes at or above CSIZE are reported as bad characters.  Codes at or
 * above the active csize are reported with a hint naming the flag that
 * would admit them: -8 for codes below 256, -U otherwise.  All reports
 * are issued through lerrsf().
 */
void check_char( c )
int c;
{
	if ( c >= CSIZE )
		lerrsf( _( "bad character '%s' detected in check_char()" ),
			readable_form( c ) );

	if ( c < csize )
		return;

	if ( c >= 256 )
		lerrsf(
		_( "scanner requires -U flag to use the character %s" ),
			readable_form( c ) );
	else
		lerrsf(
		_( "scanner requires -8 flag to use the character %s" ),
			readable_form( c ) );
}
/* clower - replace upper-case letter to lower-case */
Char clower( c )
register int c;
{
return (Char) ((isascii( c ) && isupper( c )) ? tolower( c ) : c);
}
/* copy_string - return a freshly allocated copy of a NUL-terminated string
 *
 * The storage comes from flex_alloc(); an allocation failure is reported
 * through flexfatal().
 */
char *copy_string( str )
register const char *str;
{
	register const char *end;
	register char *dst;
	char *copy;
	unsigned int size;

	/* Locate the terminating NUL to learn the length. */
	end = str;
	while ( *end )
		++end;

	size = (end - str + 1) * sizeof( char );

	copy = (char *) flex_alloc( size );

	if ( copy == NULL )
		flexfatal( _( "dynamic memory failure in copy_string()" ) );

	/* Copy everything up to and including the NUL. */
	dst = copy;
	do
		*dst = *str++;
	while ( *dst++ != 0 );

	return copy;
}
/* copy_unsigned_string -
 *    return a freshly allocated copy of a zero-terminated Char string
 *
 * The storage is obtained with allocate_Character_array().
 */
Char *copy_unsigned_string( str )
register Char *str;
{
	register Char *p;
	Char *copy;

	/* Locate the terminator to learn the length. */
	p = str;
	while ( *p )
		++p;

	copy = allocate_Character_array( p - str + 1 );

	/* Copy everything up to and including the terminator. */
	p = copy;
	do
		*p = *str++;
	while ( *p++ != 0 );

	return copy;
}
/* cshell - shell sort a character array into increasing order
 *
 * synopsis
 *
 *   wchar_t v[n];
 *   int n, special_case_0;
 *   cshell( v, n, special_case_0 );
 *
 * description
 *   Shell sorts the first n elements of v in place.  When special_case_0
 *   is true, an element equal to 0 is treated as having infinite weight,
 *   so zeros end up after all other values.
 *
 * passed
 *   v - array to be sorted
 *   n - number of elements of v to be sorted
 */
void cshell( v, n, special_case_0 )
wchar_t v[];
int n, special_case_0;
{
	int gap, i, lo, hi;

	for ( gap = n / 2; gap > 0; gap = gap / 2 )
		for ( i = gap; i < n; ++i )
			for ( lo = i - gap; lo >= 0; lo = lo - gap )
			{
				int in_order;
				wchar_t tmp;

				hi = lo + gap;

				if ( special_case_0 )
					/* a 0 on the high side is always in
					 * place; a 0 on the low side never is
					 */
					in_order = v[hi] == 0 ||
						( v[lo] != 0 &&
						  v[lo] <= v[hi] );
				else
					in_order = v[lo] <= v[hi];

				if ( in_order )
					break;

				tmp = v[lo];
				v[lo] = v[hi];
				v[hi] = tmp;
			}
}
/* dataend - close off a block of data declarations
 *
 * Flushes any partially written line, emits the initializer terminator,
 * and resets the line and position counters.
 */
void dataend()
{
	if ( datapos > 0 )
		dataflush();

	/* add terminator for initialization; { for vi */
	outn( " } ;\n" );

	datapos = 0;
	dataline = 0;
}
/* dataflush - end the current line of generated data
 *
 * Emits a newline and counts the line.  Once NUMDATALINES lines have
 * been written, an extra blank line is emitted so the table is grouped
 * into large blocks that make elements easy to find, and the line count
 * starts over.
 */
void dataflush()
{
	outc( '\n' );

	++dataline;

	if ( dataline >= NUMDATALINES )
	{
		outc( '\n' );
		dataline = 0;
	}

	/* Nothing has been written on the new line yet. */
	datapos = 0;
}
/* flexerror - report an error and shut down
 *
 * Writes "program_name: msg" to stderr, then calls flexend( 1 ).
 */
void flexerror( msg )
const char msg[];
{
	fprintf( stderr, "%s: %s\n", program_name, msg );

	flexend( 1 );
}
/* flexfatal - report a fatal internal error and exit
 *
 * Writes the message to stderr, prefixed with the program name, and
 * exits with status 1 directly rather than going through flexend().
 */
void flexfatal( msg )
const char msg[];
{
	fprintf( stderr, _( "%s: fatal internal error, %s\n" ),
		program_name, msg );

	exit( 1 );
}
/* htoi - convert a hexadecimal digit string to an integer value
 *
 * str is parsed with sscanf( "%x" ).  If it does not begin with a
 * hexadecimal number the result is 0, matching what myctoi() does for
 * unparsable decimal input, instead of an indeterminate value.
 */
int htoi( str )
Char str[];
{
	/* sscanf() leaves its target untouched on a matching failure, so
	 * start from a defined value.
	 */
	unsigned int result = 0;

	(void) sscanf( (char *) str, "%x", &result );

	return result;
}
/* lerrif - report an error message formatted with one integer argument
 *
 * msg is a printf-style format containing a single integer conversion.
 * The formatted text is passed on to flexerror().  Output longer than
 * MAXLINE - 1 characters is truncated rather than overrunning the
 * buffer.
 */
void lerrif( msg, arg )
const char msg[];
int arg;
{
	char errmsg[MAXLINE];

	/* Bounded write: the format is supplied by the caller, so its
	 * length is not known here.
	 */
	(void) snprintf( errmsg, sizeof( errmsg ), msg, arg );
	flexerror( errmsg );
}
/* lerrsf - report an error message formatted with one string argument
 *
 * msg is a printf-style format containing a single string conversion.
 * The formatted text is passed on to flexerror().  Output longer than
 * MAXLINE - 1 characters is truncated rather than overrunning the
 * buffer.
 */
void lerrsf( msg, arg )
const char msg[], arg[];
{
	char errmsg[MAXLINE];

	/* Bounded write: arg is a caller-supplied string of unknown
	 * length, so an unbounded sprintf() could write past errmsg.
	 */
	(void) snprintf( errmsg, sizeof( errmsg ), msg, arg );
	flexerror( errmsg );
}
/* line_directive_out - spit out a "#line" statement
 *
 * When do_infile is true the directive names the input file and uses
 * linenum; otherwise it names the output file and uses out_linenum.
 * Nothing is emitted when line directives are disabled or the relevant
 * file name is unknown.  Backslashes in the file name are doubled.
 *
 * If output_file is nil the directive is appended to the accumulated
 * actions instead of being written to a file.
 */
void line_directive_out( output_file, do_infile )
FILE *output_file;
int do_infile;
{
	static char line_fmt[] = "#line %d \"%s\"\n";
	char filename[MAXLINE];
	/* Sized to hold the format text, a completely filled filename[]
	 * and the decimal form of any int (3 characters per byte is a
	 * safe upper bound), so the sprintf() calls below cannot overrun
	 * it even when the escaped name is nearly MAXLINE characters long.
	 */
	char directive[sizeof( line_fmt ) + sizeof( filename ) +
			3 * sizeof( int )];
	char *s1, *s2, *s3;

	if ( ! gen_line_dirs )
		return;

	if ( (do_infile && ! infilename) || (! do_infile && ! outfilename) )
		/* don't know the filename to use, skip */
		return;

	s1 = do_infile ? infilename : outfilename;
	s2 = filename;
	/* Stop two short of the end: one iteration may store two
	 * characters, and the terminator needs a slot as well.
	 */
	s3 = &filename[sizeof( filename ) - 2];

	while ( s2 < s3 && *s1 )
	{
		if ( *s1 == '\\' )
			/* Escape the '\' */
			*s2++ = '\\';

		*s2++ = *s1++;
	}

	*s2 = '\0';

	if ( do_infile )
		sprintf( directive, line_fmt, linenum, filename );
	else
	{
		if ( output_file == stdout )
			/* Account for the line directive itself. */
			++out_linenum;

		sprintf( directive, line_fmt, out_linenum, filename );
	}

	/* If output_file is nil then we should put the directive in
	 * the accumulated actions.
	 */
	if ( output_file )
	{
		fputs( directive, output_file );
	}
	else
		add_action( directive );
}
/* mark_defs1 - mark the current position in the action array as
 *              representing where the user's section 1 definitions end
 *              and the prolog begins
 *
 * A NUL is stored at the current position, the prolog and action
 * offsets are both set to the position after it, and a second NUL is
 * stored there.
 */
void mark_defs1()
{
	defs1_offset = 0;

	action_array[action_index] = '\0';
	++action_index;

	prolog_offset = action_index;
	action_offset = prolog_offset;

	action_array[action_index] = '\0';
}
/* mark_prolog - note where the action prolog ends
 *
 * NUL-terminates the prolog text in the action array and records the
 * following position as the start of the actions.
 */

void mark_prolog()
	{
	/* Close off the prolog text. */
	action_array[action_index] = '\0';
	++action_index;

	action_offset = action_index;

	/* Keep the array terminated at the new position. */
	action_array[action_index] = '\0';
	}
/* mk2data - generate a data statement for a two-dimensional array
 *
 * Emits "value" as the next element of the 2-D array initializer being
 * written.  Elements on a line are separated by commas; once
 * NUMDATAITEMS elements have been placed on the current line a trailing
 * comma is written and dataflush() is called before continuing.
 */

void mk2data( value )
int value;
	{
	if ( datapos >= NUMDATAITEMS )
		{
		/* Current line is full. */
		outc( ',' );
		dataflush();
		}

	if ( datapos != 0 )
		/* Separate from the previous element on this line. */
		outc( ',' );
	else
		/* First element on the line: indent. */
		out( " " );

	++datapos;

	out_dec( "%5d", value );
	}
/* mkdata - generate a data statement
 *
 * Emits "value" as the next element of the array initializer being
 * written.  Elements on a line are separated by commas; once
 * NUMDATAITEMS elements have been placed on the current line a trailing
 * comma is written and dataflush() is called before continuing.
 */

void mkdata( value )
int value;
	{
	if ( datapos >= NUMDATAITEMS )
		{
		/* Current line is full. */
		outc( ',' );
		dataflush();
		}

	if ( datapos != 0 )
		/* Separate from the previous element on this line. */
		outc( ',' );
	else
		/* First element on the line: indent. */
		out( " " );

	++datapos;

	out_dec( "%5d", value );
	}
/* myctoi - return the integer represented by a string of digits
 *
 * The string is read with sscanf's "%d" conversion.  If it does not begin
 * with a decimal number, 0 is returned.
 */

int myctoi( array )
char array[];
	{
	int parsed = 0;

	(void) sscanf( array, "%d", &parsed );

	return parsed;
	}
/* myesc - return character corresponding to escape sequence
 *
 * array holds the text of an escape sequence.  Only array[1] and the
 * characters after it are examined; array[0] is presumably the leading
 * backslash (TODO confirm against the callers).
 *
 * Returns the value of the escaped character: the usual C meanings for
 * \b \f \n \r \t \a \v, the numeric value for octal (\<digits>) and
 * hexadecimal (\x<digits>) escapes, and array[1] itself for anything
 * else.
 *
 * For numeric escapes the array is modified temporarily (a NUL is stored
 * after the digits while they are converted) and restored before
 * returning, so it must be writable.
 */
int myesc( array )
Char array[];
{
Char c;
unsigned int esc_char;
/* array[1] selects the kind of escape. */
switch ( array[1] )
{
case 'b': return '\b';
case 'f': return '\f';
case 'n': return '\n';
case 'r': return '\r';
case 't': return '\t';
/* Compilers that do not define __STDC__ may not know \a and \v, so
 * their values are spelled out numerically for them.
 */
#if __STDC__
case 'a': return '\a';
case 'v': return '\v';
#else
case 'a': return '\007';
case 'v': return '\013';
#endif
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
{ /* \<octal> */
int sptr = 1;
/* Find the end of the run of digits.  Note that the test is
 * isdigit(), so '8' and '9' are included in the run even though
 * otoi() stops converting at the first non-octal digit.
 */
while ( isascii( array[sptr] ) &&
isdigit( array[sptr] ) )
/* Don't increment inside loop control
* because if isdigit() is a macro it might
* expand into multiple increments ...
*/
++sptr;
/* Temporarily terminate the string after the digits, convert
 * them, then put the overwritten character back.
 */
c = array[sptr];
array[sptr] = '\0';
esc_char = otoi( array + 1 );
array[sptr] = c;
return esc_char;
}
case 'x':
{ /* \x<hex> */
/* The digits start after the 'x'. */
int sptr = 2;
while ( isascii( array[sptr] ) &&
isxdigit( (char) array[sptr] ) )
/* Don't increment inside loop control
* because if isdigit() is a macro it might
* expand into multiple increments ...
*/
++sptr;
/* Same terminate / convert / restore sequence as for octal.
 * NOTE(review): if no hex digit follows the 'x', htoi() is
 * handed an empty string here.
 */
c = array[sptr];
array[sptr] = '\0';
esc_char = htoi( array + 2 );
array[sptr] = c;
return esc_char;
}
default:
/* Any other escaped character stands for itself. */
return array[1];
}
}
/* otoi - convert an octal digit string to an integer value
 *
 * str is parsed with sscanf's "%o" conversion.  If str does not begin
 * with an octal number (for instance, if it is empty) nothing is
 * converted and 0 is returned.
 */

int otoi( str )
Char str[];
	{
	/* Start from a defined value: sscanf() leaves result untouched
	 * when no conversion takes place, and returning it uninitialized
	 * would hand the caller an indeterminate value.
	 */
	unsigned int result = 0;

	(void) sscanf( (char *) str, "%o", &result );

	return result;
	}
/* out - various flavors of outputting a (possibly formatted) string for the
 *	 generated scanner, keeping track of the line count.
 *
 * out() itself writes str to stdout unchanged and counts the newlines
 * it contains.
 */

void out( str )
const char str[];
	{
	(void) fputs( str, stdout );

	/* Keep out_linenum in step with what was just written. */
	out_line_count( str );
	}
/* out_dec - write one int to stdout using the printf-style format fmt;
 * newlines appearing in fmt are added to the output line count.
 */
void out_dec( fmt, n )
const char fmt[];
int n;
	{
	fprintf( stdout, fmt, n );

	out_line_count( fmt );
	}
/* out_dec2 - write two ints to stdout using the printf-style format fmt;
 * newlines appearing in fmt are added to the output line count.
 */
void out_dec2( fmt, n1, n2 )
const char fmt[];
int n1, n2;
	{
	fprintf( stdout, fmt, n1, n2 );

	out_line_count( fmt );
	}
/* out_hex - write one unsigned int to stdout using the printf-style format
 * fmt; newlines appearing in fmt are added to the output line count.
 */
void out_hex( fmt, x )
const char fmt[];
unsigned int x;
	{
	fprintf( stdout, fmt, x );

	out_line_count( fmt );
	}
/* out_line_count - advance out_linenum by the number of newline
 * characters contained in str.
 */
void out_line_count( str )
const char str[];
	{
	register const char *p;

	for ( p = str; *p; ++p )
		{
		if ( *p == '\n' )
			++out_linenum;
		}
	}
/* out_str - write one string to stdout using the printf-style format fmt;
 * newlines in both fmt and str are added to the output line count.
 */
void out_str( fmt, str )
const char fmt[], str[];
	{
	fprintf( stdout, fmt, str );

	out_line_count( fmt );
	out_line_count( str );
	}
/* out_str3 - write three strings to stdout using the printf-style format
 * fmt; newlines in fmt and in each of the strings are added to the output
 * line count.
 */
void out_str3( fmt, s1, s2, s3 )
const char fmt[], s1[], s2[], s3[];
	{
	fprintf( stdout, fmt, s1, s2, s3 );

	out_line_count( fmt );
	out_line_count( s1 );
	out_line_count( s2 );
	out_line_count( s3 );
	}
/* out_str_dec - write a string followed by an int to stdout using the
 * printf-style format fmt; newlines in fmt and str are added to the output
 * line count.
 */
void out_str_dec( fmt, str, n )
const char fmt[], str[];
int n;
	{
	fprintf( stdout, fmt, str, n );

	out_line_count( fmt );
	out_line_count( str );
	}
/* outc - write the single character c to stdout, counting it if it is a
 * newline.
 */
void outc( c )
int c;
	{
	putchar( c );

	if ( c != '\n' )
		return;

	++out_linenum;
	}
/* outn - write str to stdout followed by a newline.  The output line count
 * is advanced for the newlines inside str and once more for the newline
 * that puts() appends.
 */
void outn( str )
const char str[];
	{
	puts( str );

	out_line_count( str );

	/* The newline added by puts(). */
	++out_linenum;
	}
/* readable_form - return the human-readable form of a character
 *
 * Characters in the range 0..31 and those >= 127 are rendered as a C
 * escape (\b, \f, \n, \r, \t and, with an ANSI compiler, \a and \v) or
 * else as a backslash followed by at least three octal digits.  A blank
 * is rendered as ' ' (with the quotes); every other character is
 * rendered as itself.
 *
 * The returned string is either a string constant or static storage that
 * is overwritten by the next call.
 */

char *readable_form( c )
register int c;
	{
	static char rform[10];
	int needs_escape = (c >= 0 && c < 32) || c >= 127;

	if ( ! needs_escape )
		{
		if ( c == ' ' )
			return "' '";

		rform[0] = c;
		rform[1] = '\0';
		return rform;
		}

	switch ( c )
		{
		case '\b': return "\\b";
		case '\f': return "\\f";
		case '\n': return "\\n";
		case '\r': return "\\r";
		case '\t': return "\\t";

#if __STDC__
		case '\a': return "\\a";
		case '\v': return "\\v";
#endif

		default:
			break;
		}

	/* No symbolic escape for this one: use its octal value. */
	(void) sprintf( rform, "\\%.3o", (unsigned int) c );

	return rform;
	}
/* reallocate_array - increase the size of a dynamic array
 *
 * array is the existing allocation, size the new number of elements and
 * element_size the size in bytes of one element.  Returns the (possibly
 * moved) array.  Does not return on failure: flexfatal() is called if the
 * requested size is negative, if the byte count does not fit in a size_t,
 * or if the reallocation itself fails.
 */

void *reallocate_array( array, size, element_size )
void *array;
int size;
size_t element_size;
	{
	register void *new_array;
	size_t num_bytes;

	/* Reject requests whose byte count cannot be represented;
	 * multiplying them out would silently wrap around and yield an
	 * allocation much smaller than the caller believes it has.
	 */
	if ( size < 0 ||
	     (element_size != 0 &&
	      (size_t) size > ((size_t) -1) / element_size) )
		flexfatal( _( "attempt to increase array size failed" ) );

	num_bytes = element_size * (size_t) size;

	new_array = flex_realloc( array, num_bytes );
	if ( ! new_array )
		flexfatal( _( "attempt to increase array size failed" ) );

	return new_array;
	}
/* skelout - write out one section of the skeleton file
 *
 * Description
 * Copies skelfile or skel array to stdout until a line beginning with
 * "%%" or EOF is found.
 *
 * Lines beginning with '%' are control lines and are never copied:
 *	%%	ends the section (the function returns)
 *	%+	copy the following lines only if C_plus_plus is set
 *	%-	copy the following lines only if C_plus_plus is not set
 *	%*	copy the following lines unconditionally
 * Any other character after the '%' is a fatal error.
 */
void skelout()
{
char buf_storage[MAXLINE];
char *buf = buf_storage;
/* Whether ordinary lines are currently being copied; changed by the
 * %+, %- and %* control lines.
 */
int do_copy = 1;
/* Loop pulling lines either from the skelfile, if we're using
* one, or from the skel[] array.
*/
/* When reading from skel[], buf is repointed at the array entry itself
 * and skel_ind is advanced, so successive calls continue where the
 * previous one stopped.
 */
while ( skelfile ?
(fgets( buf, MAXLINE, skelfile ) != NULL) :
((buf = (char *) skel[skel_ind++]) != 0) )
{ /* got a line, from either source */
if ( buf[0] == '%' )
{ /* control line */
switch ( buf[1] )
{
case '%':
/* End of this section. */
return;
case '+':
do_copy = C_plus_plus;
break;
case '-':
do_copy = ! C_plus_plus;
break;
case '*':
do_copy = 1;
break;
default:
flexfatal(
_( "bad line in skeleton file" ) );
}
}
else if ( do_copy )
{
if ( skelfile )
/* Skeleton file reads include final
* newline, skel[] array does not.
*/
out( buf );
else
outn( buf );
}
}
}
/* transition_struct_out - output a yy_trans_info structure
 *
 * Writes one "{v, n}," initializer built from element_v and element_n.
 * A newline is emitted once the current output line has no room left for
 * another entry, and an additional blank line after every tenth line.
 */

void transition_struct_out( element_v, element_n )
int element_v, element_n;
	{
	out_dec2( " {%4d,%4d },", element_v, element_n );

	datapos += TRANS_STRUCT_PRINT_LENGTH;

	if ( datapos < 79 - TRANS_STRUCT_PRINT_LENGTH )
		/* Still room for another entry on this line. */
		return;

	outc( '\n' );

	++dataline;
	if ( dataline % 10 == 0 )
		outc( '\n' );

	datapos = 0;
	}
/* yy_flex_xmalloc - allocate "size" bytes or die trying
 *
 * The following is only needed when building flex's parser using certain
 * broken versions of bison.
 *
 * Returns the memory obtained from flex_alloc(); a failed allocation is
 * reported through flexfatal().
 */

void *yy_flex_xmalloc( size )
int size;
	{
	void *result;

	result = flex_alloc( (size_t) size );

	if ( result )
		return result;

	flexfatal(
		_( "memory allocation failed in yy_flex_xmalloc()" ) );

	return result;
	}
/* zero_out - set a region of memory to 0
 *
 * Clears the size_in_bytes bytes starting at region_ptr, i.e.
 * region_ptr[0] through region_ptr[size_in_bytes - 1].  A size of zero
 * leaves memory untouched.
 */

void zero_out( region_ptr, size_in_bytes )
char *region_ptr;
size_t size_in_bytes;
	{
	size_t offset;

	for ( offset = 0; offset < size_in_bytes; ++offset )
		region_ptr[offset] = 0;
	}

710
to.do/unicode/scan.l Normal file
View File

@ -0,0 +1,710 @@
/* scan.l - scanner for flex input */
%{
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header$ */
#include "flexdef.h"
#include "parse.h"
/* Append the text just matched to the accumulated actions. */
#define ACTION_ECHO add_action( yytext )
/* Define the macro named by "def" (with value 1) in the generated
 * scanner, but only when "should_define" is true.
 */
#define ACTION_IFDEF(def, should_define) \
{ \
if ( should_define ) \
action_define( def, 1 ); \
}
/* Note that the action prolog ends at this point. */
#define MARK_END_OF_PROLOG mark_prolog();
/* Name and signature of the scanning routine generated from this file. */
#define YY_DECL \
int flexscan()
/* Return the first character of the match as a CHAR token.  The cast to
 * unsigned char keeps the value non-negative whatever the signedness of
 * plain char.  Expands to two statements, so it must not be used as the
 * unbraced body of an if or loop.
 */
#define RETURNCHAR \
yylval = (unsigned char) yytext[0]; \
return CHAR;
/* Save the matched text in nmstr and return a NAME token.
 * NOTE(review): the copy is unbounded; nmstr's size is not visible here,
 * presumably it is large enough for any name -- confirm.
 */
#define RETURNNAME \
strcpy( nmstr, yytext ); \
return NAME;
/* Push the characters of "str" from index "start" to the end back onto
 * the input, last character first, so that they are rescanned in their
 * original order.  Uses the variable "i" of the enclosing scope.
 */
#define PUT_BACK_STRING(str, start) \
for ( i = strlen( str ) - 1; i >= start; --i ) \
unput((str)[i])
/* Record a use of REJECT.  Matching is case-insensitive (%option
 * caseless), so the all_upper() test is what restricts this to the
 * upper-case spelling.
 */
#define CHECK_REJECT(str) \
if ( all_upper( str ) ) \
reject = true;
/* Record a use of yymore; as above, all_lower() restricts this to the
 * lower-case spelling.
 */
#define CHECK_YYMORE(str) \
if ( all_lower( str ) ) \
yymore_used = true;
%}
/* Options for this scanner: patterns match case-insensitively, there is
 * no default rule, the output goes to scan.c, and the start-condition
 * stack is enabled (without yy_top_state).
 */
%option caseless nodefault outfile="scan.c" stack noyy_top_state
%option nostdinit
/* Exclusive start conditions, one per lexical context of a flex input
 * file.
 */
%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
%x OPTION LINEDIR
/* Whitespace, mandatory and optional, and a newline with an optional
 * preceding carriage return.
 */
WS [[:blank:]]+
OPTWS [[:blank:]]*
NOT_WS [^[:blank:]\n]
NL \r?\n
/* Names of definitions and of start conditions. */
NAME ([[:alpha:]_][[:alnum:]_-]*)
NOT_NAME [^[:alpha:]_*\n]+
SCNAME {NAME}
/* A backslash escape: any single character, up to six octal digits, or
 * 'x' followed by up to four hexadecimal digits.
 */
ESCSEQ (\\([^\n]|[0-7]{1,6}|x[[:xdigit:]]{1,4}))
/* Characters that may appear first / later inside a character class, and
 * a character class expression such as [:alpha:].
 */
FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
CCL_CHAR ([^\\\n\]]|{ESCSEQ})
CCL_EXPR ("[:"[[:alpha:]]+":]")
/* Letters of the single-letter %-directives that are accepted and
 * ignored in section 1.
 */
LEXOPT [aceknopr]
%%
static int bracelevel, didadef, indented_code; /* brace nesting depth in code; whether a name got its definition; whether the current code block is an indented line rather than %{ ... %} */
static int doing_rule_action = false; /* true while the action of a rule is being collected */
static int option_sense; /* false while the option being read is negated with "no" */
int doing_codeblock = false; /* true while inside a %{ block that starts a line in section 2 */
int i; /* scratch index, used by PUT_BACK_STRING and when trimming definitions */
Char nmdef[MAXLINE]; /* holds the text of the definition being read */
<INITIAL>{
	/* Section 1 of the input: code blocks, comments, start condition
	 * declarations, %-directives and name definitions, up to the
	 * first %% line.
	 */
^{WS} indented_code = true; BEGIN(CODEBLOCK);
^"/*" ACTION_ECHO; yy_push_state( COMMENT );
^#{OPTWS}line{WS} yy_push_state( LINEDIR );
^"%s"{NAME}? return SCDECL;
^"%x"{NAME}? return XSCDECL;
^"%{".*{NL} {
++linenum;
/* Make the copied code refer back to the input file. */
line_directive_out( (FILE *) 0, 1 );
indented_code = false;
BEGIN(CODEBLOCK);
}
{WS} /* discard */
^"%%".* {
/* End of section 1: close off the definitions and start
 * collecting the section 2 prolog.
 */
sectnum = 2;
bracelevel = 0;
mark_defs1();
line_directive_out( (FILE *) 0, 1 );
BEGIN(SECT2PROLOG);
return SECTEND;
}
^"%pointer".*{NL} yytext_is_array = false; ++linenum;
^"%array".*{NL} yytext_is_array = true; ++linenum;
^"%option" BEGIN(OPTION); return OPTION_OP;
^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} ++linenum; /* ignore */
^"%"{LEXOPT}{WS}.*{NL} ++linenum; /* ignore */
^"%"[^sxaceknopr{}].* synerr( _( "unrecognized '%' directive" ) );
^{NAME} {
/* A name at the start of a line begins a definition; the
 * definition text is picked up in PICKUPDEF.
 */
strcpy( nmstr, yytext );
didadef = false;
BEGIN(PICKUPDEF);
}
{SCNAME} RETURNNAME;
^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */
{OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */
}
<COMMENT>{
	/* Inside a C comment.  The state is entered with yy_push_state()
	 * and popped again at the closing delimiter; the comment text is
	 * echoed to the accumulated actions.
	 */
"*/" ACTION_ECHO; yy_pop_state();
"*" ACTION_ECHO;
[^*\n]+ ACTION_ECHO;
[^*\n]*{NL} ++linenum; ACTION_ECHO;
}
<LINEDIR>{
	/* The rest of a #line directive found in the input: a number
	 * sets linenum, a quoted string replaces infilename, and the
	 * newline returns to the state that was active before.
	 */
\n yy_pop_state();
[[:digit:]]+ linenum = myctoi( yytext );
\"[^"\n]*\" {
flex_free( (void *) infilename );
/* Copy without the opening quote, then chop the closing one. */
infilename = copy_string( yytext + 1 );
infilename[strlen( infilename ) - 1] = '\0';
}
. /* ignore spurious characters */
}
<CODEBLOCK>{
	/* Code in section 1, either between %{ and %} or on an indented
	 * line; it is echoed to the accumulated actions.
	 */
^"%}".*{NL} ++linenum; BEGIN(INITIAL);
{NAME}|{NOT_NAME}|. ACTION_ECHO;
{NL} {
++linenum;
ACTION_ECHO;
/* Indented code lasts for a single line only. */
if ( indented_code )
BEGIN(INITIAL);
}
}
<PICKUPDEF>{
	/* The remainder of a name definition line; the name itself is
	 * already in nmstr.
	 */
{WS} /* separates name and definition */
{NOT_WS}.* {
strcpy( (char *) nmdef, yytext );
/* Skip trailing whitespace. */
for ( i = strlen( (char *) nmdef ) - 1;
i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
--i )
;
nmdef[i + 1] = '\0';
/* Enter the name / definition pair. */
ndinstal( nmstr, nmdef );
didadef = true;
}
{NL} {
/* Reaching the end of the line without having seen any
 * definition text is an error.
 */
if ( ! didadef )
synerr( _( "incomplete name definition" ) );
BEGIN(INITIAL);
++linenum;
}
}
<OPTION>{
	/* The arguments of a %option directive.  option_sense is reset
	 * to true by the whitespace in front of each option and inverted
	 * by every "no" prefix, so each action below sees false for a
	 * negated option and true otherwise.
	 */
{NL} ++linenum; BEGIN(INITIAL);
{WS} option_sense = true;
"=" return '=';
no option_sense = ! option_sense;
7bit csize = option_sense ? 128 : 256;
8bit csize = option_sense ? 256 : 128;
16bit csize = option_sense ? 65536 : 256;
align long_align = option_sense;
always-interactive {
action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
}
array yytext_is_array = option_sense;
backup backing_up_report = option_sense;
batch interactive = ! option_sense;
"c++" C_plus_plus = option_sense;
caseful|case-sensitive caseins = ! option_sense;
caseless|case-insensitive caseins = option_sense;
debug ddebug = option_sense;
default spprdflt = ! option_sense;
ecs useecs = option_sense;
fast {
useecs = usemecs = false;
use_read = fullspd = true;
}
full {
useecs = usemecs = false;
use_read = fulltbl = true;
}
input ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
interactive interactive = option_sense;
lex-compat lex_compat = option_sense;
main {
action_define( "YY_MAIN", option_sense );
do_yywrap = ! option_sense;
}
meta-ecs usemecs = option_sense;
never-interactive {
action_define( "YY_NEVER_INTERACTIVE", option_sense );
}
perf-report performance_report += option_sense ? 1 : -1;
pointer yytext_is_array = ! option_sense;
read use_read = option_sense;
reject reject_really_used = option_sense;
stack action_define( "YY_STACK_USED", option_sense );
stdinit do_stdinit = option_sense;
stdout use_stdout = option_sense;
unput ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
verbose printstats = option_sense;
warn nowarn = ! option_sense;
yylineno do_yylineno = option_sense;
yymore yymore_really_used = option_sense;
yywrap do_yywrap = option_sense;
yy_push_state ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
yy_pop_state ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
yy_top_state ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
yy_scan_buffer ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
yy_scan_bytes ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
yy_scan_string ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
outfile return OPT_OUTFILE;
prefix return OPT_PREFIX;
yyclass return OPT_YYCLASS;
\"[^"\n]*\" {
/* A quoted option value: return it as a NAME, without the
 * surrounding quotes.
 */
strcpy( nmstr, yytext + 1 );
nmstr[strlen( nmstr ) - 1] = '\0';
return NAME;
}
(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
/* Anything else is not a known option; skip the rest of the
 * line.
 */
format_synerr( _( "unrecognized %%option: %s" ),
yytext );
BEGIN(RECOVER);
}
}
	/* Error recovery: discard the remainder of the current line. */
<RECOVER>.*{NL} ++linenum; BEGIN(INITIAL);
<SECT2PROLOG>{
	/* Code appearing at the start of section 2, ahead of the first
	 * rule.  It is echoed to the accumulated actions; the first
	 * non-indented line outside %{ ... %} ends the prolog.
	 */
^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */
^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */
^{WS}.* ACTION_ECHO; /* indented code in prolog */
^{NOT_WS}.* { /* non-indented code */
if ( bracelevel <= 0 )
{ /* not in %{ ... %} */
yyless( 0 ); /* put it all back */
yy_set_bol( 1 );
mark_prolog();
BEGIN(SECT2);
}
else
ACTION_ECHO;
}
.* ACTION_ECHO;
{NL} ++linenum; ACTION_ECHO;
<<EOF>> {
/* The input ended before any rule was seen. */
mark_prolog();
sectnum = 0;
yyterminate(); /* to stop the parser */
}
}
<SECT2>{
	/* Section 2 of the input: the rules.  Pattern text is returned
	 * to the parser piece by piece; whitespace after a pattern
	 * switches to collecting the rule's action.
	 */
^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */
^{OPTWS}"%{" {
indented_code = false;
doing_codeblock = true;
bracelevel = 1;
BEGIN(PERCENT_BRACE_ACTION);
}
^{OPTWS}"<" BEGIN(SC); return '<';
^{OPTWS}"^" return '^';
\" BEGIN(QUOTE); return '"';
"{"/[[:digit:]] BEGIN(NUM); return '{';
"$"/([[:blank:]]|{NL}) return '$';
{WS}"%{" {
bracelevel = 1;
BEGIN(PERCENT_BRACE_ACTION);
if ( in_rule )
{
doing_rule_action = true;
in_rule = false;
return '\n';
}
}
{WS}"|".*{NL} continued_action = true; ++linenum; return '\n';
^{WS}"/*" {
yyless( yyleng - 2 ); /* put back '/', '*' */
bracelevel = 0;
continued_action = false;
BEGIN(ACTION);
}
^{WS} /* allow indented rules */
{WS} {
/* This rule is separate from the one below because
* otherwise we get variable trailing context, so
* we can't build the scanner using -{f,F}.
*/
bracelevel = 0;
continued_action = false;
BEGIN(ACTION);
if ( in_rule )
{
doing_rule_action = true;
in_rule = false;
return '\n';
}
}
{OPTWS}{NL} {
bracelevel = 0;
continued_action = false;
BEGIN(ACTION);
unput( '\n' ); /* so <ACTION> sees it */
if ( in_rule )
{
doing_rule_action = true;
in_rule = false;
return '\n';
}
}
^{OPTWS}"<<EOF>>" |
"<<EOF>>" return EOF_OP;
^"%%".* {
sectnum = 3;
BEGIN(SECT3);
yyterminate(); /* to stop the parser */
}
"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
int cclval;
/* The match stops short of the closing bracket. */
strcpy( nmstr, yytext );
/* Check to see if we've already encountered this
* ccl.
*/
if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
{
if ( input() != ']' )
synerr( _( "bad character class" ) );
yylval = cclval;
++cclreuse;
return PREVCCL;
}
else
{
/* We fudge a bit. We know that this ccl will
* soon be numbered as lastccl + 1 by cclinit.
*/
cclinstal( (Char *) nmstr, lastccl + 1 );
/* Push back everything but the leading bracket
* so the ccl can be rescanned.
*/
yyless( 1 );
BEGIN(FIRSTCCL);
return '[';
}
}
"{"{NAME}"}" {
register Char *nmdefptr;
strcpy( nmstr, yytext + 1 );
nmstr[yyleng - 2] = '\0'; /* chop trailing brace */
if ( (nmdefptr = ndlookup( nmstr )) == 0 )
format_synerr(
_( "undefined definition {%s}" ),
nmstr );
else
{ /* push back name surrounded by ()'s */
int len = strlen( (char *) nmdefptr );
/* In lex-compatibility mode, or when the definition is
 * anchored with ^ or $, it is pushed back as is.
 */
if ( lex_compat || nmdefptr[0] == '^' ||
(len > 0 && nmdefptr[len - 1] == '$') )
{ /* don't use ()'s after all */
PUT_BACK_STRING((char *) nmdefptr, 0);
if ( nmdefptr[0] == '^' )
BEGIN(CARETISBOL);
}
else
{
/* Pushed back in reverse, so the text is rescanned
 * as "(" definition ")".
 */
unput(')');
PUT_BACK_STRING((char *) nmdefptr, 0);
unput('(');
}
}
}
[/|*+?.(){}] return (unsigned char) yytext[0];
. RETURNCHAR;
}
<SC>{
	/* Inside the <...> start condition list that prefixes a rule. */
[,*] return (unsigned char) yytext[0];
">" BEGIN(SECT2); return '>';
">"/^ BEGIN(CARETISBOL); return '>';
{SCNAME} RETURNNAME;
. {
format_synerr( _( "bad <start condition>: %s" ),
yytext );
}
}
	/* CARETISBOL is entered where a following caret has to be returned
	 * as the '^' operator; only that caret is recognized here.
	 */
<CARETISBOL>"^" BEGIN(SECT2); return '^';
<QUOTE>{
	/* Inside a double-quoted string in a pattern: characters are
	 * returned one at a time as CHAR tokens.
	 */
[^"\n] RETURNCHAR;
\" BEGIN(SECT2); return '"';
{NL} {
/* Unterminated string: report it, and return the closing
 * quote so that the parser can carry on.
 */
synerr( _( "missing quote" ) );
BEGIN(SECT2);
++linenum;
return '"';
}
}
<FIRSTCCL>{
	/* Just after the opening bracket of a character class.  A caret
	 * here is returned as '^'; the scanner stays in this state if the
	 * caret is followed by '-' or by the closing bracket.
	 */
"^"/[^-\]\n] BEGIN(CCL); return '^';
"^"/("-"|"]") return '^';
. BEGIN(CCL); RETURNCHAR;
}
<CCL>{
	/* Inside a character class, after its first character.  A '-'
	 * is returned as the range operator only when something other
	 * than the closing bracket or a newline follows it.
	 */
-/[^\]\n] return '-';
[^\]\n] RETURNCHAR;
"]" BEGIN(SECT2); return ']';
.|{NL} {
synerr( _( "bad character class" ) );
BEGIN(SECT2);
return ']';
}
}
<FIRSTCCL,CCL>{
	/* Character class expressions, valid anywhere inside a character
	 * class.  Each known one is returned as its own token.
	 */
"[:alnum:]" BEGIN(CCL); return CCE_ALNUM;
"[:alpha:]" BEGIN(CCL); return CCE_ALPHA;
"[:blank:]" BEGIN(CCL); return CCE_BLANK;
"[:cntrl:]" BEGIN(CCL); return CCE_CNTRL;
"[:digit:]" BEGIN(CCL); return CCE_DIGIT;
"[:graph:]" BEGIN(CCL); return CCE_GRAPH;
"[:lower:]" BEGIN(CCL); return CCE_LOWER;
"[:print:]" BEGIN(CCL); return CCE_PRINT;
"[:punct:]" BEGIN(CCL); return CCE_PUNCT;
"[:space:]" BEGIN(CCL); return CCE_SPACE;
"[:upper:]" BEGIN(CCL); return CCE_UPPER;
"[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT;
{CCL_EXPR} {
/* An expression with an unknown name: report it, and return
 * CCE_ALNUM in its place so that parsing can continue.
 */
format_synerr(
_( "bad character class expression: %s" ),
yytext );
BEGIN(CCL); return CCE_ALNUM;
}
}
<NUM>{
	/* Inside a {m,n} repetition count.  On an error the closing
	 * brace is returned anyway, to let the parser carry on.
	 */
[[:digit:]]+ {
yylval = myctoi( yytext );
return NUMBER;
}
"," return ',';
"}" BEGIN(SECT2); return '}';
. {
synerr( _( "bad character inside {}'s" ) );
BEGIN(SECT2);
return '}';
}
{NL} {
synerr( _( "missing }" ) );
BEGIN(SECT2);
++linenum;
return '}';
}
}
<PERCENT_BRACE_ACTION>{
	/* Code between %{ and %} in section 2.  Seeing the %} only
	 * clears bracelevel; the state is actually left at the next
	 * newline.  The rules below carrying their own start conditions
	 * are active in those conditions rather than in this one.
	 */
{OPTWS}"%}".* bracelevel = 0;
<ACTION>"/*" ACTION_ECHO; yy_push_state( COMMENT );
<CODEBLOCK,ACTION>{
"reject" {
ACTION_ECHO;
CHECK_REJECT(yytext);
}
"yymore" {
ACTION_ECHO;
CHECK_YYMORE(yytext);
}
}
{NAME}|{NOT_NAME}|. ACTION_ECHO;
{NL} {
++linenum;
ACTION_ECHO;
/* The block ends here if the %} has been seen, or if this
 * was a single line of indented code.
 */
if ( bracelevel == 0 ||
(doing_codeblock && indented_code) )
{
if ( doing_rule_action )
add_action( "\tYY_BREAK\n" );
doing_rule_action = doing_codeblock = false;
BEGIN(SECT2);
}
}
}
/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
<ACTION>{
	/* The action of a rule.  Braces are counted so that an action
	 * enclosed in braces may span several lines; the action ends at
	 * the first newline seen at brace level zero.
	 */
"{" ACTION_ECHO; ++bracelevel;
"}" ACTION_ECHO; --bracelevel;
[^[:alpha:]_{}"'/\n]+ ACTION_ECHO;
{NAME} ACTION_ECHO;
"'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */
\" ACTION_ECHO; BEGIN(ACTION_STRING);
{NL} {
++linenum;
ACTION_ECHO;
if ( bracelevel == 0 )
{
/* End of the action; a rule's action is closed off
 * with YY_BREAK.
 */
if ( doing_rule_action )
add_action( "\tYY_BREAK\n" );
doing_rule_action = false;
BEGIN(SECT2);
}
}
. ACTION_ECHO;
}
<ACTION_STRING>{
	/* Inside a string literal within an action.  Everything is
	 * echoed and braces are not counted here; an escaped character
	 * is consumed as a unit, so an escaped quote does not end the
	 * string.
	 */
[^"\\\n]+ ACTION_ECHO;
\\. ACTION_ECHO;
{NL} ++linenum; ACTION_ECHO;
\" ACTION_ECHO; BEGIN(ACTION);
. ACTION_ECHO;
}
<COMMENT,ACTION,ACTION_STRING><<EOF>> {
/* The input ended in the middle of an action, or of a comment
 * or string: report it and stop scanning.
 */
synerr( _( "EOF encountered inside an action" ) );
yyterminate();
}
<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
/* A backslash escape in a pattern, string or character class:
 * return the character it stands for.
 */
yylval = myesc( (Char *) yytext );
/* An escape counts as the first character of a class. */
if ( YY_START == FIRSTCCL )
BEGIN(CCL);
return CHAR;
}
<SECT3>{
	/* Section 3 of the input: echoed a line at a time until the end
	 * of the input.
	 */
.*(\n?) ECHO;
<<EOF>> sectnum = 0; yyterminate();
}
<*>.|\n format_synerr( _( "bad character: %s" ), yytext ); /* catch-all for every start condition */
%%
/* yywrap - called when the end of an input file is reached
 *
 * Switches to the next input file, if one remains, and returns 0 so that
 * scanning continues; returns 1 when there are no more input files.
 */
int yywrap()
	{
	--num_input_files;

	if ( num_input_files <= 0 )
		return 1;

	++input_files;
	set_input_file( *input_files );

	return 0;
	}
/* set_input_file - open the given file (if NULL, stdin) for scanning
 *
 * A file name of "-" selects stdin as well.  infilename is set to a copy
 * of the name (or to "<stdin>"), yyin to the opened stream, and linenum is
 * reset to 1.  Failure to open the file is reported through lerrsf().
 */

void set_input_file( file )
char *file;
	{
	int use_stdin = ! file || strcmp( file, "-" ) == 0;

	if ( use_stdin )
		{
		yyin = stdin;
		infilename = copy_string( "<stdin>" );
		}
	else
		{
		infilename = copy_string( file );
		yyin = fopen( infilename, "r" );

		if ( yyin == NULL )
			lerrsf( _( "can't open %s" ), file );
		}

	linenum = 1;
	}
/* Wrapper routines for accessing the scanner's malloc routines. */

/* flex_alloc - allocate size bytes; returns a null pointer on failure */
void *flex_alloc( size )
size_t size;
	{
	void *block = malloc( size );

	return block;
	}
/* flex_realloc - resize the block at ptr to size bytes; returns the
 * (possibly moved) block, or a null pointer on failure.
 */
void *flex_realloc( ptr, size )
void *ptr;
size_t size;
	{
	void *block = realloc( ptr, size );

	return block;
	}
/* flex_free - release the block at ptr; a null pointer is ignored */
void flex_free( ptr )
void *ptr;
	{
	if ( ! ptr )
		return;

	free( ptr );
	}

887
to.do/unicode/tblcmp.c Normal file
View File

@ -0,0 +1,887 @@
/* tblcmp - table compression routines */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Vern Paxson.
*
* The United States Government has rights in this work pursuant
* to contract no. DE-AC03-76SF00098 between the United States
* Department of Energy and the University of California.
*
* Redistribution and use in source and binary forms are permitted provided
* that: (1) source distributions retain this entire copyright notice and
* comment, and (2) distributions including binaries display the following
* acknowledgement: ``This product includes software developed by the
* University of California, Berkeley and its contributors'' in the
* documentation or other materials provided with the distribution and in
* all advertising materials mentioning features or use of this software.
* Neither the name of the University nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
/* $Header$ */
#include "flexdef.h"
/* declarations for functions that have forward references */
void mkentry PROTO((register int*, int, int, int, int));
void mkprot PROTO((int[], int, int));
void mktemplate PROTO((int[], int, int));
void mv2front PROTO((int));
int tbldiff PROTO((int[], int, int[]));
/* bldtbl - build table entries for dfa state
*
* synopsis
* int state[numecs], statenum, totaltrans, comstate, comfreq;
* bldtbl( state, statenum, totaltrans, comstate, comfreq );
*
* State is the statenum'th dfa state. It is indexed by equivalence class and
* gives the number of the state to enter for a given equivalence class.
* totaltrans is the total number of transitions out of the state. Comstate
* is that state which is the destination of the most transitions out of State.
* Comfreq is how many transitions there are out of State to Comstate.
*
* A note on terminology:
* "protos" are transition tables which have a high probability of
* either being redundant (a state processed later will have an identical
* transition table) or nearly redundant (a state processed later will have
* many of the same out-transitions). A "most recently used" queue of
* protos is kept around with the hope that most states will find a proto
* which is similar enough to be usable, and therefore compacting the
* output tables.
* "templates" are a special type of proto. If a transition table is
* homogeneous or nearly homogeneous (all transitions go to the same
* destination) then the odds are good that future states will also go
* to the same destination state on basically the same character set.
* These homogeneous states are so common when dealing with large rule
* sets that they merit special attention. If the transition table were
* simply made into a proto, then (typically) each subsequent, similar
* state will differ from the proto for two out-transitions. One of these
* out-transitions will be that character on which the proto does not go
* to the common destination, and one will be that character on which the
* state does not go to the common destination. Templates, on the other
* hand, go to the common state on EVERY transition character, and therefore
* cost only one difference.
*/
void bldtbl( state, statenum, totaltrans, comstate, comfreq )
int state[], statenum, totaltrans, comstate, comfreq;
{
/* Outline: a state with too few out-transitions is entered directly.
 * Otherwise the proto queue is searched for the proto that differs
 * least from "state"; if it is close enough, only the differences are
 * entered, with the proto as default, else the state is entered in full
 * (as a template or as a new proto).
 */
int extptr, extrct[2][CSIZE + 1];
int mindiff, minprot, i, d;
/* If extptr is 0 then the first array of extrct holds the result
* of the "best difference" to date, which is those transitions
* which occur in "state" but not in the proto which, to date,
* has the fewest differences between itself and "state". If
* extptr is 1 then the second array of extrct hold the best
* difference. The two arrays are toggled between so that the
* best difference to date can be kept around and also a difference
* just created by checking against a candidate "best" proto.
*/
extptr = 0;
/* If the state has too few out-transitions, don't bother trying to
* compact its tables.
*/
if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) )
mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
else
{
/* "checkcom" is true if we should only check "state" against
* protos which have the same "comstate" value.
*/
int checkcom =
comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
/* Defaults for the case that no candidate proto is found
 * below: with mindiff equal to totaltrans, every transition
 * counts as a difference.
 */
minprot = firstprot;
mindiff = totaltrans;
if ( checkcom )
{
/* Find first proto which has the same "comstate". */
for ( i = firstprot; i != NIL; i = protnext[i] )
if ( protcomst[i] == comstate )
{
minprot = i;
mindiff = tbldiff( state, minprot,
extrct[extptr] );
break;
}
}
else
{
/* Since we've decided that the most common destination
* out of "state" does not occur with a high enough
* frequency, we set the "comstate" to zero, assuring
* that if this state is entered into the proto list,
* it will not be considered a template.
*/
comstate = 0;
if ( firstprot != NIL )
{
minprot = firstprot;
mindiff = tbldiff( state, minprot,
extrct[extptr] );
}
}
/* We now have the first interesting proto in "minprot". If
* it matches within the tolerances set for the first proto,
* we don't want to bother scanning the rest of the proto list
* to see if we have any other reasonable matches.
*/
if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE )
{
/* Not a good enough match. Scan the rest of the
* protos.
*/
for ( i = minprot; i != NIL; i = protnext[i] )
{
/* The candidate's difference goes into the spare
 * array, so the best one found so far survives.
 */
d = tbldiff( state, i, extrct[1 - extptr] );
if ( d < mindiff )
{
/* New best: the spare array becomes the
 * current one.
 */
extptr = 1 - extptr;
mindiff = d;
minprot = i;
}
}
}
/* Check if the proto we've decided on as our best bet is close
* enough to the state we want to match to be usable.
*/
if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE )
{
/* No good. If the state is homogeneous enough,
* we make a template out of it. Otherwise, we
* make a proto.
*/
if ( comfreq * 100 >=
totaltrans * TEMPLATE_SAME_PERCENTAGE )
mktemplate( state, statenum, comstate );
else
{
mkprot( state, statenum, comstate );
mkentry( state, numecs, statenum,
JAMSTATE, totaltrans );
}
}
else
{ /* use the proto */
/* Only the differences from the proto are entered; the
 * proto's own table serves as the default.
 */
mkentry( extrct[extptr], numecs, statenum,
prottbl[minprot], mindiff );
/* If this state was sufficiently different from the
* proto we built it from, make it, too, a proto.
*/
if ( mindiff * 100 >=
totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
mkprot( state, statenum, comstate );
/* Since mkprot added a new proto to the proto queue,
* it's possible that "minprot" is no longer on the
* proto queue (if it happened to have been the last
* entry, it would have been bumped off). If it's
* not there, then the new proto took its physical
* place (though logically the new proto is at the
* beginning of the queue), so in that case the
* following call will do nothing.
*/
mv2front( minprot );
}
}
}
/* cmptmps - compress template table entries
 *
 * Template tables are compressed by using the 'template equivalence
 * classes', which are collections of transition character equivalence
 * classes which always appear together in templates - really
 * meta-equivalence classes.
 *
 * Each template's transitions are gathered into a scratch row (indexed
 * by meta-equivalence class when those are in use, by equivalence class
 * otherwise) and entered with mkentry() after the last real dfa state.
 */

void cmptmps()
	{
	int row_storage[CSIZE + 1];
	register int *tmp = row_storage, i, j;
	int totaltrans, trans;

	peakpairs = numtemps * numecs + tblend;

	/* When meta-equivalence classes are wanted, create them from the
	 * data gathered on template transitions; otherwise the
	 * equivalence classes are used unchanged.
	 */
	nummecs = usemecs ? cre8ecs( tecfwd, tecbck, numecs ) : numecs;

	/* Make sure there is room for an entry per template. */
	while ( lastdfa + numtemps + 1 >= current_max_dfas )
		increase_max_dfas();

	/* Loop through each template. */
	for ( i = 1; i <= numtemps; ++i )
		{
		/* Number of non-jam transitions out of this template. */
		totaltrans = 0;

		for ( j = 1; j <= numecs; ++j )
			{
			int slot;

			trans = tnxt[numecs * i + j];

			if ( ! usemecs )
				slot = j;

			else if ( tecbck[j] > 0 )
				/* The absolute value of tecbck is the
				 * meta-equivalence class of a given
				 * equivalence class, as set up by
				 * cre8ecs(); only positive entries are
				 * recorded.
				 */
				slot = tecbck[j];

			else
				continue;

			tmp[slot] = trans;

			if ( trans > 0 )
				++totaltrans;
			}

		/* It is assumed (in a rather subtle way) in the skeleton
		 * that if we're using meta-equivalence classes, the def[]
		 * entry for all templates is the jam template, i.e.,
		 * templates never default to other non-jam table entries
		 * (e.g., another template)
		 */

		/* Leave room for the jam-state after the last real state. */
		mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans );
		}
	}
/* expand_nxt_chk - expand the next check arrays
 *
 * Grows nxt and chk by MAX_XPAIRS_INCREMENT entries and zeroes the newly
 * added part of chk.
 */

void expand_nxt_chk()
	{
	register int first_new = current_max_xpairs;

	++num_reallocs;
	current_max_xpairs += MAX_XPAIRS_INCREMENT;

	nxt = reallocate_integer_array( nxt, current_max_xpairs );
	chk = reallocate_integer_array( chk, current_max_xpairs );

	/* Only chk is cleared; a zero chk entry is what the table code
	 * tests for when looking for unused slots.
	 */
	zero_out( (char *) &chk[first_new],
		(size_t) (MAX_XPAIRS_INCREMENT * sizeof( int )) );
	}
/* find_table_space - finds a space in the table for a state to be placed
*
* synopsis
* int *state, numtrans, block_start;
* int find_table_space();
*
* block_start = find_table_space( state, numtrans );
*
* State is the state to be added to the full speed transition table.
* Numtrans is the number of out-transitions for the state.
*
* find_table_space() returns the position of the start of the first block (in
* chk) able to accommodate the state
*
* In determining if a state will or will not fit, find_table_space() must take
* into account the fact that an end-of-buffer state will be added at [0],
* and an action number will be added in [-1].
*/
int find_table_space( state, numtrans )
int *state, numtrans;
{
/* Firstfree is the position of the first possible occurrence of two
* consecutive unused records in the chk and nxt arrays.
*/
register int i;
register int *state_ptr, *chk_ptr;
register int *ptr_to_last_entry_in_state;
/* If there are too many out-transitions, put the state at the end of
* nxt and chk.
*/
if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT )
{
/* If table is empty, return the first available spot in
* chk/nxt, which should be 1.
*/
if ( tblend < 2 )
return 1;
/* Start searching for table space near the end of
* chk/nxt arrays.
*/
i = tblend - numecs;
}
else
/* Start searching for table space from the beginning
* (skipping only the elements which will definitely not
* hold the new state).
*/
i = firstfree;
while ( 1 ) /* loops until a space is found */
{
/* The candidate block occupies chk[i - 1] .. chk[i + numecs];
 * grow the arrays until all of it exists.
 */
while ( i + numecs >= current_max_xpairs )
expand_nxt_chk();
/* Loops until space for end-of-buffer and action number
* are found.
*/
while ( 1 )
{
/* Check for action number space. */
if ( chk[i - 1] == 0 )
{
/* Check for end-of-buffer space. */
if ( chk[i] == 0 )
break;
else
/* Since i != 0, there is no use
* checking to see if (++i) - 1 == 0,
* because that's the same as i == 0,
* so we skip a space.
*/
i += 2;
}
else
++i;
while ( i + numecs >= current_max_xpairs )
expand_nxt_chk();
}
/* If we started search from the beginning, store the new
* firstfree for the next call of find_table_space().
*/
if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT )
firstfree = i + 1;
/* Check to see if all elements in chk (and therefore nxt)
* that are needed for the new state have not yet been taken.
*/
/* state[1..numecs] is compared against chk[i + 1 .. i + numecs];
 * despite its name, ptr_to_last_entry_in_state points one past
 * the last chk entry examined.
 */
state_ptr = &state[1];
ptr_to_last_entry_in_state = &chk[i + numecs + 1];
for ( chk_ptr = &chk[i + 1];
chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr )
/* A clash: the state has a transition here and the slot
 * is already in use.
 */
if ( *(state_ptr++) != 0 && *chk_ptr != 0 )
break;
/* The loop ran to completion only if there was no clash. */
if ( chk_ptr == ptr_to_last_entry_in_state )
return i;
else
++i;
}
}
/* inittbl - initialize transition tables
 *
 * Clears every "chk" entry, marks the table as empty (tblend is 0, and
 * "firstfree" is one beyond that), resets the template count and, when
 * meta-equivalence classes are in use, links all equivalence classes
 * into a single list.
 */

void inittbl()
	{
	register int i;

	zero_out( (char *) chk, (size_t) (current_max_xpairs * sizeof( int )) );

	tblend = 0;
	firstfree = tblend + 1;
	numtemps = 0;

	if ( ! usemecs )
		return;

	/* Set up doubly-linked meta-equivalence classes; these
	 * are sets of equivalence classes which all have identical
	 * transitions out of TEMPLATES.
	 */

	tecbck[1] = NIL;

	/* Link each class to its successor, and the successor back. */
	for ( i = 1; i < numecs; ++i )
		{
		tecfwd[i] = i + 1;
		tecbck[i + 1] = i;
		}

	tecfwd[numecs] = NIL;
	}
/* mkdeftbl - make the default, "jam" table entries
 *
 * Appends the jam state (numbered lastdfa + 1) to the end of nxt/chk:
 * one slot for the end-of-buffer transition followed by "numecs" slots
 * whose next-state is 0.  Every one of those slots is owned by the jam
 * state in "chk".  On return "jambase" and base[jamstate] both hold the
 * position of the end-of-buffer slot and "tblend" is the last slot used.
 */
void mkdeftbl()
	{
	int eob_slot, slot, last_slot;

	jamstate = lastdfa + 1;

	/* Room for the transition on the end-of-buffer character. */
	tblend += 1;

	/* Grow the tables until the whole jam row fits. */
	for ( ; tblend + numecs >= current_max_xpairs; )
		expand_nxt_chk();

	eob_slot = tblend;
	last_slot = eob_slot + numecs;

	/* Default end-of-buffer transition. */
	nxt[eob_slot] = end_of_buffer_state;
	chk[eob_slot] = jamstate;

	/* One jam (zero) transition per equivalence class. */
	for ( slot = eob_slot + 1; slot <= last_slot; ++slot )
		{
		nxt[slot] = 0;
		chk[slot] = jamstate;
		}

	jambase = tblend;
	base[jamstate] = jambase;
	def[jamstate] = 0;

	tblend += numecs;
	++numtemps;
	}
/* mkentry - create base/def and nxt/chk entries for transition array
 *
 * synopsis
 * int state[numchars + 1], numchars, statenum, deflink, totaltrans;
 * mkentry( state, numchars, statenum, deflink, totaltrans );
 *
 * "state" is a transition array "numchars" characters in size (entries
 * 1 through "numchars" are examined), "statenum" is the offset to be used
 * into the base/def tables, and "deflink" is the entry to put in the "def"
 * table entry. If "deflink" is equal to "JAMSTATE", then no attempt will
 * be made to fit zero entries of "state" (i.e., jam entries) into the
 * table. It is assumed that by linking to "JAMSTATE" they will be taken
 * care of. In any case, entries in "state" marking transitions to
 * "SAME_TRANS" are treated as though they will be taken care of by
 * wherever "deflink" points. "totaltrans" is the total number of
 * transitions out of the state. If it is below a certain threshold
 * (INTERIOR_FIT_PERCENTAGE percent of "numchars"), the tables are
 * searched for an interior spot that will accommodate the state array;
 * otherwise the state is placed at the end of the tables.
 *
 * Two cases never reach the placement code: a state with no
 * out-transitions only gets its base/def entries filled in, and a state
 * with exactly one out-transition is handed to stack1().
 */
void mkentry( state, numchars, statenum, deflink, totaltrans )
register int *state;
int numchars, statenum, deflink, totaltrans;
{
register int minec, maxec, i, baseaddr;
int tblbase, tbllast;
if ( totaltrans == 0 )
{ /* there are no out-transitions */
if ( deflink == JAMSTATE )
base[statenum] = JAMSTATE;
else
base[statenum] = 0;
def[statenum] = deflink;
return;
}
/* Find the first transition of state that we need to worry about,
 * i.e., the lowest index whose entry must actually be stored: it is
 * not SAME_TRANS, and it is not a jam (zero) entry that a JAMSTATE
 * default already covers.
 */
for ( minec = 1; minec <= numchars; ++minec )
{
if ( state[minec] != SAME_TRANS )
if ( state[minec] != 0 || deflink != JAMSTATE )
break;
}
if ( totaltrans == 1 )
{
/* There's only one out-transition. Save it for later to fill
 * in holes in the tables.
 */
stack1( statenum, minec, state[minec], deflink );
return;
}
/* Likewise find the last transition that must be stored. */
for ( maxec = numchars; maxec > 0; --maxec )
{
if ( state[maxec] != SAME_TRANS )
if ( state[maxec] != 0 || deflink != JAMSTATE )
break;
}
/* Whether we try to fit the state table in the middle of the table
 * entries we have already generated, or if we just take the state
 * table at the end of the nxt/chk tables, we must make sure that we
 * have a valid base address (i.e., non-negative). Note that
 * negative base addresses are dangerous at run-time (because indexing
 * the nxt array with one and a low-valued character will access
 * memory before the start of the array).
 */
/* Choose "baseaddr", the nxt/chk position that will hold the entry
 * for "minec". Sparse states get an interior fit; the rest go at the
 * end of the tables.
 */
if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
{
/* Attempt to squeeze it into the middle of the tables. */
baseaddr = firstfree;
while ( baseaddr < minec )
{
/* Using baseaddr would result in a negative base
 * address below; find the next free slot.
 *
 * NOTE(review): unlike the scan in the fitting loop
 * below, this one has no upper bound on baseaddr; it
 * presumably relies on chk[] being zero past tblend --
 * confirm.
 */
for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
;
}
while ( baseaddr + maxec - minec + 1 >= current_max_xpairs )
expand_nxt_chk();
/* Check every slot the state would occupy if it started at
 * baseaddr. Only entries that must be stored need a free
 * (zero) chk slot; on a collision, move baseaddr to the next
 * free slot and recheck from minec.
 */
for ( i = minec; i <= maxec; ++i )
if ( state[i] != SAME_TRANS &&
(state[i] != 0 || deflink != JAMSTATE) &&
chk[baseaddr + i - minec] != 0 )
{ /* baseaddr unsuitable - find another */
for ( ++baseaddr;
baseaddr < current_max_xpairs &&
chk[baseaddr] != 0; ++baseaddr )
;
while ( baseaddr + maxec - minec + 1 >=
current_max_xpairs )
expand_nxt_chk();
/* Reset the loop counter so we'll start all
 * over again next time it's incremented.
 */
i = minec - 1;
}
}
else
{
/* Ensure that the base address we eventually generate is
 * non-negative.
 */
baseaddr = MAX( tblend + 1, minec );
}
/* "tblbase" is the value recorded in base[]: the entry for index i
 * lives at tblbase + i, so minec lands on baseaddr and maxec on
 * tbllast.
 */
tblbase = baseaddr - minec;
tbllast = tblbase + maxec;
while ( tbllast + 1 >= current_max_xpairs )
expand_nxt_chk();
base[statenum] = tblbase;
def[statenum] = deflink;
/* Store each transition that must be kept; chk records the owning
 * state number and nxt the target state.
 */
for ( i = minec; i <= maxec; ++i )
if ( state[i] != SAME_TRANS )
if ( state[i] != 0 || deflink != JAMSTATE )
{
nxt[tblbase + i] = state[i];
chk[tblbase + i] = statenum;
}
/* If the slot "firstfree" pointed at was just used, advance it. */
if ( baseaddr == firstfree )
/* Find next free slot in tables. */
for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
;
tblend = MAX( tblend, tbllast );
}
/* mk1tbl - create table entries for a state (or state fragment) which
 * has only one out-transition
 *
 * The single transition (on symbol "sym", going to "onenxt") is dropped
 * into the first unused nxt/chk slot at or after "firstfree".  The slot
 * is never allowed to be below "sym", so the resulting base address
 * (slot - sym) cannot be negative.  "onedef" becomes the state's default
 * link.
 */
void mk1tbl( state, sym, onenxt, onedef )
int state, sym, onenxt, onedef;
	{
	/* Keep the eventual base address non-negative. */
	if ( sym > firstfree )
		firstfree = sym;

	/* Step forward to an unused slot, growing the tables whenever
	 * the search runs off their current end.
	 */
	for ( ; chk[firstfree] != 0; )
		{
		++firstfree;

		if ( firstfree >= current_max_xpairs )
			expand_nxt_chk();
		}

	chk[firstfree] = state;
	nxt[firstfree] = onenxt;
	base[state] = firstfree - sym;
	def[state] = onedef;

	if ( firstfree <= tblend )
		/* Filled a hole inside the table; nothing else to do. */
		return;

	/* The entry extended the table: record the new end and move
	 * "firstfree" past it, keeping it inside the allocated arrays.
	 */
	tblend = firstfree;
	++firstfree;

	if ( firstfree >= current_max_xpairs )
		expand_nxt_chk();
	}
/* mkprot - create new proto entry
 *
 * Puts a new proto at the front of the proto queue.  "state" is the
 * transition array to remember (entries 1 .. numecs are copied into the
 * save area), "statenum" is recorded in prottbl[] and "comstate" in
 * protcomst[].  When the queue or the save area is full, the slot of the
 * last proto in the queue is recycled for the new one.
 */
void mkprot( state, statenum, comstate )
int state[], statenum, comstate;
	{
	int ec, slot, save_base;

	++numprots;

	if ( numprots < MSP && numecs * numprots < PROT_SAVE_SIZE )
		/* Still room: take a fresh slot. */
		slot = numprots;

	else
		{
		/* Gotta make room for the new proto by dropping last
		 * entry in the queue and reusing its slot.
		 */
		slot = lastprot;
		lastprot = protprev[lastprot];
		protnext[lastprot] = NIL;
		}

	prottbl[slot] = statenum;
	protcomst[slot] = comstate;

	/* Link the slot in as the new head of the queue. */
	protnext[slot] = firstprot;

	if ( firstprot != NIL )
		protprev[firstprot] = slot;

	firstprot = slot;

	/* Copy state into save area so it can be compared with rapidly. */
	save_base = numecs * (slot - 1);

	ec = 1;
	while ( ec <= numecs )
		{
		protsave[save_base + ec] = state[ec];
		++ec;
		}
	}
/* mktemplate - create a template entry based on a state, and connect the state
 * to it
 *
 * A new template is numbered and its transition row is written into
 * tnxt[]: every equivalence class on which "state" has a transition goes
 * to "comstate", all others go to 0.  The template is registered as a
 * proto under the number -numtemps, and "state" (number "statenum") is
 * then entered into the tables as a set of differences from the
 * template, with -numtemps as its default link.
 */
void mktemplate( state, statenum, comstate )
int state[], statenum, comstate;
	{
	int diffs[CSIZE + 1];
	wchar_t trans_ecs[CSIZE + 1];
	int ec, row_base, ndiffs;
	int ntrans_ecs = 0;

	++numtemps;

	/* Calculate where we will temporarily store the transition table
	 * of the template in the tnxt[] array. The final transition table
	 * gets created by cmptmps().
	 */
	row_base = numtemps * numecs;

	if ( row_base + numecs >= current_max_template_xpairs )
		{
		/* Not enough room for this template's row; grow tnxt. */
		current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;

		++num_reallocs;

		tnxt = reallocate_integer_array( tnxt,
			current_max_template_xpairs );
		}

	/* Build the template row, collecting along the way the
	 * equivalence classes on which the state has a transition.
	 */
	for ( ec = 1; ec <= numecs; ++ec )
		{
		if ( state[ec] != 0 )
			{
			trans_ecs[ntrans_ecs] = ec;
			++ntrans_ecs;
			tnxt[row_base + ec] = comstate;
			}

		else
			tnxt[row_base + ec] = 0;
		}

	if ( usemecs )
		mkeccl( trans_ecs, ntrans_ecs, tecfwd, tecbck, numecs, 0 );

	mkprot( tnxt + row_base, -numtemps, comstate );

	/* We rely on the fact that mkprot adds things to the beginning
	 * of the proto queue, so "firstprot" is the proto just created.
	 */
	ndiffs = tbldiff( state, firstprot, diffs );
	mkentry( diffs, numecs, statenum, -numtemps, ndiffs );
	}
/* mv2front - move proto queue element to front of queue
 *
 * "qelm" is unlinked from its current position in the doubly-linked
 * proto queue (protprev/protnext) and relinked as the new head.  If it
 * was the tail, its predecessor becomes the new tail.  Nothing happens
 * when "qelm" is already at the front.
 */
void mv2front( qelm )
int qelm;
	{
	int before, after;

	if ( qelm == firstprot )
		return;

	before = protprev[qelm];
	after = protnext[qelm];

	if ( lastprot == qelm )
		lastprot = before;

	/* Splice qelm out of the queue. */
	protnext[before] = after;

	if ( after != NIL )
		protprev[after] = before;

	/* Relink it ahead of the old head. */
	protnext[qelm] = firstprot;
	protprev[qelm] = NIL;
	protprev[firstprot] = qelm;

	firstprot = qelm;
	}
/* place_state - place a state into full speed transition table
 *
 * State is the statenum'th state. It is indexed by equivalence class and
 * gives the number of the state to enter for a given equivalence class.
 * Transnum is the number of out-transitions for the state.
 *
 * The position chosen by find_table_space() is recorded in base[], and
 * "tblend" is raised if the state reaches past the current table end.
 */
void place_state( state, statenum, transnum )
int *state, statenum, transnum;
	{
	int ec;
	int slot = find_table_space( state, transnum );

	/* "base" is the table of start positions. */
	base[statenum] = slot;

	/* Mark the action number slot (just before the start position)
	 * and the end-of-buffer slot (the start position itself) with a
	 * non-zero value, so that find_table_space() sees them as taken
	 * and does not hand them to another state.
	 */
	chk[slot - 1] = 1;
	chk[slot] = 1;

	/* Place the state into chk and nxt; classes without a transition
	 * leave their slots untouched.
	 */
	for ( ec = 1; ec <= numecs; ++ec )
		{
		if ( state[ec] == 0 )
			continue;

		chk[slot + ec] = ec;
		nxt[slot + ec] = state[ec];
		}

	if ( tblend < slot + numecs )
		tblend = slot + numecs;
	}
/* stack1 - save states with only one out-transition to be processed later
 *
 * While the "one-transition" stack has room, the state's number, symbol,
 * next state and default link are pushed onto it, to be processed later
 * by mk1tbl.  Once the stack is full, the state is instead handed to
 * mk1tbl right away.
 */
void stack1( statenum, sym, nextstate, deflink )
int statenum, sym, nextstate, deflink;
	{
	if ( onesp < ONE_STACK_SIZE - 1 )
		{
		/* Room left: push onto the parallel stack arrays. */
		++onesp;

		onedef[onesp] = deflink;
		onenext[onesp] = nextstate;
		onesym[onesp] = sym;
		onestate[onesp] = statenum;

		return;
		}

	/* Stack is full; create the table entry immediately. */
	mk1tbl( statenum, sym, nextstate, deflink );
	}
/* tbldiff - compute differences between two state tables
 *
 * "state" is the state array to be compared against the pr'th proto.
 * "pr" is both the number of the proto and the index used to locate the
 * proto's complete state table in the save area.  The result is written
 * to "ext", entries 1 .. numecs:
 *
 *   - where "state" and the proto agree, "ext" gets "SAME_TRANS";
 *   - where they differ, "ext" gets the entry from "state".
 *
 * The total number of differences is returned as function value. Note
 * that this number is "numecs" minus the number of "SAME_TRANS" entries
 * in "ext".
 */
int tbldiff( state, pr, ext )
int state[], pr, ext[];
	{
	int ec;
	int ndiffs = 0;

	/* Row of the save area holding proto "pr"; like "state", it is
	 * indexed from 1.
	 */
	int *proto = &protsave[numecs * (pr - 1)];

	for ( ec = 1; ec <= numecs; ++ec )
		{
		if ( proto[ec] == state[ec] )
			ext[ec] = SAME_TRANS;

		else
			{
			ext[ec] = state[ec];
			++ndiffs;
			}
		}

	return ndiffs;
	}