mirror of
https://github.com/westes/flex.git
synced 2026-01-26 15:39:06 +00:00
adding the rest of vern's files
This commit is contained in:
parent
26e78464e7
commit
2eae880030
15
examples/README
Normal file
15
examples/README
Normal file
@ -0,0 +1,15 @@
|
||||
This directory contains some examples of what you can do with
|
||||
flex. These files are not tested regularly so you might have to tinker
|
||||
a bit before they work for you. Updates, new files and patches are welcome.
|
||||
|
||||
- debflex.awk, an awk script for annotating flex debug output.
|
||||
It presently only works with gawk and mawk, not with "old"
|
||||
or "new" awk.
|
||||
|
||||
- testxxLexer.l, a sample C++ program that uses flex's scanner
|
||||
class option ("-+").
|
||||
|
||||
- fastwc/, a subdirectory containing examples of how to use flex
|
||||
to write progressively higher-performance versions of the Unix
|
||||
"wc" utility. This certainly should work with 2.5, but hasn't
|
||||
been tested.
|
||||
119
examples/debflex.awk
Normal file
119
examples/debflex.awk
Normal file
@ -0,0 +1,119 @@
|
||||
# Clarify the flex debug trace by substituting first line of each rule.
|
||||
# Francois Pinard <pinard@iro.umontreal.ca>, July 1990.
|
||||
#
|
||||
# Rewritten to process correctly \n's in scanner input.
|
||||
# BEGIN section modified to correct a collection of rules.
|
||||
# Michal Jaegermann <michal@phys.ualberta.ca>, December 1993
|
||||
#
|
||||
# Sample usage:
|
||||
# flex -d PROGRAM.l
|
||||
# gcc -o PROGRAM PROGRAM.c -lfl
|
||||
# PROGRAM 2>&1 | gawk -f debflex.awk PROGRAM.l
|
||||
#
|
||||
# (VP's note: this script presently does not work with either "old" or
|
||||
# "new" awk; fixes so it does will be welcome)
|
||||
|
||||
BEGIN {
    # Ensure proper usage: exactly one operand, the flex source file.

    if (ARGC != 2) {
        print "usage: gawk -f debflex.awk FLEX_SOURCE <DEBUG_OUTPUT";
        exit (1);
    }

    # Remove and save the name of flex source, so that the main
    # pattern rules read the debug trace rather than the source file.

    source = ARGV[1];
    ARGC--;

    # Swallow the flex source file.  getline returns -1 on a read
    # error, which counts as true in a bare condition, so compare
    # explicitly against 0 to avoid looping forever on an unreadable
    # file.

    line = 0;
    section = 1;
    while ((status = (getline < source)) > 0) {

        # Count the lines.

        line++;

        # Count the sections.  When encountering section 3,
        # break out of the awk BEGIN block.

        if (match ($0, /^%%/)) {
            section++;
            if (section == 3) {
                break;
            }
        }
        else {
            # Only the lines in section 2 which do not begin in a
            # tab or space might be referred to by the flex debug
            # trace.  Save only those lines.

            if (section == 2 && match ($0, /^[^ \t]/)) {
                rules[line] = $0;
            }
        }
    }
    if (status < 0) {
        print "debflex.awk: cannot read " source > "/dev/stderr";
        exit (1);
    }
    close(source);

    dashes = "-----------------------------------------------------------";
    collect = "";
    line = 0;
}
|
||||
|
||||
# collect complete rule output from a scanner
|
||||
$0 !~ /^--/ {
|
||||
collect = collect "\n" $0;
|
||||
next;
|
||||
}
|
||||
# otherwise we have a new rule - process what we got so far
|
||||
{
|
||||
process();
|
||||
}
|
||||
# and the same thing if we hit EOF
|
||||
END {
|
||||
process();
|
||||
}
|
||||
|
||||
# Process one complete trace record accumulated in `collect`, then start
# collecting the next one from the current input line ($0).
#
# Uses the globals set up in BEGIN: source, rules[], dashes, collect and
# line (the flex source line of the rule most recently reported as
# accepted, or 0 when none is pending).  The extra parameters are only
# awk-style local variables; callers pass no arguments.
function process(    n, i, field, text) {

    # splitting this way we lose some double dashes and
    # left parentheses from echoed input - a small price to pay
    n = split(collect, field, "\n--|[(]");

    # this loop kicks in only when we already collected something
    for (i = 1; i <= n; i++) {
        if (0 != line) {
            # A rule was accepted by the previous field; this field and
            # everything after it is the matched text.
            # we do not care for traces of newlines.
            if (0 == match(field[i], /\"\n+\"[)]/)) {
                if (rules[line]) {
                    text = field[i];
                    while (++i <= n) {
                        text = text field[i];
                    }
                    printf("%s:%d: %-8s -- %s\n",
                           source, line, text, rules[line]);
                }
                else {
                    print;
                    printf "%s:%d: *** No such rule.\n", source, line;
                }
            }
            line = 0;
            break;
        }
        if ("" != field[i]) {
            if ("end of buffer or a NUL)" == field[i]) {
                print dashes;  # Simplify trace of buffer reloads
                continue;
            }
            if (match(field[i], /accepting rule at line /)) {
                # The number follows the matched phrase, wherever the
                # phrase starts; adding 0 forces interpretation of the
                # remainder as a number.
                line = 0 + substr(field[i], RSTART + RLENGTH);
                continue;
            }
            # echo everything else
            printf("--%s\n", field[i]);
        }
    }
    collect = "\n" $0;  # ... and start next trace
}
|
||||
24
examples/manual/ChangeLog
Normal file
24
examples/manual/ChangeLog
Normal file
@ -0,0 +1,24 @@
|
||||
Tue Oct 5 21:51:59 1993 Vern Paxson
|
||||
|
||||
* Removed FILTER/ subdirectory.
|
||||
|
||||
* Removed alloca.c.
|
||||
|
||||
* Changed Makefile definition of CC to just "gcc -g", removed
|
||||
assumption of alloca being present.
|
||||
|
||||
* Added pointer to MISC/fastwc/ to wc.lex.
|
||||
|
||||
Tue Jun 8 15:47:39 1993 Gavin Thomas Nicol (nick at sillybugs)
|
||||
|
||||
* Changed main() in wc.lex extensively. The old version would not
|
||||
work correctly without the YY_NEW_FILE. (lex handles the older
|
||||
version OK though).
|
||||
|
||||
* Added a rule to expr.lex to handle whitespace. The old version
|
||||
reported an illegal character.
|
||||
|
||||
* Added -traditional to the gcc flags because the flex definition
|
||||
for free() clashes with some systems that have old header files.
|
||||
|
||||
|
||||
88
examples/manual/Makefile
Normal file
88
examples/manual/Makefile
Normal file
@ -0,0 +1,88 @@
|
||||
#############################################################
|
||||
#
|
||||
# Makefile : Makefile for Flex examples.
|
||||
# Author : G.T.Nicol
|
||||
# Last Updated : 1993/10/05
|
||||
#
|
||||
# If you use bison, you may have to supply an alloca
|
||||
#
|
||||
#############################################################
|
||||
|
||||
CC = gcc -g
|
||||
LEX = flex -i -I
|
||||
YACC = bison -d -y
|
||||
ALLOCA =
|
||||
|
||||
############################################################
|
||||
#
|
||||
# DO NOT CHANGE ANYTHING FROM HERE ON !!!!!!!!!
|
||||
#
|
||||
############################################################
|
||||
|
||||
all: expr front myname eof wc replace user_act string1\
|
||||
string2 yymore numbers dates cat
|
||||
|
||||
expr: expr.y expr.lex
|
||||
$(YACC) expr.y
|
||||
$(LEX) expr.lex
|
||||
$(CC) -o expr lex.yy.c y.tab.c $(ALLOCA) -ll -lm
|
||||
|
||||
front: front.y front.lex
|
||||
$(YACC) front.y
|
||||
$(LEX) front.lex
|
||||
$(CC) -o front lex.yy.c y.tab.c $(ALLOCA) -ll -lm
|
||||
|
||||
numbers: numbers.lex
|
||||
$(LEX) numbers.lex
|
||||
$(CC) lex.yy.c -o numbers
|
||||
|
||||
dates: dates.lex
|
||||
$(LEX) dates.lex
|
||||
$(CC) lex.yy.c -o dates -ll
|
||||
|
||||
yymore: yymore.lex
|
||||
$(LEX) yymore.lex
|
||||
$(CC) lex.yy.c -o yymore -ll
|
||||
|
||||
string1: string1.lex
|
||||
$(LEX) string1.lex
|
||||
$(CC) lex.yy.c -o string1 -ll
|
||||
|
||||
string2: string2.lex
|
||||
$(LEX) string2.lex
|
||||
$(CC) lex.yy.c -o string2 -ll
|
||||
|
||||
myname: myname.lex
|
||||
$(LEX) myname.lex
|
||||
$(CC) lex.yy.c -o myname -ll
|
||||
|
||||
myname2: myname2.lex
|
||||
$(LEX) myname2.lex
|
||||
$(CC) lex.yy.c -o myname2 -ll
|
||||
|
||||
eof: eof_rules.lex
|
||||
$(LEX) eof_rules.lex
|
||||
$(CC) lex.yy.c -o eof -ll
|
||||
|
||||
wc: wc.lex
|
||||
$(LEX) wc.lex
|
||||
$(CC) lex.yy.c -o wc -ll
|
||||
|
||||
cat: cat.lex
|
||||
$(LEX) cat.lex
|
||||
$(CC) lex.yy.c -o cat -ll
|
||||
|
||||
replace: replace.lex
|
||||
$(LEX) replace.lex
|
||||
$(CC) lex.yy.c -o replace -ll
|
||||
|
||||
# user_act is built from user_act.lex alone, so that is its prerequisite.
user_act: user_act.lex
	$(LEX) user_act.lex
	$(CC) -o user_act lex.yy.c -ll
|
||||
|
||||
clean:
|
||||
rm -f *.BAK *.o core *~* *.a
|
||||
rm -f *.tab.h *.tab.c
|
||||
rm -f myname expr lex.yy.c *.out eof wc yymore
|
||||
rm -f replace front user_act string1 string2
|
||||
rm -f dates numbers cat
|
||||
17
examples/manual/README
Normal file
17
examples/manual/README
Normal file
@ -0,0 +1,17 @@
|
||||
This directory contains the example programs from the manual, and a few
|
||||
other things as well. To make all the programs, simply type "make",
|
||||
and assuming you have flex and gcc, all will be well.
|
||||
|
||||
To build the programs individually, type
|
||||
|
||||
make program_name
|
||||
|
||||
For example:
|
||||
|
||||
make expr
|
||||
|
||||
|
||||
The subdirectory FILTER contains a collection of the silly filters
|
||||
that have appeared on the Internet over the years. The author of the
|
||||
flex manual has included them for fun, but does not guarantee they will
|
||||
work with flex, or even work at all.
|
||||
45
examples/manual/cat.lex
Normal file
45
examples/manual/cat.lex
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* cat.lex: A demonstration of YY_NEW_FILE.
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <stdio.h>
|
||||
|
||||
char **names = NULL;
|
||||
int current = 1;
|
||||
%}
|
||||
|
||||
%%
|
||||
<<EOF>> {
|
||||
current += 1;
|
||||
if(names[current] != NULL){
|
||||
yyin = fopen(names[current],"r");
|
||||
if(yyin == NULL){
|
||||
fprintf(stderr,"cat: unable to open %s\n",
|
||||
names[current]);
|
||||
yyterminate();
|
||||
}
|
||||
YY_NEW_FILE;
|
||||
} else {
|
||||
yyterminate();
|
||||
}
|
||||
}
|
||||
%%
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if(argc < 2){
|
||||
fprintf(stderr,"Usage: cat files....\n");
|
||||
exit(1);
|
||||
}
|
||||
names = argv;
|
||||
|
||||
yyin = fopen(names[current],"r");
|
||||
if(yyin == NULL){
|
||||
fprintf(stderr,"cat: unable to open %s\n",
|
||||
names[current]);
|
||||
yyterminate();
|
||||
}
|
||||
|
||||
yylex();
|
||||
}
|
||||
106
examples/manual/dates.lex
Normal file
106
examples/manual/dates.lex
Normal file
@ -0,0 +1,106 @@
|
||||
/*
|
||||
* dates.lex: An example of using start states to
|
||||
* distinguish between different date formats.
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <ctype.h>
|
||||
|
||||
char month[20],dow[20],day[20],year[20];
|
||||
|
||||
%}
|
||||
|
||||
skip of|the|[ \t,]*
|
||||
|
||||
mon (mon(day)?)
|
||||
tue (tue(sday)?)
|
||||
wed (wed(nesday)?)
|
||||
thu (thu(rsday)?)
|
||||
fri (fri(day)?)
|
||||
sat (sat(urday)?)
|
||||
sun (sun(day)?)
|
||||
|
||||
day_of_the_week ({mon}|{tue}|{wed}|{thu}|{fri}|{sat}|{sun})
|
||||
|
||||
jan (jan(uary)?)
|
||||
feb (feb(ruary)?)
|
||||
mar (mar(ch)?)
|
||||
apr (apr(il)?)
|
||||
may (may)
|
||||
jun (jun(e)?)
|
||||
jul (jul(y)?)
|
||||
aug (aug(ust)?)
|
||||
sep (sep(tember)?)
|
||||
oct (oct(ober)?)
|
||||
nov (nov(ember)?)
|
||||
dec (dec(ember)?)
|
||||
|
||||
first_half ({jan}|{feb}|{mar}|{apr}|{may}|{jun})
|
||||
second_half ({jul}|{aug}|{sep}|{oct}|{nov}|{dec})
|
||||
month {first_half}|{second_half}
|
||||
|
||||
nday [1-9]|[1-2][0-9]|3[0-1]
|
||||
nmonth [1-9]|1[0-2]
|
||||
nyear [0-9]{1,4}
|
||||
|
||||
year_ext (ad|AD|bc|BC)?
|
||||
day_ext (st|nd|rd|th)?
|
||||
|
||||
%s LONG SHORT
|
||||
%s DAY DAY_FIRST YEAR_FIRST YEAR_LAST YFMONTH YLMONTH
|
||||
|
||||
%%
|
||||
|
||||
/* the default is month-day-year */
|
||||
|
||||
<LONG>{day_of_the_week} strcpy(dow,yytext);
|
||||
<LONG>{month} strcpy(month,yytext); BEGIN(DAY);
|
||||
|
||||
/* handle the form: day-month-year */
|
||||
|
||||
<LONG>{nday}{day_ext} strcpy(day,yytext); BEGIN(DAY_FIRST);
|
||||
<DAY_FIRST>{month} strcpy(month,yytext); BEGIN(LONG);
|
||||
<DAY>{nday}{day_ext} strcpy(day,yytext); BEGIN(LONG);
|
||||
|
||||
<LONG>{nyear}{year_ext} {
|
||||
printf("Long:\n");
|
||||
printf(" DOW : %s \n",dow);
|
||||
printf(" Day : %s \n",day);
|
||||
printf(" Month : %s \n",month);
|
||||
printf(" Year : %s \n",yytext);
|
||||
strcpy(dow,"");
|
||||
strcpy(day,"");
|
||||
strcpy(month,"");
|
||||
}
|
||||
|
||||
/* handle dates of the form: day-month-year */
|
||||
|
||||
<SHORT>{nday} strcpy(day,yytext); BEGIN(YEAR_LAST);
|
||||
<YEAR_LAST>{nmonth} strcpy(month,yytext);BEGIN(YLMONTH);
|
||||
<YLMONTH>{nyear} strcpy(year,yytext); BEGIN(SHORT);
|
||||
|
||||
/* handle dates of the form: year-month-day */
|
||||
|
||||
<SHORT>{nyear} strcpy(year,yytext); BEGIN(YEAR_FIRST);
|
||||
<YEAR_FIRST>{nmonth} strcpy(month,yytext);BEGIN(YFMONTH);
|
||||
<YFMONTH>{nday} strcpy(day,yytext); BEGIN(SHORT);
|
||||
|
||||
|
||||
<SHORT>\n {
|
||||
printf("Short:\n");
|
||||
printf(" Day : %s \n",day);
|
||||
printf(" Month : %s \n",month);
|
||||
printf(" Year : %s \n",year);
|
||||
strcpy(year,"");
|
||||
strcpy(day,"");
|
||||
strcpy(month,"");
|
||||
}
|
||||
|
||||
long\n BEGIN(LONG);
|
||||
short\n BEGIN(SHORT);
|
||||
|
||||
{skip}*
|
||||
\n
|
||||
.
|
||||
|
||||
|
||||
28
examples/manual/datetest.dat
Normal file
28
examples/manual/datetest.dat
Normal file
@ -0,0 +1,28 @@
|
||||
short
|
||||
1989:12:23
|
||||
1989:11:12
|
||||
23:12:1989
|
||||
11:12:1989
|
||||
1989/12/23
|
||||
1989/11/12
|
||||
23/12/1989
|
||||
11/12/1989
|
||||
1989-12-23
|
||||
1989-11-12
|
||||
23-12-1989
|
||||
11-12-1989
|
||||
long
|
||||
Friday the 5th of January, 1989
|
||||
Friday, 5th of January, 1989
|
||||
Friday, January 5th, 1989
|
||||
Fri, January 5th, 1989
|
||||
Fri, Jan 5th, 1989
|
||||
Fri, Jan 5, 1989
|
||||
FriJan 5, 1989
|
||||
FriJan5, 1989
|
||||
FriJan51989
|
||||
Jan51989
|
||||
|
||||
|
||||
|
||||
|
||||
65
examples/manual/eof_rules.lex
Normal file
65
examples/manual/eof_rules.lex
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* eof_rules.lex : An example of using multiple buffers
|
||||
* EOF rules, and start states
|
||||
*/
|
||||
|
||||
%{
|
||||
|
||||
#define MAX_NEST 10
|
||||
|
||||
YY_BUFFER_STATE include_stack[MAX_NEST];
|
||||
int include_count = -1;
|
||||
|
||||
%}
|
||||
|
||||
|
||||
%x INCLUDE
|
||||
|
||||
%%
|
||||
|
||||
^"#include"[ \t]*\" BEGIN(INCLUDE);
|
||||
<INCLUDE>\" BEGIN(INITIAL);
|
||||
<INCLUDE>[^\"]+ { /* get the include file name */
                  /*
                   * include_stack[] has MAX_NEST slots (0..MAX_NEST-1)
                   * and the push below pre-increments include_count, so
                   * refuse as soon as the top index is the last slot.
                   */
                  if ( include_count >= MAX_NEST - 1 ){
                     fprintf( stderr, "Too many include files" );
                     exit( 1 );
                  }

                  /* remember the buffer to resume when this file ends */
                  include_stack[++include_count] = YY_CURRENT_BUFFER;

                  yyin = fopen( yytext, "r" );
                  if ( ! yyin ){
                     fprintf( stderr, "Unable to open \"%s\"\n",yytext);
                     exit( 1 );
                  }

                  yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));

                  BEGIN(INITIAL);
                }
|
||||
<INCLUDE><<EOF>> { /* the action must start on the pattern's line */
                  fprintf( stderr, "EOF in include" );
                  yyterminate();
                }
<<EOF>>         {
                  /*
                   * include_count is the index of the most recently
                   * saved buffer, or -1 when no include is active.
                   */
                  if ( include_count < 0 ){
                     yyterminate();
                  } else {
                     /* discard the finished file's buffer and resume
                      * the buffer that was saved when it was included */
                     yy_delete_buffer( YY_CURRENT_BUFFER );
                     yy_switch_to_buffer( include_stack[include_count--] );
                     /* back in INCLUDE so the closing quote of the
                      * #include line is consumed by its own rule */
                     BEGIN(INCLUDE);
                  }
                }
|
||||
[a-z]+ ECHO;
|
||||
.|\n ECHO;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
17
examples/manual/eof_test01.txt
Normal file
17
examples/manual/eof_test01.txt
Normal file
@ -0,0 +1,17 @@
|
||||
This is test file #1
|
||||
|
||||
-------------------------------------------------
|
||||
|
||||
We will now include test #2 in a standard way.
|
||||
|
||||
#include "eof_test02.txt"
|
||||
|
||||
-------------------------------------------------
|
||||
|
||||
And now we will include test # 2 with a different
|
||||
format
|
||||
|
||||
#include "eof_test02.txt"
|
||||
-------------------------------------------------
|
||||
|
||||
and this is the end of the test.
|
||||
8
examples/manual/eof_test02.txt
Normal file
8
examples/manual/eof_test02.txt
Normal file
@ -0,0 +1,8 @@
|
||||
INCLUDE #2
|
||||
|
||||
This is the second file that will
|
||||
be included.
|
||||
|
||||
>>> Foo are GNU?
|
||||
|
||||
#include "eof_test03.txt"
|
||||
7
examples/manual/eof_test03.txt
Normal file
7
examples/manual/eof_test03.txt
Normal file
@ -0,0 +1,7 @@
|
||||
INCLUDE #3
|
||||
|
||||
This is the third file that will
|
||||
be included.
|
||||
|
||||
>>> echo "I am `whoami`!!"
|
||||
|
||||
35
examples/manual/expr.lex
Normal file
35
examples/manual/expr.lex
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* expr.lex : Scanner for a simple
|
||||
* expression parser.
|
||||
*/
|
||||
|
||||
%{
|
||||
#include "y.tab.h"
|
||||
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
[0-9]+ { yylval.val = atol(yytext);
|
||||
return(NUMBER);
|
||||
}
|
||||
[0-9]+\.[0-9]+ {
|
||||
sscanf(yytext,"%f",&yylval.val);
|
||||
return(NUMBER);
|
||||
}
|
||||
"+" return(PLUS);
|
||||
"-" return(MINUS);
|
||||
"*" return(MULT);
|
||||
"/" return(DIV);
|
||||
"^" return(EXPON);
|
||||
"(" return(LB);
|
||||
")" return(RB);
|
||||
\n return(EOL);
|
||||
[\t ]* /* throw away whitespace */
|
||||
. { yyerror("Illegal character");
|
||||
return(EOL);
|
||||
}
|
||||
%%
|
||||
|
||||
|
||||
|
||||
64
examples/manual/expr.y
Normal file
64
examples/manual/expr.y
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* expr.y : A simple yacc expression parser
|
||||
* Based on the Bison manual example.
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
%}
|
||||
|
||||
%union {
|
||||
float val;
|
||||
}
|
||||
|
||||
%token NUMBER
|
||||
%token PLUS MINUS MULT DIV EXPON
|
||||
%token EOL
|
||||
%token LB RB
|
||||
|
||||
%left MINUS PLUS
|
||||
%left MULT DIV
|
||||
%right EXPON
|
||||
|
||||
%type <val> exp NUMBER
|
||||
|
||||
%%
|
||||
input :
|
||||
| input line
|
||||
;
|
||||
|
||||
line : EOL
|
||||
| exp EOL { printf("%g\n",$1);}
|
||||
|
||||
exp : NUMBER { $$ = $1; }
|
||||
| exp PLUS exp { $$ = $1 + $3; }
|
||||
| exp MINUS exp { $$ = $1 - $3; }
|
||||
| exp MULT exp { $$ = $1 * $3; }
|
||||
| exp DIV exp { $$ = $1 / $3; }
|
||||
| MINUS exp %prec MINUS { $$ = -$2; }
|
||||
| exp EXPON exp { $$ = pow($1,$3);}
|
||||
| LB exp RB { $$ = $2; }
|
||||
;
|
||||
|
||||
%%
|
||||
|
||||
/*
 * yyerror: report a parser or scanner error.
 *
 * Prints `message` followed by a newline on standard output and
 * returns 0.  The return type is spelled out because implicit int is
 * not valid in modern C, and the value is defined for callers that
 * read it.
 */
int yyerror(char *message)
{
    printf("%s\n", message);
    return 0;
}
|
||||
|
||||
/*
 * main: parse expressions from standard input.
 *
 * The exit status is whatever yyparse() returns, so a parse failure is
 * visible to the caller instead of always exiting with 0.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    return yyparse();
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
40
examples/manual/front.lex
Normal file
40
examples/manual/front.lex
Normal file
@ -0,0 +1,40 @@
|
||||
%{
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "y.tab.h" /* this comes from bison */
|
||||
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
|
||||
#define copy_and_return(token_type) { strcpy(yylval.name,yytext); \
|
||||
return(token_type); }
|
||||
|
||||
int yylexlinenum = 0; /* so we can count lines */
|
||||
%}
|
||||
|
||||
%%
|
||||
/* Lexical scanning rules begin from here. */
|
||||
|
||||
MEN|WOMEN|STOCKS|TREES copy_and_return(NOUN)
|
||||
MISTAKES|GNUS|EMPLOYEES copy_and_return(NOUN)
|
||||
LOSERS|USERS|CARS|WINDOWS copy_and_return(NOUN)
|
||||
|
||||
DATABASE|NETWORK|FSF|GNU copy_and_return(PROPER_NOUN)
|
||||
COMPANY|HOUSE|OFFICE|LPF copy_and_return(PROPER_NOUN)
|
||||
|
||||
THE|THIS|THAT|THOSE copy_and_return(DECLARATIVE)
|
||||
|
||||
ALL|FIRST|LAST copy_and_return(CONDITIONAL)
|
||||
|
||||
FIND|SEARCH|SORT|ERASE|KILL copy_and_return(VERB)
|
||||
ADD|REMOVE|DELETE|PRINT copy_and_return(VERB)
|
||||
|
||||
QUICKLY|SLOWLY|CAREFULLY copy_and_return(ADVERB)
|
||||
|
||||
IN|AT|ON|AROUND|INSIDE|ON copy_and_return(POSITIONAL)
|
||||
|
||||
"." return(PERIOD);
|
||||
"\n" yylexlinenum++; return(NEWLINE);
|
||||
.
|
||||
%%
|
||||
|
||||
118
examples/manual/front.y
Normal file
118
examples/manual/front.y
Normal file
@ -0,0 +1,118 @@
|
||||
/* C code supplied at the beginning of the file. */
|
||||
|
||||
%{
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
extern int yylexlinenum; /* these are in YYlex */
|
||||
extern char *yytext; /* current token */
|
||||
|
||||
|
||||
%}
|
||||
|
||||
/* Keywords and reserved words begin here. */
|
||||
|
||||
%union{ /* this is the data union */
|
||||
char name[128]; /* names */
|
||||
}
|
||||
|
||||
/*-------------------- the reserved words -----------------------------*/
|
||||
|
||||
%token PERIOD
|
||||
%token NEWLINE
|
||||
%token POSITIONAL
|
||||
|
||||
%token VERB
|
||||
%token ADVERB
|
||||
|
||||
%token PROPER_NOUN
|
||||
%token NOUN
|
||||
|
||||
%token DECLARATIVE
|
||||
%token CONDITIONAL
|
||||
|
||||
|
||||
%type <name> declarative
|
||||
%type <name> verb_phrase
|
||||
%type <name> noun_phrase
|
||||
%type <name> position_phrase
|
||||
%type <name> adverb
|
||||
|
||||
%type <name> POSITIONAL VERB ADVERB PROPER_NOUN
|
||||
%type <name> NOUN DECLARATIVE CONDITIONAL
|
||||
|
||||
%%
|
||||
|
||||
sentence_list : sentence
|
||||
| sentence_list NEWLINE sentence
|
||||
;
|
||||
|
||||
|
||||
sentence : verb_phrase noun_phrase position_phrase adverb period
|
||||
{
|
||||
printf("I understand that sentence.\n");
|
||||
printf("VP = %s \n",$1);
|
||||
printf("NP = %s \n",$2);
|
||||
printf("PP = %s \n",$3);
|
||||
printf("AD = %s \n",$4);
|
||||
}
|
||||
| { yyerror("That's a strange sentence !!"); }
|
||||
;
|
||||
|
||||
position_phrase : POSITIONAL declarative PROPER_NOUN
|
||||
{
|
||||
sprintf($$,"%s %s %s",$1,$2,$3);
|
||||
}
|
||||
| /* empty */ { strcpy($$,""); }
|
||||
;
|
||||
|
||||
|
||||
verb_phrase : VERB { strcpy($$,$1); strcat($$," "); }
|
||||
| adverb VERB
|
||||
{
|
||||
sprintf($$,"%s %s",$1,$2);
|
||||
}
|
||||
;
|
||||
|
||||
adverb : ADVERB { strcpy($$,$1); }
|
||||
| /* empty */ { strcpy($$,""); }
|
||||
;
|
||||
|
||||
noun_phrase : DECLARATIVE NOUN
|
||||
{
|
||||
sprintf($$,"%s %s",$1,$2);
|
||||
}
|
||||
| CONDITIONAL declarative NOUN
|
||||
{
|
||||
sprintf($$,"%s %s %s",$1,$2,$3);
|
||||
}
|
||||
| NOUN { strcpy($$,$1); strcat($$," "); }
|
||||
;
|
||||
|
||||
declarative : DECLARATIVE { strcpy($$,$1); }
|
||||
| /* empty */ { strcpy($$,""); }
|
||||
;
|
||||
|
||||
period : /* empty */
|
||||
| PERIOD
|
||||
;
|
||||
|
||||
|
||||
%%
|
||||
|
||||
/* Supplied main() and yyerror() functions. */
|
||||
|
||||
/*
 * main: parse sentences from standard input.
 *
 * The exit status is whatever yyparse() returns, so a parse failure is
 * visible to the caller instead of always exiting with 0.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    return yyparse();   /* parse the file */
}
|
||||
|
||||
int yyerror(char *message)
|
||||
{
|
||||
extern FILE *yyout;
|
||||
|
||||
fprintf(yyout,"\nError at line %5d. (%s) \n",
|
||||
yylexlinenum,message);
|
||||
}
|
||||
442
examples/manual/j2t.lex
Normal file
442
examples/manual/j2t.lex
Normal file
@ -0,0 +1,442 @@
|
||||
/*
|
||||
* j2t.lex : An example of the use (possibly abuse!)
|
||||
* of start states.
|
||||
*/
|
||||
|
||||
%{
|
||||
#define MAX_STATES 1024
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
|
||||
#define CHAPTER "@chapter"
|
||||
#define SECTION "@section"
|
||||
#define SSECTION "@subsection"
|
||||
#define SSSECTION "@subsubsection"
|
||||
|
||||
int states[MAX_STATES];
|
||||
int statep = 0;
|
||||
|
||||
int need_closing = FALSE;
|
||||
|
||||
char buffer[YY_BUF_SIZE];
|
||||
|
||||
extern char *yytext;
|
||||
|
||||
/*
 * print_header: write the fixed texinfo preamble (file header, title
 * page and the start of the preface) to standard output.
 */

void print_header(void)
{
    fputs("\\input texinfo @c -*-texinfo-*-\n", stdout);
    /* '%' is passed as an argument, as in the original calls */
    printf("@c %c**start of header\n", '%');
    fputs("@setfilename jargon.info\n"
          "@settitle The New Hackers Dictionary\n"
          "@synindex fn cp\n"
          "@synindex vr cp\n",
          stdout);
    printf("@c %c**end of header\n", '%');
    fputs("@setchapternewpage odd\n"
          "@finalout\n"
          "@c @smallbook\n"
          "\n"
          "@c ==========================================================\n\n"
          "@c This file was produced by j2t. Any mistakes are *not* the\n"
          "@c fault of the jargon file editors. \n"
          "@c ==========================================================\n\n"
          "@titlepage\n"
          "@title The New Hackers Dictionary\n"
          "@subtitle Version 2.9.10\n"
          "@subtitle Generated by j2t\n"
          "@author Eric S. Raymond, Guy L. Steel, Mark Crispin et al.\n"
          "@end titlepage\n"
          "@page\n"
          "\n@c ==========================================================\n"
          "\n\n"
          "@unnumbered Preface\n"
          "@c *******\n",
          stdout);
}
|
||||
|
||||
/*
 * print_trailer: write the closing lines of the texinfo file (a
 * separator comment, the table of contents request and @bye) to
 * standard output.
 */

void print_trailer(void)
{
    fputs("\n@c ==========================================================\n"
          "@contents\n"     /* print the table of contents */
          "@bye\n\n",
          stdout);
}
|
||||
|
||||
/*
 * write_underline: write a texinfo comment line that underlines a
 * section or chapter title, so the heading can be found later.
 *
 *   len   - number of `ch` characters to print; zero or a negative
 *           value prints none
 *   space - padding control: (space - 3) blanks follow the "@c "
 *           prefix, none when space <= 3
 *   ch    - the underline character
 *
 * The line is followed by one empty line.
 */

void write_underline(int len, int space, char ch)
{
    int i;

    printf("@c ");

    for (i = 3; i < space; i++) {
        printf(" ");
    }

    /* counting up instead of `while (len--)` stops a negative len
     * from running until the counter wraps around */
    for (i = 0; i < len; i++) {
        printf("%c", ch);
    }
    printf("\n\n");
}
|
||||
|
||||
/*
|
||||
* check for texinfo special characters
|
||||
* and escape them
|
||||
*/
|
||||
|
||||
char *check_and_convert(char *string)
|
||||
{
|
||||
int buffpos = 0;
|
||||
int len,loop;
|
||||
|
||||
len = strlen(string);
|
||||
for(loop=0; loop<len; loop++){
|
||||
if(string[loop] == '@' || string[loop] == '{' || string[loop] == '}'){
|
||||
buffer[buffpos++] = '@';
|
||||
buffer[buffpos++] = string[loop];
|
||||
} else {
|
||||
buffer[buffpos++] = string[loop];
|
||||
}
|
||||
}
|
||||
buffer[buffpos] = '\0';
|
||||
return(buffer);
|
||||
}
|
||||
|
||||
/*
|
||||
* write out a chapter,section, or subsection
|
||||
* header
|
||||
*/
|
||||
|
||||
void write_block_header(char *type)
|
||||
{
|
||||
int loop;
|
||||
int len;
|
||||
|
||||
(void)check_and_convert(yytext);
|
||||
len = strlen(buffer);
|
||||
for(loop=0; buffer[loop] != '\n';loop++)
|
||||
;
|
||||
buffer[loop] = '\0';
|
||||
printf("%s %s\n",type,buffer);
|
||||
write_underline(strlen(buffer),strlen(type)+1,'*');
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
/*
|
||||
* the flex description starts here
|
||||
*/
|
||||
|
||||
%x HEADING EXAMPLE ENUM EXAMPLE2
|
||||
%x BITEM BITEM_ITEM
|
||||
%s LITEM LITEM2
|
||||
|
||||
%%
|
||||
|
||||
^#[^#]*"#" /* skip the header & trailer */
|
||||
/* chapters have asterisks under them
|
||||
* and are terminated by a colon
|
||||
*/
|
||||
^[^\n:]+\n[*]+\n write_block_header(CHAPTER);
|
||||
|
||||
^"= "[A-Z]" ="\n"="* { /* we create a seciton for each category */
|
||||
if(need_closing == TRUE){
|
||||
printf("@end table\n\n\n");
|
||||
}
|
||||
need_closing = TRUE;
|
||||
write_block_header(SECTION);
|
||||
printf("\n\n@table @b\n");
|
||||
}
|
||||
|
||||
"Examples:"[^\.]+ ECHO;
|
||||
|
||||
"*"[^*\n]+"*" { /* @emph{}(emphasized) text */
|
||||
yytext[yyleng-1] = '\0';
|
||||
(void)check_and_convert(&yytext[1]);
|
||||
printf("@i{%s}",buffer);
|
||||
}
|
||||
|
||||
"{{"[^}]+"}}" { /* special emphasis */
|
||||
yytext[yyleng-2] = '\0';
|
||||
(void)check_and_convert(&yytext[2]);
|
||||
printf("@b{%s}",buffer);
|
||||
}
|
||||
|
||||
"{"[^}]+"}" { /* special emphasis */
|
||||
yytext[yyleng-1] = '\0';
|
||||
(void)check_and_convert(&yytext[1]);
|
||||
printf("@b{%s}",buffer);
|
||||
}
|
||||
|
||||
/* escape some special texinfo characters */
|
||||
<INITIAL,LITEM,LITEM2,BITEM,ENUM,EXAMPLE,EXAMPLE2>"@" printf("@@");
|
||||
<INITIAL,LITEM,LITEM2,BITEM,ENUM,EXAMPLE,EXAMPLE2>"{" printf("@{");
|
||||
<INITIAL,LITEM,LITEM2,BITEM,ENUM,EXAMPLE,EXAMPLE2>"}" printf("@}");
|
||||
|
||||
/*
|
||||
* reproduce @example code
|
||||
*/
|
||||
|
||||
":"\n+[^\n0-9*]+\n" "[^ ] {
|
||||
int loop;
|
||||
int len;
|
||||
int cnt;
|
||||
|
||||
printf(":\n\n@example \n");
|
||||
strcpy(buffer,yytext);
|
||||
len = strlen(buffer);
|
||||
cnt = 0;
|
||||
for(loop=len; loop > 0;loop--){
|
||||
if(buffer[loop] == '\n')
|
||||
cnt++;
|
||||
if(cnt == 2)
|
||||
break;
|
||||
}
|
||||
yyless(loop+1);
|
||||
statep++;
|
||||
states[statep] = EXAMPLE2;
|
||||
BEGIN(EXAMPLE2);
|
||||
}
|
||||
<EXAMPLE,EXAMPLE2>^\n {
|
||||
printf("@end example\n\n");
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
|
||||
/*
|
||||
* reproduce @enumerate lists
|
||||
*/
|
||||
|
||||
":"\n+[ \t]*[0-9]+"." {
|
||||
int loop;
|
||||
int len;
|
||||
|
||||
printf(":\n\n@enumerate \n");
|
||||
strcpy(buffer,yytext);
|
||||
len = strlen(buffer);
|
||||
for(loop=len; loop > 0;loop--){
|
||||
if(buffer[loop] == '\n')
|
||||
break;
|
||||
}
|
||||
yyless(loop);
|
||||
statep++;
|
||||
states[statep] = ENUM;
|
||||
BEGIN(ENUM);
|
||||
}
|
||||
|
||||
<ENUM>"@" printf("@@");
|
||||
<ENUM>":"\n+" "[^0-9] {
|
||||
printf(":\n\n@example\n");
|
||||
statep++;
|
||||
states[statep] = EXAMPLE;
|
||||
BEGIN(EXAMPLE);
|
||||
}
|
||||
|
||||
|
||||
<ENUM>\n[ \t]+[0-9]+"." {
|
||||
printf("\n\n@item ");
|
||||
}
|
||||
<ENUM>^[^ ] |
|
||||
<ENUM>\n\n\n[ \t]+[^0-9] {
|
||||
printf("\n\n@end enumerate\n\n");
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
|
||||
/*
|
||||
* reproduce one kind of @itemize list
|
||||
*/
|
||||
|
||||
":"\n+":" {
|
||||
int loop;
|
||||
int len;
|
||||
|
||||
printf(":\n\n@itemize @bullet \n");
|
||||
yyless(2);
|
||||
statep++;
|
||||
states[statep] = LITEM2;
|
||||
BEGIN(LITEM2);
|
||||
}
|
||||
<LITEM2>^":".+":" {
|
||||
(void)check_and_convert(&yytext[1]);
|
||||
buffer[strlen(buffer)-1]='\0';
|
||||
printf("@item @b{%s:}\n",buffer);
|
||||
}
|
||||
|
||||
<LITEM2>\n\n\n+[^:\n] {
|
||||
printf("\n\n@end itemize\n\n");
|
||||
ECHO;
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
|
||||
/*
|
||||
* create a list out of the revision history part.
|
||||
* We need the "Version" for this because it
|
||||
* clashes with other rules otherwise.
|
||||
*/
|
||||
|
||||
:[\n]+"Version"[^:\n*]+":" {
|
||||
int loop;
|
||||
int len;
|
||||
|
||||
printf(":\n\n@itemize @bullet \n");
|
||||
strcpy(buffer,yytext);
|
||||
len = strlen(buffer);
|
||||
for(loop=len; loop > 0;loop--){
|
||||
if(buffer[loop] == '\n')
|
||||
break;
|
||||
}
|
||||
yyless(loop);
|
||||
statep++;
|
||||
states[statep] = LITEM;
|
||||
BEGIN(LITEM);
|
||||
}
|
||||
<LITEM>^.+":" {
|
||||
(void)check_and_convert(yytext);
|
||||
buffer[strlen(buffer)-1]='\0';
|
||||
printf("@item @b{%s}\n\n",buffer);
|
||||
}
|
||||
|
||||
<LITEM>^[^:\n]+\n\n[^:\n]+\n {
|
||||
int loop;
|
||||
|
||||
strcpy(buffer,yytext);
|
||||
for(loop=0; buffer[loop] != '\n'; loop++);
|
||||
buffer[loop] = '\0';
|
||||
printf("%s\n",buffer);
|
||||
printf("@end itemize\n\n");
|
||||
printf("%s",&buffer[loop+1]);
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
|
||||
/*
|
||||
* reproduce @itemize @bullet lists
|
||||
*/
|
||||
|
||||
":"\n[ ]*"*" {
|
||||
int loop;
|
||||
int len;
|
||||
|
||||
printf(":\n\n@itemize @bullet \n");
|
||||
len = strlen(buffer);
|
||||
for(loop=0; loop < len;loop++){
|
||||
if(buffer[loop] == '\n')
|
||||
break;
|
||||
}
|
||||
yyless((len-loop)+2);
|
||||
statep++;
|
||||
states[statep] = BITEM;
|
||||
BEGIN(BITEM);
|
||||
}
|
||||
|
||||
<BITEM>^" "*"*" {
|
||||
printf("@item");
|
||||
statep++;
|
||||
states[statep] = BITEM_ITEM;
|
||||
BEGIN(BITEM_ITEM);
|
||||
}
|
||||
<BITEM>"@" printf("@@");
|
||||
<BITEM>^\n {
|
||||
printf("@end itemize\n\n");
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
<BITEM_ITEM>[^\:]* {
|
||||
printf(" @b{%s}\n\n",check_and_convert(yytext));
|
||||
}
|
||||
<BITEM_ITEM>":" {
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
|
||||
/*
|
||||
* recreate @chapter, @section etc.
|
||||
*/
|
||||
|
||||
^:[^:]* {
|
||||
(void)check_and_convert(&yytext[1]);
|
||||
statep++;
|
||||
states[statep] = HEADING;
|
||||
BEGIN(HEADING);
|
||||
}
|
||||
<HEADING>:[^\n] {
|
||||
printf("@item @b{%s}\n",buffer);
|
||||
write_underline(strlen(buffer),6,'~');
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
<HEADING>:\n"*"* {
|
||||
if(need_closing == TRUE){
|
||||
printf("@end table\n\n\n");
|
||||
need_closing = FALSE;
|
||||
}
|
||||
printf("@chapter %s\n",buffer);
|
||||
write_underline(strlen(buffer),9,'*');
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
<HEADING>:\n"="* {
|
||||
if(need_closing == TRUE){
|
||||
printf("@end table\n\n\n");
|
||||
need_closing = FALSE;
|
||||
}
|
||||
printf("@section %s\n",buffer);
|
||||
write_underline(strlen(buffer),9,'=');
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
<HEADING>"@" printf("@@");
|
||||
<HEADING>:\n"-"* {
|
||||
if(need_closing == TRUE){
|
||||
printf("@end table\n\n\n");
|
||||
need_closing = FALSE;
|
||||
}
|
||||
printf("@subsection %s\n",buffer);
|
||||
write_underline(strlen(buffer),12,'-');
|
||||
statep--;
|
||||
BEGIN(states[statep]);
|
||||
}
|
||||
|
||||
/*
|
||||
* recreate @example text
|
||||
*/
|
||||
|
||||
^" " {
|
||||
printf("@example\n");
|
||||
statep++;
|
||||
states[statep] = EXAMPLE;
|
||||
BEGIN(EXAMPLE);
|
||||
}
|
||||
<EXAMPLE>^" "
|
||||
. ECHO;
|
||||
|
||||
%%
|
||||
|
||||
/*
|
||||
* initialise and go.
|
||||
*/
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
states[0] = INITIAL;
|
||||
statep = 0;
|
||||
print_header();
|
||||
yylex();
|
||||
print_trailer();
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
15
examples/manual/myname.lex
Normal file
15
examples/manual/myname.lex
Normal file
@ -0,0 +1,15 @@
|
||||
/*
 *
 * myname.lex : A sample Flex program
 *              that does token replacement.
 *
 * Each %TOKEN below is replaced by the value of an environment
 * variable; input that matches no rule is handled by flex's default
 * rule.
 */

%{
#include <stdio.h>
#include <stdlib.h>   /* getenv() */

/*
 * getenv() returns NULL when the variable is not set, and passing NULL
 * to a "%s" conversion is undefined behaviour, so fall back to "".
 */
static const char *env_or_empty(const char *name)
{
  const char *value = getenv(name);

  return value ? value : "";
}
%}

%%

%NAME     { printf("%s",env_or_empty("LOGNAME")); }
%HOST     { printf("%s",env_or_empty("HOST")); }
%HOSTTYPE { printf("%s",env_or_empty("HOSTTYPE"));}
%HOME     { printf("%s",env_or_empty("HOME")); }

%%
|
||||
|
||||
6
examples/manual/myname.txt
Normal file
6
examples/manual/myname.txt
Normal file
@ -0,0 +1,6 @@
|
||||
Hello, my name is %NAME. Actually
|
||||
"%NAME" isn't my real name, it is the
|
||||
alias I use when I'm on %HOST, which
|
||||
is the %HOSTTYPE I use. My HOME
|
||||
directory is %HOME.
|
||||
|
||||
19
examples/manual/myname2.lex
Normal file
19
examples/manual/myname2.lex
Normal file
@ -0,0 +1,19 @@
|
||||
/*
 * myname2.lex : A sample Flex program
 *               that does token replacement.
 *
 * Text between double quotes is scanned in the exclusive STRING state,
 * which has no %TOKEN rules, so tokens inside quotes are echoed
 * unchanged.
 */

%{
#include <stdio.h>
#include <stdlib.h>   /* getenv() */

/*
 * getenv() returns NULL when the variable is not set, and passing NULL
 * to a "%s" conversion is undefined behaviour, so fall back to "".
 */
static const char *env_or_empty(const char *name)
{
  const char *value = getenv(name);

  return value ? value : "";
}
%}

%x STRING
%%
\"                ECHO; BEGIN(STRING);
<STRING>[^\"\n]*  ECHO;
<STRING>\"        ECHO; BEGIN(INITIAL);

%NAME     { printf("%s",env_or_empty("LOGNAME")); }
%HOST     { printf("%s",env_or_empty("HOST")); }
%HOSTTYPE { printf("%s",env_or_empty("HOSTTYPE"));}
%HOME     { printf("%s",env_or_empty("HOME")); }
|
||||
145
examples/manual/numbers.lex
Normal file
145
examples/manual/numbers.lex
Normal file
@ -0,0 +1,145 @@
|
||||
/*
|
||||
* numbers.lex : An example of the definitions and techniques
|
||||
* for scanning numbers
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <stdio.h>
|
||||
|
||||
#define UNSIGNED_LONG_SYM 1
|
||||
#define SIGNED_LONG_SYM 2
|
||||
#define UNSIGNED_SYM 3
|
||||
#define SIGNED_SYM 4
|
||||
#define LONG_DOUBLE_SYM 5
|
||||
#define FLOAT_SYM 6
|
||||
|
||||
union _yylval {
|
||||
long double ylong_double;
|
||||
float yfloat;
|
||||
unsigned long yunsigned_long;
|
||||
unsigned yunsigned;
|
||||
long ysigned_long;
|
||||
int ysigned;
|
||||
} yylval;
|
||||
|
||||
%}
|
||||
|
||||
digit [0-9]
|
||||
hex_digit [0-9a-fA-F]
|
||||
oct_digit [0-7]
|
||||
|
||||
exponent [eE][+-]?{digit}+
|
||||
i {digit}+
|
||||
float_constant ({i}\.{i}?|{i}?\.{i}){exponent}?
|
||||
hex_constant 0[xX]{hex_digit}+
|
||||
oct_constant 0{oct_digit}*
|
||||
int_constant {digit}+
|
||||
long_ext [lL]
|
||||
unsigned_ext [uU]
|
||||
float_ext [fF]
|
||||
ulong_ext {long_ext}{unsigned_ext}|{unsigned_ext}{long_ext}
|
||||
|
||||
%%
|
||||
|
||||
{hex_constant}{ulong_ext} { /* we need to skip the "0x" part */
|
||||
sscanf(&yytext[2],"%lx",&yylval.yunsigned_long);
|
||||
return(UNSIGNED_LONG_SYM);
|
||||
}
|
||||
{hex_constant}{long_ext} {
|
||||
sscanf(&yytext[2],"%lx",&yylval.ysigned_long);
|
||||
return(SIGNED_LONG_SYM);
|
||||
}
|
||||
{hex_constant}{unsigned_ext} {
|
||||
sscanf(&yytext[2],"%x",&yylval.yunsigned);
|
||||
return(UNSIGNED_SYM);
|
||||
}
|
||||
{hex_constant} { /* use %lx to protect against overflow */
|
||||
sscanf(&yytext[2],"%lx",&yylval.ysigned_long);
|
||||
return(SIGNED_LONG_SYM);
|
||||
}
|
||||
{oct_constant}{ulong_ext} {
|
||||
sscanf(yytext,"%lo",&yylval.yunsigned_long);
|
||||
return(UNSIGNED_LONG_SYM);
|
||||
}
|
||||
{oct_constant}{long_ext} {
|
||||
sscanf(yytext,"%lo",&yylval.ysigned_long);
|
||||
return(SIGNED_LONG_SYM);
|
||||
}
|
||||
{oct_constant}{unsigned_ext} {
|
||||
sscanf(yytext,"%o",&yylval.yunsigned);
|
||||
return(UNSIGNED_SYM);
|
||||
}
|
||||
{oct_constant} { /* use %lo to protect against overflow */
|
||||
sscanf(yytext,"%lo",&yylval.ysigned_long);
|
||||
return(SIGNED_LONG_SYM);
|
||||
}
|
||||
{int_constant}{ulong_ext}     { /* the target is an unsigned long, so scan with %lu */
                                sscanf(yytext,"%lu",&yylval.yunsigned_long);
                                return(UNSIGNED_LONG_SYM);
                              }
|
||||
{int_constant}{long_ext} {
|
||||
sscanf(yytext,"%ld",&yylval.ysigned_long);
|
||||
return(SIGNED_LONG_SYM);
|
||||
}
|
||||
{int_constant}{unsigned_ext}  { /* the target is an unsigned int, so scan with %u */
                                sscanf(yytext,"%u",&yylval.yunsigned);
                                return(UNSIGNED_SYM);
                              }
|
||||
{int_constant} { /* use %ld to protect against overflow */
|
||||
sscanf(yytext,"%ld",&yylval.ysigned_long);
|
||||
return(SIGNED_LONG_SYM);
|
||||
}
|
||||
{float_constant}{long_ext}    { /* %Lf is the scanf conversion for long double;
                                 * %lf would store a double into the union */
                                sscanf(yytext,"%Lf",&yylval.ylong_double);
                                return(LONG_DOUBLE_SYM);
                              }
|
||||
{float_constant}{float_ext} {
|
||||
sscanf(yytext,"%f",&yylval.yfloat);
|
||||
return(FLOAT_SYM);
|
||||
}
|
||||
{float_constant}              { /* use %Lf (long double) to protect against overflow */
                                sscanf(yytext,"%Lf",&yylval.ylong_double);
                                return(LONG_DOUBLE_SYM);
                              }
|
||||
%%
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int code;
|
||||
|
||||
while((code = yylex())){
|
||||
printf("yytext : %s\n",yytext);
|
||||
switch(code){
|
||||
case UNSIGNED_LONG_SYM:
|
||||
printf("Type of number : UNSIGNED LONG\n");
|
||||
printf("Value of number : %lu\n",yylval.yunsigned_long);
|
||||
break;
|
||||
case SIGNED_LONG_SYM:
|
||||
printf("Type of number : SIGNED LONG\n");
|
||||
printf("Value of number : %ld\n",yylval.ysigned_long);
|
||||
break;
|
||||
case UNSIGNED_SYM:
|
||||
printf("Type of number : UNSIGNED\n");
|
||||
printf("Value of number : %u\n",yylval.yunsigned);
|
||||
break;
|
||||
case SIGNED_SYM:
|
||||
printf("Type of number : SIGNED\n");
|
||||
printf("Value of number : %d\n",yylval.ysigned);
|
||||
break;
|
||||
case LONG_DOUBLE_SYM:
|
||||
printf("Type of number : LONG DOUBLE\n");
|
||||
printf("Value of number : %lf\n",yylval.ylong_double);
|
||||
break;
|
||||
case FLOAT_SYM:
|
||||
printf("Type of number : FLOAT\n");
|
||||
printf("Value of number : %f\n",yylval.yfloat);
|
||||
break;
|
||||
default:
|
||||
printf("Type of number : UNDEFINED\n");
|
||||
printf("Value of number : UNDEFINED\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
78
examples/manual/pas_include.lex
Normal file
78
examples/manual/pas_include.lex
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* pas_include.lex : An example of using multiple buffers
|
||||
* EOF rules, and start states
|
||||
*/
|
||||
|
||||
%{
|
||||
|
||||
#define MAX_NEST 10
|
||||
|
||||
YY_BUFFER_STATE include_stack[MAX_NEST];
|
||||
int include_count = -1;
|
||||
|
||||
%}
|
||||
|
||||
|
||||
%x INCLUDE
|
||||
%x COMMENT
|
||||
|
||||
|
||||
%%
|
||||
|
||||
"{" BEGIN(COMMENT);
|
||||
|
||||
<COMMENT>"}" BEGIN(INITIAL);
|
||||
<COMMENT>"$include"[ \t]*"(" BEGIN(INCLUDE);
|
||||
<COMMENT>[ \t]* /* skip whitespace */
|
||||
|
||||
<INCLUDE>")" BEGIN(COMMENT);
|
||||
<INCLUDE>[ \t]* /* skip whitespace */
|
||||
<INCLUDE>[^ \t\n() ]+ { /* get the include file name */
                        /* include_count is the index of the last used
                         * slot (-1 when the stack is empty), so the
                         * stack is already full at MAX_NEST - 1 */
                        if ( include_count >= MAX_NEST - 1){
                           fprintf( stderr, "Too many include files" );
                           exit( 1 );
                        }

                        /* remember the including file's buffer */
                        include_stack[++include_count] = YY_CURRENT_BUFFER;

                        yyin = fopen( yytext, "r" );
                        if ( ! yyin ){
                           fprintf( stderr, "Unable to open %s",yytext);
                           exit( 1 );
                        }

                        yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));

                        BEGIN(INITIAL);
                      }
|
||||
<INCLUDE><<EOF>>        { /* a flex action must begin on the pattern's own line */
                          fprintf( stderr, "EOF in include" );
                          yyterminate();
                        }
|
||||
<COMMENT><<EOF>>        { /* a flex action must begin on the pattern's own line */
                          fprintf( stderr, "EOF in comment" );
                          yyterminate();
                        }
|
||||
<<EOF>>        { /* end of the current input file */
                 if ( include_count < 0 ){
                    /* nothing saved on the stack: this was the
                     * outermost file, so scanning is finished */
                    yyterminate();
                 } else {
                    /* discard the buffer of the file that just ended
                     * and resume the file that included it */
                    yy_delete_buffer(YY_CURRENT_BUFFER);
                    yy_switch_to_buffer(include_stack[include_count--]);
                    BEGIN(INCLUDE);
                 }
               }
|
||||
[a-z]+ ECHO;
|
||||
.|\n ECHO;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
120
examples/manual/pascal.lex
Normal file
120
examples/manual/pascal.lex
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* pascal.lex: An example PASCAL scanner
|
||||
*
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <stdio.h>
|
||||
#include "y.tab.h"
|
||||
|
||||
int line_number = 0;
|
||||
|
||||
void yyerror(char *message);
|
||||
|
||||
%}
|
||||
|
||||
%x COMMENT1 COMMENT2
|
||||
|
||||
white_space [ \t]*
|
||||
digit [0-9]
|
||||
alpha [A-Za-z_]
|
||||
alpha_num ({alpha}|{digit})
|
||||
hex_digit [0-9A-F]
|
||||
identifier {alpha}{alpha_num}*
|
||||
unsigned_integer {digit}+
|
||||
hex_integer ${hex_digit}{hex_digit}*
|
||||
exponent e[+-]?{digit}+
|
||||
i {unsigned_integer}
|
||||
real ({i}\.{i}?|{i}?\.{i}){exponent}?
|
||||
string \'([^'\n]|\'\')+\'
|
||||
bad_string \'([^'\n]|\'\')+
|
||||
|
||||
%%
|
||||
|
||||
"{" BEGIN(COMMENT1);
|
||||
<COMMENT1>[^}\n]+
|
||||
<COMMENT1>\n ++line_number;
|
||||
<COMMENT1><<EOF>> yyerror("EOF in comment");
|
||||
<COMMENT1>"}" BEGIN(INITIAL);
|
||||
|
||||
"(*" BEGIN(COMMENT2);
|
||||
<COMMENT2>[^)*\n]+
|
||||
<COMMENT2>\n ++line_number;
|
||||
<COMMENT2><<EOF>> yyerror("EOF in comment");
|
||||
<COMMENT2>"*)" BEGIN(INITIAL);
|
||||
<COMMENT2>[*)]
|
||||
|
||||
/* note that FILE and BEGIN are already
|
||||
* defined in FLEX or C so they can't
|
||||
* be used. This can be overcome in
|
||||
* a cleaner way by defining all the
|
||||
* tokens to start with TOK_ or some
|
||||
* other prefix.
|
||||
*/
|
||||
|
||||
and return(AND);
|
||||
array return(ARRAY);
|
||||
begin return(_BEGIN);
|
||||
case return(CASE);
|
||||
const return(CONST);
|
||||
div return(DIV);
|
||||
do return(DO);
|
||||
downto return(DOWNTO);
|
||||
else return(ELSE);
|
||||
end return(END);
|
||||
file return(_FILE);
|
||||
for return(FOR);
|
||||
function return(FUNCTION);
|
||||
goto return(GOTO);
|
||||
if return(IF);
|
||||
in return(IN);
|
||||
label return(LABEL);
|
||||
mod return(MOD);
|
||||
nil return(NIL);
|
||||
not return(NOT);
|
||||
of return(OF);
|
||||
packed return(PACKED);
|
||||
procedure return(PROCEDURE);
|
||||
program return(PROGRAM);
|
||||
record return(RECORD);
|
||||
repeat return(REPEAT);
|
||||
set return(SET);
|
||||
then return(THEN);
|
||||
to return(TO);
|
||||
type return(TYPE);
|
||||
until return(UNTIL);
|
||||
var return(VAR);
|
||||
while return(WHILE);
|
||||
with return(WITH);
|
||||
|
||||
"<="|"=<" return(LEQ);
|
||||
"=>"|">=" return(GEQ);
|
||||
"<>" return(NEQ);
|
||||
"=" return(EQ);
|
||||
|
||||
".." return(DOUBLEDOT);
|
||||
|
||||
{unsigned_integer} return(UNSIGNED_INTEGER);
|
||||
{real} return(REAL);
|
||||
{hex_integer} return(HEX_INTEGER);
|
||||
{string}             return(STRING);
|
||||
{bad_string} yyerror("Unterminated string");
|
||||
|
||||
{identifier} return(IDENTIFIER);
|
||||
|
||||
[*/+\-,^.;:()\[\]] return(yytext[0]);
|
||||
|
||||
{white_space} /* do nothing */
|
||||
\n line_number += 1;
|
||||
. yyerror("Illegal input");
|
||||
|
||||
%%
|
||||
|
||||
void yyerror(char *message)
|
||||
{
|
||||
fprintf(stderr,"Error: \"%s\" in line %d. Token = %s\n",
|
||||
message,line_number,yytext);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
12
examples/manual/reject.lex
Normal file
12
examples/manual/reject.lex
Normal file
@ -0,0 +1,12 @@
|
||||
/*
|
||||
* reject.lex: An example of REJECT and unput()
|
||||
* misuse.
|
||||
*/
|
||||
|
||||
%%
|
||||
UNIX {
|
||||
unput('U'); unput('N'); unput('G'); unput('\0');
|
||||
REJECT;
|
||||
}
|
||||
GNU printf("GNU is Not Unix!\n");
|
||||
%%
|
||||
33
examples/manual/replace.lex
Normal file
33
examples/manual/replace.lex
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* replace.lex : A simple filter for renaming
|
||||
* parts of flex or bison generated
|
||||
* scanners or parsers.
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <stdio.h>
|
||||
|
||||
char lower_replace[1024];
|
||||
char upper_replace[1024];
|
||||
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
"yy" printf("%s",lower_replace);
|
||||
"YY" printf("%s",upper_replace);
|
||||
, ECHO;
|
||||
|
||||
%%
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if(argc < 2){
|
||||
printf("Usage %s lower UPPER\n",argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
strcpy(lower_replace,argv[1]);
|
||||
strcpy(upper_replace,argv[2]);
|
||||
yylex();
|
||||
return(0);
|
||||
}
|
||||
98
examples/manual/string1.lex
Normal file
98
examples/manual/string1.lex
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* string1.lex: Handling strings by using input()
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <ctype.h>
|
||||
#include <malloc.h>
|
||||
|
||||
#define ALLOC_SIZE 32 /* for (re)allocating the buffer */
|
||||
|
||||
#define isodigit(x) ((x) >= '0' && (x) <= '7')
|
||||
#define hextoint(x) (isdigit((x)) ? (x) - '0' : ((x) - 'A') + 10)
|
||||
|
||||
void yyerror(char *message)
|
||||
{
|
||||
printf("\nError: %s\n",message);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
\" {
|
||||
int inch,count,max_size;
|
||||
char *buffer;
|
||||
int temp;
|
||||
|
||||
buffer = malloc(ALLOC_SIZE);
|
||||
max_size = ALLOC_SIZE;
|
||||
inch = input();
|
||||
count = 0;
|
||||
while(inch != EOF && inch != '"' && inch != '\n'){
|
||||
if(inch == '\\'){
|
||||
inch = input();
|
||||
switch(inch){
|
||||
case '\n': inch = input(); break;
|
||||
case 'b' : inch = '\b'; break;
|
||||
case 't' : inch = '\t'; break;
|
||||
case 'n' : inch = '\n'; break;
|
||||
case 'v' : inch = '\v'; break;
|
||||
case 'f' : inch = '\f'; break;
|
||||
case 'r' : inch = '\r'; break;
|
||||
case 'X' :
|
||||
case 'x' : inch = input();
|
||||
if(isxdigit(inch)){
|
||||
temp = hextoint(toupper(inch));
|
||||
inch = input();
|
||||
if(isxdigit(inch)){
|
||||
temp = (temp << 4) + hextoint(toupper(inch));
|
||||
} else {
|
||||
unput(inch);
|
||||
}
|
||||
inch = temp;
|
||||
} else {
|
||||
unput(inch);
|
||||
inch = 'x';
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if(isodigit(inch)){
|
||||
temp = inch - '0';
|
||||
inch = input();
|
||||
if(isodigit(inch)){
|
||||
temp = (temp << 3) + (inch - '0');
|
||||
} else {
|
||||
unput(inch);
|
||||
goto done;
|
||||
}
|
||||
inch = input();
|
||||
if(isodigit(inch)){
|
||||
temp = (temp << 3) + (inch - '0');
|
||||
} else {
|
||||
unput(inch);
|
||||
}
|
||||
done:
|
||||
inch = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
buffer[count++] = inch;
|
||||
if(count >= max_size){
|
||||
buffer = realloc(buffer,max_size + ALLOC_SIZE);
|
||||
max_size += ALLOC_SIZE;
|
||||
}
|
||||
inch = input();
|
||||
}
|
||||
if(inch == EOF || inch == '\n'){
|
||||
yyerror("Unterminated string.");
|
||||
}
|
||||
buffer[count] = '\0';
|
||||
printf("String = \"%s\"\n",buffer);
|
||||
free(buffer);
|
||||
}
|
||||
.
|
||||
\n
|
||||
%%
|
||||
|
||||
|
||||
94
examples/manual/string2.lex
Normal file
94
examples/manual/string2.lex
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* string2.lex: An example of using scanning strings
|
||||
* by using start states.
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <ctype.h>
|
||||
#include <malloc.h>
|
||||
|
||||
#define isodigit(x) ((x) >= '0' && (x) <= '7')
|
||||
#define hextoint(x) (isdigit((x)) ? (x) - '0' : ((x) - 'A') + 10)
|
||||
|
||||
char *buffer = NULL;
|
||||
int buffer_size = 0;
|
||||
|
||||
void yyerror(char *message)
|
||||
{
|
||||
printf("\nError: %s\n",message);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
%x STRING
|
||||
|
||||
hex (x|X)[0-9a-fA-F]{1,2}
|
||||
oct [0-7]{1,3}
|
||||
|
||||
%%
|
||||
|
||||
\" {
|
||||
buffer = malloc(1);
|
||||
buffer_size = 1; strcpy(buffer,"");
|
||||
BEGIN(STRING);
|
||||
}
|
||||
<STRING>\n {
|
||||
yyerror("Unterminated string");
|
||||
free(buffer);
|
||||
BEGIN(INITIAL);
|
||||
}
|
||||
<STRING><<EOF>> {
|
||||
yyerror("EOF in string");
|
||||
free(buffer);
|
||||
BEGIN(INITIAL);
|
||||
}
|
||||
<STRING>[^\\\n"] {
|
||||
buffer = realloc(buffer,buffer_size+yyleng+1);
|
||||
buffer_size += yyleng;
|
||||
strcat(buffer,yytext);
|
||||
}
|
||||
<STRING>\\\n /* ignore this */
|
||||
<STRING>\\{hex} {
|
||||
int temp =0,loop = 0;
|
||||
for(loop=yyleng-2; loop>0; loop--){
|
||||
temp <<= 4;
|
||||
temp += hextoint(toupper(yytext[yyleng-loop]));
|
||||
}
|
||||
buffer = realloc(buffer,buffer_size+1);
|
||||
buffer[buffer_size-1] = temp;
|
||||
buffer[buffer_size] = '\0';
|
||||
buffer_size += 1;
|
||||
}
|
||||
<STRING>\\{oct} {
|
||||
int temp =0,loop = 0;
|
||||
for(loop=yyleng-1; loop>0; loop--){
|
||||
temp <<= 3;
|
||||
temp += (yytext[yyleng-loop] - '0');
|
||||
}
|
||||
buffer = realloc(buffer,buffer_size+1);
|
||||
buffer[buffer_size-1] = temp;
|
||||
buffer[buffer_size] = '\0';
|
||||
buffer_size += 1;
|
||||
}
|
||||
<STRING>\\[^\n] {
|
||||
buffer = realloc(buffer,buffer_size+1);
|
||||
switch(yytext[yyleng-1]){
|
||||
case 'b' : buffer[buffer_size-1] = '\b'; break;
|
||||
case 't' : buffer[buffer_size-1] = '\t'; break;
|
||||
case 'n' : buffer[buffer_size-1] = '\n'; break;
|
||||
case 'v' : buffer[buffer_size-1] = '\v'; break;
|
||||
case 'f' : buffer[buffer_size-1] = '\f'; break;
|
||||
case 'r' : buffer[buffer_size-1] = '\r'; break;
|
||||
default : buffer[buffer_size-1] = yytext[yyleng-1];
|
||||
}
|
||||
buffer[buffer_size] = '\0';
|
||||
buffer_size += 1;
|
||||
}
|
||||
<STRING>\" {
|
||||
printf("string = \"%s\"",buffer);
|
||||
free(buffer);
|
||||
BEGIN(INITIAL);
|
||||
}
|
||||
%%
|
||||
|
||||
|
||||
21
examples/manual/strtest.dat
Normal file
21
examples/manual/strtest.dat
Normal file
@ -0,0 +1,21 @@
|
||||
"This is a string"
|
||||
"The next string will be empty"
|
||||
""
|
||||
"This is a string with a \b(\\b) in it"
|
||||
"This is a string with a \t(\\t) in it"
|
||||
"This is a string with a \n(\\n) in it"
|
||||
"This is a string with a \v(\\v) in it"
|
||||
"This is a string with a \f(\\f) in it"
|
||||
"This is a string with a \r(\\r) in it"
|
||||
"This is a string with a \"(\\\") in it"
|
||||
"This is a string with a \z(\\z) in it"
|
||||
"This is a string with a \X4a(\\X4a) in it"
|
||||
"This is a string with a \x4a(\\x4a) in it"
|
||||
"This is a string with a \x7(\\x7) in it"
|
||||
"This is a string with a \112(\\112) in it"
|
||||
"This is a string with a \043(\\043) in it"
|
||||
"This is a string with a \7(\\7) in it"
|
||||
"This is a multi-line \
|
||||
string"
|
||||
"This is an unterminated string
|
||||
"This is an unterminated string too
|
||||
32
examples/manual/unput.lex
Normal file
32
examples/manual/unput.lex
Normal file
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* unput.l : An example of what *not*
|
||||
* to do with unput().
|
||||
*/
|
||||
|
||||
|
||||
%{
|
||||
#include <stdio.h>
|
||||
|
||||
void putback_yytext(void);
|
||||
%}
|
||||
|
||||
%%
|
||||
foobar putback_yytext();
|
||||
raboof putback_yytext();
|
||||
%%
|
||||
|
||||
void putback_yytext(void)
|
||||
{
|
||||
int i;
|
||||
int l = strlen(yytext);
|
||||
char buffer[YY_BUF_SIZE];
|
||||
|
||||
strcpy(buffer,yytext);
|
||||
printf("Got: %s\n",yytext);
|
||||
for(i=0; i<l; i++){
|
||||
unput(buffer[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
31
examples/manual/user_act.lex
Normal file
31
examples/manual/user_act.lex
Normal file
@ -0,0 +1,31 @@
|
||||
%{
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
void user_action(void);
|
||||
|
||||
#define YY_USER_ACTION user_action();
|
||||
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
.* ECHO;
|
||||
\n ECHO;
|
||||
|
||||
%%
|
||||
|
||||
void user_action(void)
|
||||
{
|
||||
int loop;
|
||||
|
||||
for(loop=0; loop<yyleng; loop++){
|
||||
if(islower(yytext[loop])){
|
||||
yytext[loop] = toupper(yytext[loop]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
30
examples/manual/userinit.lex
Normal file
30
examples/manual/userinit.lex
Normal file
@ -0,0 +1,30 @@
|
||||
%{
|
||||
#define YY_USER_INIT open_input_file()
|
||||
|
||||
extern FILE *yyin;
|
||||
|
||||
void open_input_file(void)
|
||||
{
|
||||
char *file_name,buffer[1024];
|
||||
|
||||
yyin = NULL;
|
||||
|
||||
while(yyin == NULL){
|
||||
printf("Input file: ");
|
||||
file_name = fgets(buffer,1024,stdin);
|
||||
if(file_name){
|
||||
file_name[strlen(file_name)-1] = '\0';
|
||||
yyin = fopen(file_name,"r");
|
||||
if(yyin == NULL){
|
||||
printf("Unable to open \"%s\"\n",file_name);
|
||||
}
|
||||
} else {
|
||||
printf("stdin\n");
|
||||
yyin = stdin;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
%}
|
||||
%%
|
||||
122
examples/manual/wc.lex
Normal file
122
examples/manual/wc.lex
Normal file
@ -0,0 +1,122 @@
|
||||
%{
|
||||
|
||||
/*
|
||||
* wc.lex : A simple example of using FLEX
|
||||
* to create a wc-like utility.
|
||||
*
|
||||
* See examples/fastwc/ in the flex distribution for examples
|
||||
* of how to write this scanner for maximum performance.
|
||||
*/
|
||||
|
||||
int numchars = 0;
|
||||
int numwords = 0;
|
||||
int numlines = 0;
|
||||
int totchars = 0;
|
||||
int totwords = 0;
|
||||
int totlines = 0;
|
||||
|
||||
/*
|
||||
* rules start from here
|
||||
*/
|
||||
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
[\n] { numchars++; numlines++; }
|
||||
[\r] { numchars++; }
|
||||
[^ \t\n]+ { numwords++; numchars += yyleng; }
|
||||
. { numchars++; }
|
||||
|
||||
%%
|
||||
|
||||
/*
|
||||
* additional C code start from here. This supplies
|
||||
* all the argument processing etc.
|
||||
*/
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int loop,first=1;
|
||||
int lflag = 0; /* 1 if we count # of lines */
|
||||
int wflag = 0; /* 1 if we count # of words */
|
||||
int cflag = 0; /* 1 if we count # of characters */
|
||||
int fflag = 0; /* 1 if we have a file name */
|
||||
|
||||
for(loop=1; loop<argc; loop++){
|
||||
if(argv[loop][0] == '-'){
|
||||
switch(argv[loop][1]){
|
||||
case 'l':
|
||||
lflag = 1;
|
||||
break;
|
||||
case 'w':
|
||||
wflag = 1;
|
||||
break;
|
||||
case 'c':
|
||||
cflag = 1;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr,"unknown option -%c\n",
|
||||
argv[loop][1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(lflag == 0 && wflag == 0 && cflag == 0){
|
||||
lflag = wflag = cflag = 1; /* default to all on */
|
||||
}
|
||||
|
||||
for(loop=1; loop<argc; loop++){
|
||||
if(argv[loop][0] != '-'){
|
||||
fflag = 1;
|
||||
numlines = numchars = numwords = 0;
|
||||
if((yyin = fopen(argv[loop],"rb")) != NULL){
|
||||
if(first){
|
||||
first = 0;
|
||||
} else {
|
||||
YY_NEW_FILE;
|
||||
}
|
||||
(void) yylex();
|
||||
fclose(yyin);
|
||||
totwords += numwords;
|
||||
totchars += numchars;
|
||||
totlines += numlines;
|
||||
printf("file : %25s :",argv[loop]) ;
|
||||
if(lflag){
|
||||
fprintf(stdout,"lines %5d ",numlines);
|
||||
}
|
||||
if(cflag){
|
||||
fprintf(stdout,"characters %5d ",numchars);
|
||||
}
|
||||
if(wflag){
|
||||
fprintf(stdout,"words %5d ",numwords);
|
||||
}
|
||||
fprintf(stdout,"\n");
|
||||
}else{
|
||||
fprintf(stderr,"wc : file not found %s\n",argv[loop]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(!fflag){
|
||||
fprintf(stderr,"usage : wc [-l -w -c] file [file...]\n");
|
||||
fprintf(stderr,"-l = count lines\n");
|
||||
fprintf(stderr,"-c = count characters\n");
|
||||
fprintf(stderr,"-w = count words\n");
|
||||
exit(1);
|
||||
}
|
||||
for(loop=0;loop<79; loop++){
|
||||
fprintf(stdout,"-");
|
||||
}
|
||||
fprintf(stdout,"\n");
|
||||
fprintf(stdout,"total : %25s ","") ;
|
||||
if(lflag){
|
||||
fprintf(stdout,"lines %5d ",totlines);
|
||||
}
|
||||
if(cflag){
|
||||
fprintf(stdout,"characters %5d ",totchars);
|
||||
}
|
||||
if(wflag){
|
||||
fprintf(stdout,"words %5d ",totwords);
|
||||
}
|
||||
fprintf(stdout,"\n");
|
||||
return(0);
|
||||
}
|
||||
29
examples/manual/yymore.lex
Normal file
29
examples/manual/yymore.lex
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* yymore.lex: An example of using yymore()
|
||||
* to good effect.
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <memory.h>
|
||||
|
||||
void yyerror(char *message)
|
||||
{
|
||||
printf("Error: %s\n",message);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
%x STRING
|
||||
|
||||
%%
|
||||
\" BEGIN(STRING);
|
||||
|
||||
<STRING>[^\\\n"]* yymore();
|
||||
<STRING><<EOF>> yyerror("EOF in string."); BEGIN(INITIAL);
|
||||
<STRING>\n yyerror("Unterminated string."); BEGIN(INITIAL);
|
||||
<STRING>\\\n yymore();
|
||||
<STRING>\" {
|
||||
yytext[yyleng-1] = '\0';
|
||||
printf("string = \"%s\"",yytext); BEGIN(INITIAL);
|
||||
}
|
||||
%%
|
||||
33
examples/manual/yymore2.lex
Normal file
33
examples/manual/yymore2.lex
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* yymore2.lex: An example of using yymore()
|
||||
* to good effect.
|
||||
*/
|
||||
|
||||
%{
|
||||
#include <memory.h>
|
||||
|
||||
void yyerror(char *message)
|
||||
{
|
||||
printf("Error: %s\n",message);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
%x STRING
|
||||
|
||||
%%
|
||||
\" BEGIN(STRING);
|
||||
|
||||
<STRING>[^\\\n"]* yymore();
|
||||
<STRING><<EOF>> yyerror("EOF in string."); BEGIN(INITIAL);
|
||||
<STRING>\n yyerror("Unterminated string."); BEGIN(INITIAL);
|
||||
<STRING>\\\n {
|
||||
bcopy(yytext,yytext+2,yyleng-2);
|
||||
yytext += 2; yyleng -= 2;
|
||||
yymore();
|
||||
}
|
||||
<STRING>\" {
|
||||
yyleng -= 1; yytext[yyleng] = '\0';
|
||||
printf("string = \"%s\"",yytext); BEGIN(INITIAL);
|
||||
}
|
||||
%%
|
||||
7
examples/manual/yymoretest.dat
Normal file
7
examples/manual/yymoretest.dat
Normal file
@ -0,0 +1,7 @@
|
||||
"This is a test \
|
||||
of multi-line string \
|
||||
scanning in flex. \
|
||||
This may be breaking some law \
|
||||
of usage though..."
|
||||
|
||||
|
||||
58
examples/testxxLexer.l
Normal file
58
examples/testxxLexer.l
Normal file
@ -0,0 +1,58 @@
|
||||
// An example of using the flex C++ scanner class.
|
||||
|
||||
%option C++ noyywrap
|
||||
|
||||
%{
|
||||
int mylineno = 0;
|
||||
%}
|
||||
|
||||
string \"[^\n"]+\"
|
||||
|
||||
ws [ \t]+
|
||||
|
||||
alpha [A-Za-z]
|
||||
dig [0-9]
|
||||
name ({alpha}|{dig}|\$)({alpha}|{dig}|\_|\.|\-|\/|\$)*
|
||||
num1 [-+]?{dig}+\.?([eE][-+]?{dig}+)?
|
||||
num2 [-+]?{dig}*\.{dig}+([eE][-+]?{dig}+)?
|
||||
number {num1}|{num2}
|
||||
|
||||
%%
|
||||
|
||||
{ws} /* skip blanks and tabs */
|
||||
|
||||
"/*" {
|
||||
int c;
|
||||
|
||||
while((c = yyinput()) != 0)
|
||||
{
|
||||
if(c == '\n')
|
||||
++mylineno;
|
||||
|
||||
else if(c == '*')
|
||||
{
|
||||
if((c = yyinput()) == '/')
|
||||
break;
|
||||
else
|
||||
unput(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{number} cout << "number " << YYText() << '\n';
|
||||
|
||||
\n mylineno++;
|
||||
|
||||
{name} cout << "name " << YYText() << '\n';
|
||||
|
||||
{string} cout << "string " << YYText() << '\n';
|
||||
|
||||
%%
|
||||
|
||||
int main( int /* argc */, char** /* argv */ )
|
||||
{
|
||||
FlexLexer* lexer = new yyFlexLexer;
|
||||
while(lexer->yylex() != 0)
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
46
to.do/README
Normal file
46
to.do/README
Normal file
@ -0,0 +1,46 @@
|
||||
The contents of this directory are:
|
||||
|
||||
Wilhelms.todo
|
||||
This guy Gerhard Wilhelms did an exhaustive line-by-line
|
||||
study of flex, finding a large number of glitches; it's
|
||||
not clear how significant they are (some have subsequently
|
||||
been fixed).
|
||||
|
||||
Wish-List
|
||||
A long list of cryptic one-line descriptions of various
|
||||
features people have asked for, or I've thought would be
|
||||
handy. If you have questions about particular ones, let
|
||||
me know.
|
||||
|
||||
reentrant
|
||||
A set of mods contributed by John Bossom for adding
|
||||
an option to flex to make reentrant scanners.
|
||||
|
||||
rflex-2.5.4-diffs
|
||||
A set of mods contributed by Chris Appleton for
|
||||
the same purpose.
|
||||
|
||||
streams
|
||||
email from David Madden about coping with non-blocking
|
||||
I/O.
|
||||
|
||||
unicode
|
||||
patches to support Unicode scanners, contributed
|
||||
by James A. Lauth.
|
||||
|
||||
|
||||
Of these, the ones that people frequently ask about are support for reentrant
|
||||
scanners and support for Unicode. I haven't played with the reentrant stuff
|
||||
so don't know how solid / well designed it is. I've sent out the Unicode
|
||||
stuff to numerous people and haven't received any complaints, so I imagine
|
||||
it works well.
|
||||
|
||||
Another thing people ask for fairly often is removing the limit on size
|
||||
of the scanners. There's some mail in the faqs/ directory about ways to
|
||||
do this by cranking some #define's in flexdef.h, though of course the best
|
||||
solution would be dynamic memory/resizing.
|
||||
|
||||
By the way, I have translated the core flex algorithms into a C++ regular
|
||||
expression class (but one that doesn't support the uglier stuff like
|
||||
yymore()/yyless(), trailing context, etc.). If you ever wind up wanting
|
||||
it, just let me know.
|
||||
711
to.do/Wilhelms.todo
Normal file
711
to.do/Wilhelms.todo
Normal file
@ -0,0 +1,711 @@
|
||||
PARSE.Y 344: ('rule'-rule " | re '$' ": There are some errors concerning
|
||||
trailing context. First of all the rule " re '$' " implies that this is
|
||||
no variable_trail_rule because the tail of it ( '$' ) has a fixed length
|
||||
of 1. The only possible reason for making this rule variable is when
|
||||
'previous_continued_action' is true. In this case 'variable_trail_rule'
|
||||
must be set and the beginning of the trailing part must be marked.
|
||||
However the variables 'varlength' and 'headcnt' do not have the same meaning
|
||||
as in the rule " re2 re ". Here ( in the rule " re '$' " ) 'varlength'
|
||||
is true if the head ( 're' ) of the rule has variable length, and
|
||||
'headcnt' is still 0 because it isn't set during reduction of 're'.
|
||||
Therefore the test for a variable trailing rule
|
||||
" if ( ! varlength || headcnt != 0 ) "
|
||||
is wrong and should be removed.
|
||||
Also it is not necessary to set 'varlength' or 'headcnt' if you set
|
||||
" trailcnt = 1; ". If this rule is made variable then 'variable_trail_rule'
|
||||
is set and neither 'headcnt' nor 'trailcnt' are used in 'finish_rule()'.
|
||||
And if this rule is normal then the head may be variable or not, but in
|
||||
'finish_rule()' code is generated to reduce 'yy_cp' by 1.
|
||||
Finally I found no reason to create an epsilon-state and insert it in
|
||||
front of mkstate( '\n' ) instead of adding it behind. This epsilon-state
|
||||
should be marked as STATE_TRAILING_CONTEXT. Otherwise you get no warning
|
||||
of dangerous trailing context if you have a rule " x\n*$ " which was made
|
||||
variable with '|'.)
|
||||
|
||||
| re '$'
|
||||
{
|
||||
/* if ( trlcontxt )
|
||||
{
|
||||
synerr( "trailing context used twice" );
|
||||
$$ = mkstate( SYM_EPSILON );
|
||||
}
|
||||
|
||||
else */ if ( previous_continued_action )
|
||||
{
|
||||
/* see the comment in the rule for "re2 re"
|
||||
* above
|
||||
*/
|
||||
/* if ( ! varlength || headcnt != 0 ) */
|
||||
{
|
||||
fprintf( stderr,
|
||||
"%s: warning - trailing context rule at line %d made variable because\n",
|
||||
program_name, linenum );
|
||||
fprintf( stderr,
|
||||
" of preceding '|' action\n" );
|
||||
}
|
||||
|
||||
/* mark as variable */
|
||||
/* varlength = true;
|
||||
headcnt = 0; */
|
||||
|
||||
add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK )
|
||||
;
|
||||
variable_trail_rule = true;
|
||||
}
|
||||
|
||||
/* trlcontxt = true;
|
||||
|
||||
if ( ! varlength )
|
||||
headcnt = rulelen;
|
||||
|
||||
++rulelen; */
|
||||
trailcnt = 1;
|
||||
|
||||
current_state_type = STATE_TRAILING_CONTEXT;
|
||||
eps = mkstate( SYM_EPSILON );
|
||||
current_state_type = STATE_NORMAL;
|
||||
|
||||
$$ = link_machines( $1,
|
||||
link_machines( mkstate( '\n' ), eps ) );
|
||||
}
|
||||
|
||||
DFA.C 618: (ntod(): The arrays 'targstate[]' and 'targfreq[]' can be
|
||||
maintained in a better way. Up to now it is possible that states are added
|
||||
to 'targstate[]' more than once, because the state 'newds' from the call
|
||||
to snstods() creates a new entry in 'targstate[]'. But 'newds' may already
|
||||
exist in 'targstate[]' !
|
||||
Another point is that 'targfreq[]' is not updated if "caseins && ! useecs"
|
||||
is true.
|
||||
My algorithm should solve these problems. However it could be simplified
|
||||
by replacing 'newds' by 'targ' and removing the statement "targ = newds;".
|
||||
Remark to the second point: I decremented the targfreq-counter if 'sym'
|
||||
was an uppercase letter and incremented it if 'sym' was a lowercase
|
||||
letter. The index 'i' of 'targfreq[i]' points to the correct position in
|
||||
'targstate[]' even if a new state was added.)
|
||||
|
||||
for ( sym = 1; sym <= numecs; ++sym )
|
||||
{
|
||||
if ( symlist[sym] )
|
||||
{
|
||||
symlist[sym] = 0;
|
||||
|
||||
if ( duplist[sym] == NIL )
|
||||
{ /* symbol has unique out-transitions */
|
||||
numstates = symfollowset( dset, dsize, sym, nset );
|
||||
nset = epsclosure( nset, &numstates, accset,
|
||||
&nacc, &hashval );
|
||||
|
||||
if ( snstods( nset, numstates, accset,
|
||||
nacc, hashval, &newds ) )
|
||||
{
|
||||
totnst = totnst + numstates;
|
||||
++todo_next;
|
||||
numas += nacc;
|
||||
|
||||
if ( variable_trailing_context_rules && nacc > 0 )
|
||||
check_trailing_context( nset, numstates,
|
||||
accset, nacc );
|
||||
}
|
||||
|
||||
targ = newds;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/* sym's equivalence class has the same transitions
|
||||
* as duplist(sym)'s equivalence class
|
||||
*/
|
||||
targ = state[duplist[sym]];
|
||||
}
|
||||
|
||||
state[sym] = targ;
|
||||
|
||||
if ( trace )
|
||||
fprintf( stderr, "\t%d\t%d\n", sym, targ );
|
||||
|
||||
/* update frequency count for destination state */
|
||||
|
||||
for ( i = 1; i <= targptr; ++i )
|
||||
if ( targstate[i] == targ )
|
||||
break;
|
||||
|
||||
if ( i <= targptr )
|
||||
{
|
||||
++targfreq[i];
|
||||
++numdup;
|
||||
}
|
||||
else
|
||||
{
|
||||
targfreq[++targptr] = 1;
|
||||
targstate[targptr] = targ;
|
||||
++numuniq;
|
||||
}
|
||||
|
||||
if ( caseins && ! useecs )
|
||||
{
|
||||
if ( sym >= 'A' && sym <= 'Z' )
|
||||
{
|
||||
--targfreq[i];
|
||||
--totaltrans;
|
||||
}
|
||||
else if ( sym >= 'a' && sym <= 'z' )
|
||||
{
|
||||
++targfreq[i];
|
||||
++totaltrans;
|
||||
}
|
||||
}
|
||||
|
||||
++totaltrans;
|
||||
duplist[sym] = NIL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
GEN.C 438: (gen_next_compressed_state(): I have rewritten the function
|
||||
'yy_try_NUL_trans()' so it really just tries to find out whether a
|
||||
transition on the NUL character goes to the jamstate or not. ( That means
|
||||
I removed each creation of backtracking information and the saving of the
|
||||
new state on the 'yy_state_buf[]'. )
|
||||
Therefore I removed the call for 'gen_backtracking()' here, because the
|
||||
function 'gen_next_compressed_state()' is also used in 'gen_NUL_trans()'.)
|
||||
|
||||
/* gen_backtracking(); */
|
||||
|
||||
GEN.C 587ff: (gen_next_state(): Since the backtracking information is not
|
||||
created in 'gen_next_compressed_state()' any more, it is done here
|
||||
before the next state is computed ( for "compressed" tables ). This
|
||||
removes the bug that the backtracking information is created twice if
|
||||
'nultrans' is not NULL and 'gen_next_compressed_state()' is called.
|
||||
Finally I had to insert the creation of a "{" and a "}", because there
|
||||
is a local variable created in 'gen_next_compressed_state()'. ( These are
|
||||
needed only when backtracking information is really created.) )
|
||||
|
||||
if ( ! fulltbl && ! fullspd )
|
||||
gen_backtracking();
|
||||
|
||||
if ( worry_about_NULs && nultrans )
|
||||
{
|
||||
indent_puts( "if ( *yy_cp )" );
|
||||
indent_up();
|
||||
indent_puts( "{" );
|
||||
}
|
||||
else if ( ! fulltbl && ! fullspd && ! reject && num_backtracking > 0 )
|
||||
indent_puts( "{" );
|
||||
|
||||
if ( fulltbl )
|
||||
indent_put2s( "yy_current_state = yy_nxt[yy_current_state][%s];",
|
||||
char_map );
|
||||
|
||||
else if ( fullspd )
|
||||
indent_put2s( "yy_current_state += yy_current_state[%s].yy_nxt;",
|
||||
char_map );
|
||||
|
||||
else
|
||||
gen_next_compressed_state( char_map );
|
||||
|
||||
if ( worry_about_NULs && nultrans )
|
||||
{
|
||||
indent_puts( "}" );
|
||||
indent_down();
|
||||
indent_puts( "else" );
|
||||
indent_up();
|
||||
|
||||
indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" );
|
||||
indent_down();
|
||||
}
|
||||
else if ( ! fulltbl && ! fullspd && ! reject && num_backtracking > 0 )
|
||||
indent_puts( "}" );
|
||||
|
||||
if ( fullspd || fulltbl )
|
||||
gen_backtracking();
|
||||
|
||||
if ( reject )
|
||||
indent_puts( "*yy_state_ptr++ = yy_current_state;" );
|
||||
}
|
||||
|
||||
GEN.C 553: (gen_next_match(): There is a problem if 'interactive' is true. In
|
||||
this case the scanner jams if the next state is the jamstate ( i.e.
|
||||
yy_base[yy_current_state] == jambase ). However the scanner reaches also
|
||||
the jamstate if the transition character is the NUL-character or if the
|
||||
end of the buffer is reached. The EOB-action then decides whether
|
||||
this was really a NUL character or the end-of-buffer. ( If it was a NUL,
|
||||
scanning will be resumed. If it was the end-of-buffer, the buffer will be
|
||||
filled first, before scanning will be resumed. )
|
||||
These actions are not done if you use an 'interactive' scanner, because
|
||||
the EOB-action is not executed. Therefore you have to continue scanning,
|
||||
if you have just matched a NUL character ( i.e. *yy_cp == '\0' and
|
||||
yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) and if you are not
|
||||
already in the jamstate ( i.e. yy_current_state != jamstate ).
|
||||
Note that the '<' in " yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars] "
|
||||
implies that the EOB action is *not* executed if the last match before the
|
||||
end-of-buffer was maximal.
|
||||
The following change in the algorithm results in only a minor performance
|
||||
penalty because the additional conditions are tested only if you have
|
||||
reached the end of the match or if you are using NUL characters in your
|
||||
patterns.)
|
||||
|
||||
if ( interactive )
|
||||
{
|
||||
printf( "while ( yy_base[yy_current_state] != %d\n", jambase );
|
||||
set_indent( 4 );
|
||||
indent_puts( "|| ( *yy_cp == '\\0'" );
|
||||
indent_up();
|
||||
indent_puts(
|
||||
" && yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars]" );
|
||||
do_indent();
|
||||
printf( " && yy_current_state != %d ) );\n", jamstate );
|
||||
set_indent( 2 );
|
||||
}
|
||||
else
|
||||
printf( "while ( yy_current_state != %d );\n", jamstate );
|
||||
|
||||
GEN.C 341: (gen_find_action(): Question: The variables 'yy_full_match',
|
||||
'yy_full_state' and 'yy_full_lp' are used only in the REJECT macro. Why
|
||||
do you not also test here on 'real_reject' before you create code to set
|
||||
these variables ( like you did in line 327ff for the action of the case
|
||||
" ( yy_act & YY_TRAILING_MASK ) " ) ?)
|
||||
|
||||
New code beginning at line 338 to show the context:
|
||||
indent_puts( "else" );
|
||||
indent_up();
|
||||
indent_puts( "{" );
|
||||
|
||||
if ( real_reject )
|
||||
{
|
||||
/* remember matched text in case we back up due to REJECT */
|
||||
indent_puts( "yy_full_match = yy_cp;" );
|
||||
indent_puts( "yy_full_state = yy_state_ptr;" );
|
||||
indent_puts( "yy_full_lp = yy_lp;" );
|
||||
}
|
||||
|
||||
indent_puts( "break;" );
|
||||
indent_puts( "}" );
|
||||
indent_down();
|
||||
|
||||
indent_puts( "++yy_lp;" );
|
||||
indent_puts( "goto find_rule;" );
|
||||
}
|
||||
|
||||
|
||||
FLEX.SKEL 364,379: (YY_END_OF_BUFFER action: If it was really a NUL character
|
||||
which started this action, then 'yy_bp' points still at the beginning of
|
||||
the current run and 'yy_c_buf_p' points behind the NUL character.
|
||||
Contrast this with the situation after the call of 'yy_get_next_buffer()'!
|
||||
Therefore I removed the statement " yy_bp = yytext + YY_MORE_ADJ; "
|
||||
( line 379 ) and replaced the statement
|
||||
" yy_c_buf_p = yytext + yy_amount_of_matched_text; " ( line 364 ) by the
|
||||
easier one " yy_c_buf_p = --yy_cp; ". Here 'yy_cp' is also adjusted.
|
||||
This guarantees that both 'yy_c_buf_p' and 'yy_cp' point at the NUL
|
||||
character. Therefore 'yy_cp' will have the correct value when it is needed
|
||||
after the call to 'yy_try_NUL_trans()' ( when we know whether we make a
|
||||
transition or not ).
|
||||
|
||||
line 364:
|
||||
yy_c_buf_p = --yy_cp;
|
||||
|
||||
line 379:
|
||||
/* yy_bp = yytext + YY_MORE_ADJ; */
|
||||
|
||||
GEN.C 632: (gen_NUL_trans(): I have rewritten 'yy_try_NUL_trans()'. The new
|
||||
version just finds out whether a transition on the NUL character goes to
|
||||
the jamstate or not. See also my remarks to 'gen_next_compressed_state()'.
|
||||
Note that the test " yy_is_jam = (yy_current_state == jamstate); " is
|
||||
also used, if 'interactive' is true. Otherwise 'yy_try_NUL_trans()' would
|
||||
return 0, if the NUL character was the last character of a pattern
|
||||
( e.g. "x\0" ), and we therefore would not reach the last state.
|
||||
Remark: Change also the comment in FLEX.SKEL for this function.)
|
||||
|
||||
FLEX.SKEL, line 583:
|
||||
%% code to find the next state goes here
|
||||
|
||||
GEN.C, line 632ff:
|
||||
/* int need_backtracking = (num_backtracking > 0 && ! reject);
|
||||
|
||||
if ( need_backtracking )
|
||||
/ * we'll need yy_cp lying around for the gen_backtracking() * /
|
||||
indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); */
|
||||
|
||||
GEN.C, line 674ff:
|
||||
/* if ( reject )
|
||||
indent_puts( "*yy_state_ptr++ = yy_current_state;" ); */
|
||||
|
||||
do_indent();
|
||||
|
||||
/* if ( interactive )
|
||||
printf( "yy_is_jam = (yy_base[yy_current_state] == %d);\n",
|
||||
jambase );
|
||||
else */
|
||||
printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate );
|
||||
}
|
||||
|
||||
/* if we've entered an accepting state, backtrack; note that
|
||||
* compressed tables have *already* done such backtracking, so
|
||||
* we needn't bother with it again
|
||||
*/
|
||||
/* if ( need_backtracking && (fullspd || fulltbl) )
|
||||
{
|
||||
putchar( '\n' );
|
||||
indent_puts( "if ( ! yy_is_jam )" );
|
||||
indent_up();
|
||||
indent_puts( "{" );
|
||||
gen_backtracking();
|
||||
indent_puts( "}" );
|
||||
indent_down();
|
||||
} */
|
||||
}
|
||||
|
||||
GEN.C 1293: (make_tables(): The changed functionality of 'yy_try_NUL_trans()'
|
||||
implies changes in the EOB action. If the next state 'yy_next_state' is 0
|
||||
( i.e. the jamstate ), you can immediately jump to 'yy_find_action'.
|
||||
Remember that 'yy_cp' was already adjusted to point at the NUL !
|
||||
Also you must not use the backtracking information because the actual
|
||||
state 'yy_current_state' may be an accepting state.
|
||||
If 'yy_next_state' is not the jamstate, we make a transition on the NUL.
|
||||
This requires the following actions:
|
||||
- Create backtracking information for compressed tables *before* we make
|
||||
the transition on NUL.
|
||||
- Now increment 'yy_cp' and set 'yy_current_state' to 'yy_next_state'.
|
||||
( Note that 'yy_cp' points at the NUL up to now. )
|
||||
- Save the new state on the stack 'yy_state_buf[]' if 'reject' is true.
|
||||
- Create backtracking information *after* the transition, if 'fulltbl'
|
||||
or 'fullspd' is true.
|
||||
- Finally decide, if 'interactive' is true, whether scanning should be
|
||||
resumed at 'yy_match' or whether we have reached a final state and
|
||||
should jump to 'yy_find_action'. (Condition like in 'gen_next_match()'.)
|
||||
If 'interactive' is false, just resume scanning.)
|
||||
|
||||
Corresponding code in FLEX.SKEL beginning at line 381:
|
||||
if ( yy_next_state )
|
||||
{
|
||||
/* consume the NUL */
|
||||
%% code to do backtracking for compressed tables and set up yy_cp goes here
|
||||
}
|
||||
else
|
||||
goto yy_find_action;
|
||||
|
||||
Code in GEN.C beginning at line 1293:
|
||||
/* first, deal with backtracking and setting up yy_cp if the scanner
|
||||
* finds that it should JAM on the NUL
|
||||
*/
|
||||
skelout();
|
||||
set_indent( 6 );
|
||||
|
||||
if ( ! fulltbl && ! fullspd )
|
||||
gen_backtracking();
|
||||
|
||||
indent_puts( "++yy_cp;" );
|
||||
indent_puts( "yy_current_state = yy_next_state;" );
|
||||
|
||||
if ( reject )
|
||||
indent_puts( "*yy_state_ptr++ = yy_current_state;" );
|
||||
|
||||
if ( fulltbl || fullspd )
|
||||
gen_backtracking();
|
||||
|
||||
if ( interactive )
|
||||
{
|
||||
do_indent();
|
||||
printf( "if ( yy_base[yy_current_state] != %d\n", jambase );
|
||||
indent_up();
|
||||
indent_puts( "|| ( *yy_cp == '\\0'" );
|
||||
indent_puts( "&& yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars]" );
|
||||
do_indent();
|
||||
printf( "&& yy_current_state != %d ) )\n", jamstate );
|
||||
indent_puts( "goto yy_match;" );
|
||||
indent_down();
|
||||
indent_puts( "else" );
|
||||
indent_up();
|
||||
indent_puts( "goto yy_find_action;" );
|
||||
indent_down();
|
||||
}
|
||||
else
|
||||
indent_puts( "goto yy_match;" );
|
||||
|
||||
/* if ( fullspd || fulltbl )
|
||||
indent_puts( "yy_cp = yy_c_buf_p;" );
|
||||
|
||||
else
|
||||
{ / * compressed table * /
|
||||
if ( ! reject && ! interactive )
|
||||
{
|
||||
/ * do the guaranteed-needed backtrack to figure out the match * /
|
||||
indent_puts( "yy_cp = yy_last_accepting_cpos;" );
|
||||
indent_puts( "yy_current_state = yy_last_accepting_state;" );
|
||||
}
|
||||
} */
|
||||
|
||||
FLEX.SKEL 513: (yy_get_next_buffer(): Here is an error if 'yymore()' is active
|
||||
in the last match (i.e. yy_doing_yy_more == 1 and yy_more_len > 0). Then
|
||||
'number_to_move' will be (1 + yy_more_len), i.e. the previous character
|
||||
plus the additional characters for using 'yymore()'.)
|
||||
|
||||
if ( number_to_move == 1 + YY_MORE_ADJ )
|
||||
{
|
||||
ret_val = EOB_ACT_END_OF_FILE;
|
||||
yy_current_buffer->yy_eof_status = EOF_DONE;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
ret_val = EOB_ACT_LAST_MATCH;
|
||||
yy_current_buffer->yy_eof_status = EOF_PENDING;
|
||||
}
|
||||
}
|
||||
|
||||
GEN.C 1317: (make_tables(): In the generation of 'yy_get_previous_state()' the
|
||||
variable 'yy_bp' must be set to 'yytext + YY_MORE_ADJ' if 'bol_needed' is
|
||||
true. Otherwise 'yy_bp' points eventually at the beginning of the
|
||||
yymore-prefix instead of the current run.)
|
||||
|
||||
if ( bol_needed )
|
||||
indent_puts( "register YY_CHAR *yy_bp = yytext + YY_MORE_ADJ;\n" );
|
||||
|
||||
FLEX.SKEL 589ff: (yyunput(): The function 'yyunput()' should be rewritten.
|
||||
First of all the example for 'unput()' in file flexdoc doesn't work:
|
||||
{
|
||||
int i;
|
||||
unput( ')' );
|
||||
for ( i = yyleng - 1; i >= 0; --i )
|
||||
unput( yytext[i] );
|
||||
unput( '(' );
|
||||
}
|
||||
The actual version of 'yyunput()' modifies 'yyleng'. Therefore 'yyleng' is
|
||||
decremented by " unput( ')' ) " and the pattern to be pushed back has lost
|
||||
its last character. To avoid this just copy the 'yytext'-string and
|
||||
'yyleng' before you call 'unput()'.
|
||||
Another point is that 'yytext' and 'yyleng' could be maintained in a
|
||||
better way. ( Up to now 'yyleng' can become negative ! )
|
||||
I think it's better to say that the pushed back pattern should fulfill
|
||||
the beginning-of-line-condition if and only if the old pattern does
|
||||
( excluding a possibly existing 'yymore'-prefix ! ).
|
||||
Up to now you have problems if there is a 'yymore'-prefix, because
|
||||
'yytext' will be corrupted by YY_DO_BEFORE_ACTION. ( This macro sets
|
||||
'yytext' to 'yy_bp - yy_more_len', but our 'yy_bp' points already at the
|
||||
beginning of the 'yymore'-prefix. )
|
||||
|
||||
My version of 'yyunput()' reduces the 'yytext'-string by 1 for every
|
||||
pushed back character and decrements 'yyleng' until 'yytext' is the empty
|
||||
string. The beginning-of-line-condition is preserved when 'bol_needed' is
|
||||
true. ( Then the character before the current run is copied in front of
|
||||
the pushed back character. ) If there is a 'yymore'-prefix, 'yy_more_len'
|
||||
will be decremented if 'yy_cp' reaches the beginning of the current run.
|
||||
|
||||
Remark: The parameter 'yytext' in " yyunput( c, yytext ) " is not really
|
||||
necessary since 'yytext' is a global variable. You could also set
|
||||
" register YY_CHAR *yy_bp = yytext; " at the beginning of 'yyunput()'.)
|
||||
|
||||
Replace lines 622 - 623 in FLEX.SKEL:
|
||||
|
||||
if ( yy_cp > yy_bp && yy_cp[-1] == '\n' )
|
||||
yy_cp[-2] = '\n';
|
||||
|
||||
by
|
||||
|
||||
%% code to adjust yy_bp and yy_more_len goes here
|
||||
|
||||
Add in GEN.C a function 'gen_yyunput()':
|
||||
/* generate code to adjust yy_bp and yy_more_len in yyunput
|
||||
*/
|
||||
|
||||
void gen_yyunput()
|
||||
|
||||
{
|
||||
if ( yymore_used )
|
||||
indent_puts( "yy_bp += YY_MORE_ADJ;\n" );
|
||||
|
||||
if ( bol_needed )
|
||||
indent_puts( "yy_cp[-2] = yy_bp[-1];\n" );
|
||||
|
||||
if ( yymore_used )
|
||||
{
|
||||
indent_puts( "if ( (yy_cp == yy_bp) && YY_MORE_ADJ )" );
|
||||
indent_up();
|
||||
indent_puts( "--yy_more_len;" );
|
||||
indent_down();
|
||||
indent_puts( "else" );
|
||||
indent_up();
|
||||
indent_puts( "--yy_bp;" );
|
||||
indent_down();
|
||||
}
|
||||
else
|
||||
indent_puts( "--yy_bp;" );
|
||||
}
|
||||
|
||||
Finally add in the function 'make_tables()' behind the call of
|
||||
'gen_NUL_trans()' in line 1328:
|
||||
|
||||
skelout();
|
||||
gen_yyunput();
|
||||
|
||||
FLEX.SKEL 642,658: (input(): There is an error in 'input()' if the end of
|
||||
'yy_current_buffer' is reached and 'yymore' is active. Then
|
||||
'yy_get_next_buffer()' is called, which assumes that 'yytext'
|
||||
points at the beginning of the 'yymore'-prefix. This function can't
|
||||
recognize the end of the input stream correctly and therefore returns
|
||||
EOB_ACT_LAST_MATCH instead of EOB_ACT_END_OF_FILE. Also if the end of
|
||||
the input file isn't reached yet (EOB_ACT_CONTINUE_SCAN) at least one
|
||||
character will be lost.
|
||||
To avoid this error just turn off 'yy_doing_yy_more'. Then you need
|
||||
not to adjust with YY_MORE_ADJ in lines 667 and 682. However you have to
|
||||
use a function 'gen_input()', because 'yy_doing_yy_more' does not exist
|
||||
if 'yymore_used' is false.
|
||||
|
||||
( Another solution is to adjust 'yytext':
|
||||
" yytext = yy_c_buf_p - YY_MORE_ADJ; ", line 658. )
|
||||
|
||||
I think the trick with "yy_did_buffer_switch_on_eof" should be done here
|
||||
the same way as in the YY_END_OF_BUFFER action.
|
||||
Finally I removed the variable 'yy_cp' and used 'yy_c_buf_p' instead.)
|
||||
|
||||
#ifdef __cplusplus
|
||||
static int yyinput()
|
||||
#else
|
||||
static int input()
|
||||
#endif
|
||||
|
||||
{
|
||||
int c;
|
||||
|
||||
*yy_c_buf_p = yy_hold_char; /* yy_cp not needed */
|
||||
|
||||
if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
|
||||
{
|
||||
/* yy_c_buf_p now points to the character we want to return.
|
||||
* If this occurs *before* the EOB characters, then it's a
|
||||
* valid NUL; if not, then we've hit the end of the buffer.
|
||||
*/
|
||||
if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
|
||||
/* this was really a NUL */
|
||||
*yy_c_buf_p = '\0';
|
||||
|
||||
else
|
||||
{ /* need more input */
|
||||
%% code to turn off yy_doing_yy_more and yy_more_len goes here
|
||||
yytext = yy_c_buf_p;
|
||||
++yy_c_buf_p;
|
||||
|
||||
switch ( yy_get_next_buffer() )
|
||||
{
|
||||
case EOB_ACT_END_OF_FILE:
|
||||
{
|
||||
yy_did_buffer_switch_on_eof = 0;
|
||||
|
||||
if ( yywrap() )
|
||||
{
|
||||
yy_c_buf_p = yytext; /* + YY_MORE_ADJ not needed */
|
||||
return ( EOF );
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( ! yy_did_buffer_switch_on_eof )
|
||||
YY_NEW_FILE;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
return ( yyinput() );
|
||||
#else
|
||||
return ( input() );
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
|
||||
case EOB_ACT_CONTINUE_SCAN:
|
||||
yy_c_buf_p = yytext; /* + YY_MORE_ADJ not needed */
|
||||
break;
|
||||
|
||||
case EOB_ACT_LAST_MATCH:
|
||||
#ifdef __cplusplus
|
||||
YY_FATAL_ERROR( "unexpected last match in yyinput()" );
|
||||
#else
|
||||
YY_FATAL_ERROR( "unexpected last match in input()" );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
c = *yy_c_buf_p;
|
||||
yy_hold_char = *++yy_c_buf_p;
|
||||
|
||||
return ( c );
|
||||
}
|
||||
|
||||
Add in GEN.C a function 'gen_input()':
|
||||
/* generate code to turn off yy_doing_yy_more and yy_more_len in input
|
||||
*/
|
||||
|
||||
void gen_input()
|
||||
|
||||
{
|
||||
if ( yymore_used )
|
||||
indent_puts( "yy_doing_yy_more = yy_more_len = 0;" );
|
||||
}
|
||||
|
||||
Finally add in the function 'make_tables()' behind the call of
|
||||
'gen_yyunput()':
|
||||
|
||||
set_indent( 3 );
|
||||
skelout();
|
||||
gen_input();
|
||||
|
||||
PARSE.Y 54: ( 'goal'-rule: If there is no rule in the input file, the end of
|
||||
the prolog is not marked yet, because 'flexscan()' is still in the start
|
||||
condition <SECT2PROLOG> and the rule <SECT2PROLOG><<EOF>> is not done up
|
||||
to now. Therefore mark the end of prolog here, before you add the default
|
||||
rule. I test here on " num_rules == 1 ", because the 'initforrule'-rule
|
||||
increments 'num_rules' before this action is executed.)
|
||||
|
||||
if ( num_rules == 1 )
|
||||
fprintf( temp_action_file, "%%%% end of prolog\n" )
|
||||
;
|
||||
|
||||
SCAN.L 255: ( '<SECT2PROLOG><<EOF>>'-rule: If there are no rules at all in
|
||||
the input file, then this rule will be executed at the end of
|
||||
'make_tables()'. At this point 'temp_action_file' was closed for writing
|
||||
and has been reopened for reading. The macro MARK_END_OF_PROLOG will
|
||||
therefore lead to a write-error.
|
||||
To avoid this error add the condition " if ( num_rules == 0 ) ". If this
|
||||
rule is executed at the end of 'make_tables()' there will be at least the
|
||||
default rule, i.e. 'num_rules' will be greater than 0.
|
||||
Remark: This correction together with the one before will allow an input
|
||||
file which just consists of "%%". ( Copy 'stdin' to 'stdout'. ))
|
||||
|
||||
<SECT2PROLOG><<EOF>> {
|
||||
if ( num_rules == 0 )
|
||||
MARK_END_OF_PROLOG;
|
||||
yyterminate();
|
||||
}
|
||||
|
||||
MISC.C 376: ( flexfatal(): The call of 'flexend( 1 )' will lead to an
|
||||
infinite loop if 'flexfatal()' is called from 'flexend()'. I therefore
|
||||
introduced the flag 'doing_flexend' to prevent 'flexend()' from being called
|
||||
more than once.)
|
||||
|
||||
Replace the function call 'flexend( 1 );' in MISC.C, line 376, by
|
||||
if ( ! doing_flexend )
|
||||
flexend( 1 );
|
||||
|
||||
Set 'doing_flexend' at the beginning of 'flexend()' in MAIN.C, line 195:
|
||||
doing_flexend = true;
|
||||
|
||||
Add in FLEXDEF.H, line 381, the declaration of 'doing_flexend':
|
||||
extern int yymore_used, reject, real_reject, continued_action, doing_flexend;
|
||||
|
||||
Add in FLEXDEF.H, line 376, a comment for this variable:
|
||||
* doing_flexend - true if flexend() has been started
|
||||
|
||||
Initialize 'doing_flexend' in 'flexinit()' in MAIN.C, line 401:
|
||||
yymore_used = continued_action = reject = doing_flexend = false;
|
||||
|
||||
FLEX.SKEL 94: ( 'YY_INPUT()'-macro: I have problems with 'fileno()' and
|
||||
'read()'.
|
||||
I used the C Compiler of the BORLAND C++ Compiler and compiled the created
|
||||
scanner with the option 'ANSI keywords'.
|
||||
In this compiler the prototype of the function 'read(...)' is declared in
|
||||
the header file 'io.h' and not in 'stdio.h'. Therefore I get a warning.
|
||||
Real trouble caused 'fileno' which is defined as macro in 'stdio.h':
|
||||
#define fileno(f) ((f)->fd)
|
||||
However this macro does not belong to the 'ANSI keywords' because it is
|
||||
define'd under the condition " #if !__STDC__ ". Therefore I get a warning
|
||||
and a linker error that the function 'fileno()' does not exist.
|
||||
(I can avoid this problem by adding the above define-macro in the *.l file
|
||||
or by replacing the option 'ANSI keywords' by 'Borland C++ keywords'.))
|
||||
123
to.do/Wish-List
Normal file
123
to.do/Wish-List
Normal file
@ -0,0 +1,123 @@
|
||||
start conditions given own name space by making them structure fields
|
||||
#define BEGIN(x) yy_start_state = yy_states->x
|
||||
reentrant/
|
||||
streams/
|
||||
yylineno maintained per input buffer
|
||||
use yyconst instead of const, to fix __STDC__ == 0 problem
|
||||
scan input for unput()
|
||||
-CF/-Cf support interactive scanners
|
||||
reject_really_used -> maintain_backup_tables
|
||||
full library encapsulation: flex'ing on the fly
|
||||
fix MAX_MNS/MARKER_DIFFERENCE to not be a hard limit
|
||||
Two flags to warn when something is seen that lex or posix might interpret
|
||||
differently; this should be quite doable as -l already exists. Proposed
|
||||
names: -Wl, -Wp.
|
||||
reentrant C scanners
|
||||
yy_fseek() for positioning in input file
|
||||
set-able "at beginning of line" , no more unput() trashes yytext?
|
||||
yy_unput_string(); unput() shifts yytext to preserve it, grows buffer as needed
|
||||
yy_malloc_type as void* so can be easily switched to char* for poor
|
||||
hopeless bastards running SunSoft stuff?
|
||||
public "TODO" file, requesting help?
|
||||
test -P to make sure it's not broken now due to e.g. yy_scan_string
|
||||
%option
|
||||
hook for treating input interactively even if not isatty()
|
||||
scan.l:22:error message :-( (see flex.todo)
|
||||
document yy_fill_buffer
|
||||
lint, gcc-lint
|
||||
-lfl removed from flex.1
|
||||
merge 2.4.6, e.g., NEWS
|
||||
'|' action copies action instead of omitting break
|
||||
if yy_current_buffer defined on entry to yylex(), don't promote nil yyin
|
||||
to stdin, etc.
|
||||
multibyte character flex
|
||||
|
||||
|
||||
ANSI only
|
||||
multiple inclusion of <stdlib.h>?
|
||||
[=...=] POSIX stuff
|
||||
+flex.todo
|
||||
yylineno, yycol by checking for whether rules can match embedded newlines,
|
||||
only trailing newlines, always trailing newlines, or no newlines
|
||||
compute transition path to each DFA state, to aid in backtracking
|
||||
for each state, store pointer to predecessor, character for xtion
|
||||
merge flex.1, flexdoc.1?
|
||||
bison++ interface
|
||||
YYLEXER_NAME
|
||||
out-line FlexLexer destructors
|
||||
GNU readline contrib?
|
||||
isatty() decl?
|
||||
#ifdef chud for unput() etc. not being used?
|
||||
"../scan.l", line 207: warning: ::yy_did_buffer_switch_on_eof defined but not used
|
||||
cc -c -g scan.c
|
||||
"scan.cc", line 1752: warning: statement not reached
|
||||
alloca.c removed from Makefile
|
||||
// comments
|
||||
|
||||
output partitioning for e.g., scanning tables, actions, etc.
|
||||
|
||||
497 09/11 14:17-PDT 3450 To:t_bonner@oscar Re: Modifying yytext in an actio
|
||||
MISC stuff non-writeable
|
||||
texinfo version of manual
|
||||
|
||||
ALSO: document how to do so (including no need to redefine unput()),
|
||||
whether feature added or not
|
||||
|
||||
example of "error" backtracking rules as opposed to "catch-all"
|
||||
get rid of get_previous_state via accepting #'s tied to previous state #'s
|
||||
-p tells something about backtracking
|
||||
easy way to scan strings instead of files
|
||||
input() across buffer boundaries, buffer overflow; unput() fix
|
||||
start state stack
|
||||
NLSTATE - sets "in newline" state; also mechanism to clear "in newline" state
|
||||
checks for bogus backtrack rules ... - rule shadowing
|
||||
document incompatibility with lex when unput()'ing a newline
|
||||
after a newline has been read
|
||||
document that comments are not allowed on definition lines
|
||||
foo bar /* the "foo" definition ... */
|
||||
perhaps indented code in section 2 leads to warnings?
|
||||
#line directives for code at beginning of scanner routine
|
||||
nuke %used etc.
|
||||
hooks for direct access to the buffer, e.g. for flushing it
|
||||
options in .l file as well as on command line; particularly the rename-prefix
|
||||
option
|
||||
clarify "eat up * not followed by /" in <comment> example; move it to
|
||||
performance, offer simpler version for start states
|
||||
hook for finding out how much text can be safely pushed back
|
||||
the .backtrack code knows how to identify characters that cause transitions
|
||||
(you wanted this for some clearer error messages for the
|
||||
"default rule can be matched")
|
||||
yy_switch_to_buffer sets yy_init to 0?
|
||||
handy library routines, such as yy_C_comment(), yy_C_string(),
|
||||
obey #line directives in input; first, get rid of # comments ...
|
||||
flex.h header for declarations of e.g., yymore(), yytext?
|
||||
but what about %array making the yytext definition out of date?
|
||||
merge w/ okeeffe code
|
||||
rearrange the Performance Considerations section so that the easy
|
||||
fixes come first
|
||||
copyright notice in manuals?
|
||||
input() updates yytext and yyleng; perhaps unput too???;
|
||||
right now it trashes them (doesn't restore '\0')
|
||||
document that yyleng can now be modified
|
||||
except if yymore() used?
|
||||
anchoring allowed inside ()'s - (^abc|def$)
|
||||
unput() propagates non-newline state too?
|
||||
complain about invalid anchoring - foo(^abc), (^abc)+
|
||||
library in its own directory
|
||||
yylineno
|
||||
example in flexdoc on YY_INPUT reading from input()
|
||||
redesign for retargetability (i.e., use w/ other languages ...)
|
||||
clean up escape expansion
|
||||
bison @N
|
||||
example for doc. on scanning strings w/ escapes in them:
|
||||
POSIX/
|
||||
get rid of duplicated code between "re2 re" rule and "re '$'" rule
|
||||
preformatted man pages for VMS sites, possibly using col -b to get rid
|
||||
of backspaces ...
|
||||
slurp entire input file into mega-buffer; allows pointers to in-place
|
||||
identifiers
|
||||
lex compatibility flag
|
||||
update flags in docs
|
||||
-n removed from POSIX?
|
||||
"MAKE = ..." shouldn't be commented out, or else bigtest can fail
|
||||
BSD man macros
|
||||
3182
to.do/flex.rmail
Normal file
3182
to.do/flex.rmail
Normal file
File diff suppressed because it is too large
Load Diff
195
to.do/unicode/FlexLexer.h
Normal file
195
to.do/unicode/FlexLexer.h
Normal file
@ -0,0 +1,195 @@
|
||||
// $Header$
|
||||
|
||||
// FlexLexer.h -- define interfaces for lexical analyzer classes generated
|
||||
// by flex
|
||||
|
||||
// Copyright (c) 1993 The Regents of the University of California.
|
||||
// All rights reserved.
|
||||
//
|
||||
// This code is derived from software contributed to Berkeley by
|
||||
// Kent Williams and Tom Epperly.
|
||||
//
|
||||
// Redistribution and use in source and binary forms are permitted provided
|
||||
// that: (1) source distributions retain this entire copyright notice and
|
||||
// comment, and (2) distributions including binaries display the following
|
||||
// acknowledgement: ``This product includes software developed by the
|
||||
// University of California, Berkeley and its contributors'' in the
|
||||
// documentation or other materials provided with the distribution and in
|
||||
// all advertising materials mentioning features or use of this software.
|
||||
// Neither the name of the University nor the names of its contributors may
|
||||
// be used to endorse or promote products derived from this software without
|
||||
// specific prior written permission.
|
||||
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
// WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
// This file defines FlexLexer, an abstract class which specifies the
|
||||
// external interface provided to flex C++ lexer objects, and yyFlexLexer,
|
||||
// which defines a particular lexer class.
|
||||
//
|
||||
// If you want to create multiple lexer classes, you use the -P flag
|
||||
// to rename each yyFlexLexer to some other xxFlexLexer. You then
|
||||
// include <FlexLexer.h> in your other sources once per lexer class:
|
||||
//
|
||||
// #undef yyFlexLexer
|
||||
// #define yyFlexLexer xxFlexLexer
|
||||
// #include <FlexLexer.h>
|
||||
//
|
||||
// #undef yyFlexLexer
|
||||
// #define yyFlexLexer zzFlexLexer
|
||||
// #include <FlexLexer.h>
|
||||
// ...
|
||||
//
|
||||
// Since this header is generic for all sizes of flex scanners, you must
|
||||
// define the type YY_CHAR before including it:
|
||||
//
|
||||
// typedef xxx YY_CHAR;
|
||||
// #include <FlexLexer.h>
|
||||
// ...
|
||||
//
|
||||
// where xxx = char for 7-bit scanners, unsigned char for 8-bit, and
|
||||
// wchar_t for 16-bit.
|
||||
|
||||
#ifndef __FLEX_LEXER_H
|
||||
// Never included before - need to define base class.
|
||||
#define __FLEX_LEXER_H
|
||||
#include <iostream.h>
|
||||
|
||||
extern "C++" {
|
||||
|
||||
struct yy_buffer_state;
|
||||
typedef int yy_state_type;
|
||||
|
||||
class FlexLexer {
|
||||
public:
|
||||
virtual ~FlexLexer() { }
|
||||
|
||||
const YY_CHAR* YYText() { return yytext; }
|
||||
int YYLeng() { return yyleng; }
|
||||
|
||||
virtual void
|
||||
yy_switch_to_buffer( struct yy_buffer_state* new_buffer ) = 0;
|
||||
virtual struct yy_buffer_state*
|
||||
yy_create_buffer( istream* s, int size ) = 0;
|
||||
virtual void yy_delete_buffer( struct yy_buffer_state* b ) = 0;
|
||||
virtual void yyrestart( istream* s ) = 0;
|
||||
|
||||
virtual int yylex() = 0;
|
||||
|
||||
// Call yylex with new input/output sources.
|
||||
int yylex( istream* new_in, ostream* new_out = 0 )
|
||||
{
|
||||
switch_streams( new_in, new_out );
|
||||
return yylex();
|
||||
}
|
||||
|
||||
// Switch to new input/output streams. A nil stream pointer
|
||||
// indicates "keep the current one".
|
||||
virtual void switch_streams( istream* new_in = 0,
|
||||
ostream* new_out = 0 ) = 0;
|
||||
|
||||
int lineno() const { return yylineno; }
|
||||
|
||||
int debug() const { return yy_flex_debug; }
|
||||
void set_debug( int flag ) { yy_flex_debug = flag; }
|
||||
|
||||
protected:
|
||||
YY_CHAR* yytext;
|
||||
int yyleng;
|
||||
int yylineno; // only maintained if you use %option yylineno
|
||||
int yy_flex_debug; // only has effect with -d or "%option debug"
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(yyFlexLexer) || ! defined(yyFlexLexerOnce)
|
||||
// Either this is the first time through (yyFlexLexerOnce not defined),
|
||||
// or this is a repeated include to define a different flavor of
|
||||
// yyFlexLexer, as discussed in the flex man page.
|
||||
#define yyFlexLexerOnce
|
||||
|
||||
class yyFlexLexer : public FlexLexer {
|
||||
public:
|
||||
// arg_yyin and arg_yyout default to the cin and cout, but we
|
||||
// only make that assignment when initializing in yylex().
|
||||
yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 );
|
||||
|
||||
virtual ~yyFlexLexer();
|
||||
|
||||
void yy_switch_to_buffer( struct yy_buffer_state* new_buffer );
|
||||
struct yy_buffer_state* yy_create_buffer( istream* s, int size );
|
||||
void yy_delete_buffer( struct yy_buffer_state* b );
|
||||
void yyrestart( istream* s );
|
||||
|
||||
virtual int yylex();
|
||||
virtual void switch_streams( istream* new_in, ostream* new_out );
|
||||
|
||||
protected:
|
||||
virtual int LexerInput( YY_CHAR* buf, int max_size );
|
||||
virtual void LexerOutput( const YY_CHAR* buf, int size );
|
||||
virtual void LexerError( const char* msg );
|
||||
|
||||
void yyunput( int c, YY_CHAR* buf_ptr );
|
||||
int yyinput();
|
||||
|
||||
void yy_load_buffer_state();
|
||||
void yy_init_buffer( struct yy_buffer_state* b, istream* s );
|
||||
void yy_flush_buffer( struct yy_buffer_state* b );
|
||||
|
||||
int yy_start_stack_ptr;
|
||||
int yy_start_stack_depth;
|
||||
int* yy_start_stack;
|
||||
|
||||
void yy_push_state( int new_state );
|
||||
void yy_pop_state();
|
||||
int yy_top_state();
|
||||
|
||||
yy_state_type yy_get_previous_state();
|
||||
yy_state_type yy_try_NUL_trans( yy_state_type current_state );
|
||||
int yy_get_next_buffer();
|
||||
|
||||
istream* yyin; // input source for default LexerInput
|
||||
ostream* yyout; // output sink for default LexerOutput
|
||||
|
||||
struct yy_buffer_state* yy_current_buffer;
|
||||
|
||||
// yy_hold_char holds the character lost when yytext is formed.
|
||||
YY_CHAR yy_hold_char;
|
||||
|
||||
// Number of characters read into yy_ch_buf.
|
||||
int yy_n_chars;
|
||||
|
||||
// Points to current character in buffer.
|
||||
YY_CHAR* yy_c_buf_p;
|
||||
|
||||
int yy_init; // whether we need to initialize
|
||||
int yy_start; // start state number
|
||||
|
||||
// Flag which is used to allow yywrap()'s to do buffer switches
|
||||
// instead of setting up a fresh yyin. A bit of a hack ...
|
||||
int yy_did_buffer_switch_on_eof;
|
||||
|
||||
// The following are not always needed, but may be depending
|
||||
// on use of certain flex features (like REJECT or yymore()).
|
||||
|
||||
yy_state_type yy_last_accepting_state;
|
||||
YY_CHAR* yy_last_accepting_cpos;
|
||||
|
||||
yy_state_type* yy_state_buf;
|
||||
yy_state_type* yy_state_ptr;
|
||||
|
||||
YY_CHAR* yy_full_match;
|
||||
int* yy_full_state;
|
||||
int yy_full_lp;
|
||||
|
||||
int yy_lp;
|
||||
int yy_looking_for_trail_begin;
|
||||
|
||||
int yy_more_flag;
|
||||
int yy_more_len;
|
||||
int yy_more_offset;
|
||||
int yy_prev_more_offset;
|
||||
};
|
||||
|
||||
#endif
|
||||
149
to.do/unicode/ccl.c
Normal file
149
to.do/unicode/ccl.c
Normal file
@ -0,0 +1,149 @@
|
||||
/* ccl - routines for character classes */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
/* $Header$ */
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
/* ccladd - add a single character to a ccl */
|
||||
|
||||
void ccladd( cclp, ch )
|
||||
int cclp;
|
||||
int ch;
|
||||
{
|
||||
int ind, len, newpos, i;
|
||||
|
||||
check_char( ch );
|
||||
|
||||
len = ccllen[cclp];
|
||||
ind = cclmap[cclp];
|
||||
|
||||
/* check to see if the character is already in the ccl */
|
||||
|
||||
for ( i = 0; i < len; ++i )
|
||||
if ( ccltbl[ind + i] == ch )
|
||||
return;
|
||||
|
||||
newpos = ind + len;
|
||||
|
||||
if ( newpos >= current_max_ccl_tbl_size )
|
||||
{
|
||||
current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
ccltbl = reallocate_wchar_array( ccltbl,
|
||||
current_max_ccl_tbl_size );
|
||||
}
|
||||
|
||||
ccllen[cclp] = len + 1;
|
||||
ccltbl[newpos] = ch;
|
||||
}
|
||||
|
||||
|
||||
/* cclinit - return an empty ccl */
|
||||
|
||||
int cclinit()
|
||||
{
|
||||
if ( ++lastccl >= current_maxccls )
|
||||
{
|
||||
current_maxccls += MAX_CCLS_INCREMENT;
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
cclmap = reallocate_integer_array( cclmap, current_maxccls );
|
||||
ccllen = reallocate_integer_array( ccllen, current_maxccls );
|
||||
cclng = reallocate_integer_array( cclng, current_maxccls );
|
||||
}
|
||||
|
||||
if ( lastccl == 1 )
|
||||
/* we're making the first ccl */
|
||||
cclmap[lastccl] = 0;
|
||||
|
||||
else
|
||||
/* The new pointer is just past the end of the last ccl.
|
||||
* Since the cclmap points to the \first/ character of a
|
||||
* ccl, adding the length of the ccl to the cclmap pointer
|
||||
* will produce a cursor to the first free space.
|
||||
*/
|
||||
cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1];
|
||||
|
||||
ccllen[lastccl] = 0;
|
||||
cclng[lastccl] = 0; /* ccl's start out life un-negated */
|
||||
|
||||
return lastccl;
|
||||
}
|
||||
|
||||
|
||||
/* cclnegate - negate the given ccl */
|
||||
|
||||
void cclnegate( cclp )
|
||||
int cclp;
|
||||
{
|
||||
cclng[cclp] = 1;
|
||||
}
|
||||
|
||||
|
||||
/* list_character_set - list the members of a set of characters in CCL form
|
||||
*
|
||||
* Writes to the given file a character-class representation of those
|
||||
* characters present in the given CCL. A character is present if it
|
||||
* has a non-zero value in the cset array.
|
||||
*/
|
||||
|
||||
void list_character_set( file, cset )
|
||||
FILE *file;
|
||||
int cset[];
|
||||
{
|
||||
register int i;
|
||||
|
||||
putc( '[', file );
|
||||
|
||||
for ( i = 0; i < csize; ++i )
|
||||
{
|
||||
if ( cset[i] )
|
||||
{
|
||||
register int start_char = i;
|
||||
|
||||
putc( ' ', file );
|
||||
|
||||
fputs( readable_form( i ), file );
|
||||
|
||||
while ( ++i < csize && cset[i] )
|
||||
;
|
||||
|
||||
if ( i - 1 > start_char )
|
||||
/* this was a run */
|
||||
fprintf( file, "-%s", readable_form( i - 1 ) );
|
||||
|
||||
putc( ' ', file );
|
||||
}
|
||||
}
|
||||
|
||||
putc( ']', file );
|
||||
}
|
||||
102
to.do/unicode/changes.txt
Normal file
102
to.do/unicode/changes.txt
Normal file
@ -0,0 +1,102 @@
|
||||
Summary of changes for flex Unicode support
|
||||
|
||||
- ccl.c
|
||||
- ccladd()
|
||||
- changed call to reallocate_Character_array to reallocate_wchar_array
|
||||
|
||||
- ecs.c
|
||||
- mkeccl()
|
||||
- changed type of ccls from Char to wchar_t
|
||||
|
||||
- flex.1
|
||||
- added description of -U option
|
||||
- added extra qualifier to -Ca option regarding usage with -U
|
||||
- modified -C, -Cf, and -CF options regarding usage with -U
|
||||
|
||||
- flex.skl
|
||||
- changed all references of char (except error messages) to YY_CHAR
|
||||
- added new insertion point for defining YY_CHAR and YY_SC_TO_UI()
|
||||
- yy_scan_bytes()
|
||||
- renamed to yy_scan_chars to avoid confusion with 2-byte chars
|
||||
- renamed param bytes to chars
|
||||
- ECHO
|
||||
- redefined C version as fwrite(yytext, sizeof(YY_CHAR), yyleng, yyout)
|
||||
- YY_INPUT
|
||||
- removed char* cast on param buf of C++ version
|
||||
- yyFlexLexer::LexerInput()
|
||||
- changed get() call to read((unsigned char *) buf, sizeof(YY_CHAR))
|
||||
- changed read() call to read((unsigned char *) buf, max_size *
|
||||
sizeof(YY_CHAR))
|
||||
- changed gcount() call to gcount() / sizeof(YY_CHAR)
|
||||
- yyFlexLexer::LexerOutput()
|
||||
- changed write() call to write((unsigned char *) buf, size *
|
||||
sizeof(YY_CHAR))
|
||||
- yy_get_next_buffer()
|
||||
- yy_flex_realloc() call
|
||||
- changed param b->yy_buf_size + 2 to
|
||||
(b->yy_buf_size + 2) * sizeof(YY_CHAR)
|
||||
- input() and yyFlexLexer::yyinput()
|
||||
- changed line c = *(unsigned char *) yy_c_buf_p; to
|
||||
c = YY_SC_TO_UI(*yy_c_buf_p);
|
||||
|
||||
- flexdef.h
|
||||
- defined CSIZE as 65536
|
||||
- changed myesc() proto to return int
|
||||
- changed type of ccltbl from Char * to wchar_t *
|
||||
- added allocate_wchar_array() and reallocate_wchar_array() macros
|
||||
- changed mkeccl() proto's first param to wchar_t[]
|
||||
- changed cshell() proto's first param to wchar_t[]
|
||||
|
||||
|
||||
- FlexLexer.h
|
||||
- changed all references of char (except error messages) to YY_CHAR
|
||||
- added description about typedef'ing YY_CHAR before inclusion
|
||||
|
||||
- gen.c
|
||||
- changed appropriate references of char in output strings to YY_CHAR
|
||||
- added C_uchar_decl and C_ushort_decl for 16-bit yy_ec type bump-down
|
||||
- genecs()
|
||||
- added code for 16-bit yy_ec type bump; -Ca bumps type to long
|
||||
- make_tables()
|
||||
- YY_INPUT
|
||||
- read redefined as read(..., max_size * sizeof(YY_CHAR))
|
||||
- added code to use getwc() and WEOF for 16-bit interactive
|
||||
- fread redefined as fread(buf, sizeof(YY_CHAR), max_size, yyin)
|
||||
|
||||
- main.c
|
||||
- changed type of ccltbl from Char * to wchar_t *
|
||||
- check_options()
|
||||
- changed default csize from CSIZE to 256 due to redef of CSIZE
|
||||
- added code to check for options incompatible with -U
|
||||
- added code to output typedef of YY_CHAR to skeleton, plus extra call to
|
||||
skelout() to get down to original insertion point
|
||||
- flexend()
|
||||
- added code to print "U" when printing stats
|
||||
- flexinit()
|
||||
- added code to set csize for option -U
|
||||
- changed assignment of csize in option -8 from CSIZE to 256 due to redef
|
||||
of CSIZE
|
||||
- readin()
|
||||
- changed appropriate references of char in output strings to YY_CHAR
|
||||
- removed output of YY_CHAR typedef; now located in check_options()
|
||||
- usage()
|
||||
- added fprintf for -U usage
|
||||
|
||||
- misc.c
|
||||
- check_char()
|
||||
- added code to distinguish chars needing -8 and -U flags
|
||||
- cshell()
|
||||
- changed type of v from Char to wchar_t
|
||||
- changed type of k from Char to wchar_t
|
||||
- myesc()
|
||||
- now returns an int to handle 16-bit escape sequences
|
||||
- changed esc_char from Char to unsigned int as per htoi() and otoi()
|
||||
|
||||
- scan.l
|
||||
- changed ESCSEQ to accept 6 digit octal escapes and 4 digit hex escapes
|
||||
- removed myesc() and ndlookup() protos
|
||||
- added option "16bit"
|
||||
|
||||
- tblcmp.c
|
||||
- mktemplate
|
||||
- changed type of transset from Char to wchar_t
|
||||
225
to.do/unicode/ecs.c
Normal file
225
to.do/unicode/ecs.c
Normal file
@ -0,0 +1,225 @@
|
||||
/* ecs - equivalence class routines */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
/* $Header$ */
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
/* ccl2ecl - convert character classes to set of equivalence classes */
|
||||
|
||||
void ccl2ecl()
|
||||
{
|
||||
int i, ich, newlen, cclp, ccls, cclmec;
|
||||
|
||||
for ( i = 1; i <= lastccl; ++i )
|
||||
{
|
||||
/* We loop through each character class, and for each character
|
||||
* in the class, add the character's equivalence class to the
|
||||
* new "character" class we are creating. Thus when we are all
|
||||
* done, character classes will really consist of collections
|
||||
* of equivalence classes
|
||||
*/
|
||||
|
||||
newlen = 0;
|
||||
cclp = cclmap[i];
|
||||
|
||||
for ( ccls = 0; ccls < ccllen[i]; ++ccls )
|
||||
{
|
||||
ich = ccltbl[cclp + ccls];
|
||||
cclmec = ecgroup[ich];
|
||||
|
||||
if ( cclmec > 0 )
|
||||
{
|
||||
ccltbl[cclp + newlen] = cclmec;
|
||||
++newlen;
|
||||
}
|
||||
}
|
||||
|
||||
ccllen[i] = newlen;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* cre8ecs - associate equivalence class numbers with class members
|
||||
*
|
||||
* fwd is the forward linked-list of equivalence class members. bck
|
||||
* is the backward linked-list, and num is the number of class members.
|
||||
*
|
||||
* Returned is the number of classes.
|
||||
*/
|
||||
|
||||
int cre8ecs( fwd, bck, num )
|
||||
int fwd[], bck[], num;
|
||||
{
|
||||
int i, j, numcl;
|
||||
|
||||
numcl = 0;
|
||||
|
||||
/* Create equivalence class numbers. From now on, ABS( bck(x) )
|
||||
* is the equivalence class number for object x. If bck(x)
|
||||
* is positive, then x is the representative of its equivalence
|
||||
* class.
|
||||
*/
|
||||
for ( i = 1; i <= num; ++i )
|
||||
if ( bck[i] == NIL )
|
||||
{
|
||||
bck[i] = ++numcl;
|
||||
for ( j = fwd[i]; j != NIL; j = fwd[j] )
|
||||
bck[j] = -numcl;
|
||||
}
|
||||
|
||||
return numcl;
|
||||
}
|
||||
|
||||
|
||||
/* mkeccl - update equivalence classes based on character class xtions
|
||||
*
|
||||
* synopsis
|
||||
* Char ccls[];
|
||||
* int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
|
||||
* void mkeccl( Char ccls[], int lenccl, int fwd[llsiz], int bck[llsiz],
|
||||
* int llsiz, int NUL_mapping );
|
||||
*
|
||||
* ccls contains the elements of the character class, lenccl is the
|
||||
* number of elements in the ccl, fwd is the forward link-list of equivalent
|
||||
* characters, bck is the backward link-list, and llsiz size of the link-list.
|
||||
*
|
||||
* NUL_mapping is the value which NUL (0) should be mapped to.
|
||||
*/
|
||||
|
||||
void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
|
||||
wchar_t ccls[];
|
||||
int lenccl, fwd[], bck[], llsiz, NUL_mapping;
|
||||
{
|
||||
int cclp, oldec, newec;
|
||||
int cclm, i, j;
|
||||
static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */
|
||||
|
||||
/* Note that it doesn't matter whether or not the character class is
|
||||
* negated. The same results will be obtained in either case.
|
||||
*/
|
||||
|
||||
cclp = 0;
|
||||
|
||||
while ( cclp < lenccl )
|
||||
{
|
||||
cclm = ccls[cclp];
|
||||
|
||||
if ( NUL_mapping && cclm == 0 )
|
||||
cclm = NUL_mapping;
|
||||
|
||||
oldec = bck[cclm];
|
||||
newec = cclm;
|
||||
|
||||
j = cclp + 1;
|
||||
|
||||
for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
|
||||
{ /* look for the symbol in the character class */
|
||||
for ( ; j < lenccl; ++j )
|
||||
{
|
||||
register int ccl_char;
|
||||
|
||||
if ( NUL_mapping && ccls[j] == 0 )
|
||||
ccl_char = NUL_mapping;
|
||||
else
|
||||
ccl_char = ccls[j];
|
||||
|
||||
if ( ccl_char > i )
|
||||
break;
|
||||
|
||||
if ( ccl_char == i && ! cclflags[j] )
|
||||
{
|
||||
/* We found an old companion of cclm
|
||||
* in the ccl. Link it into the new
|
||||
* equivalence class and flag it as
|
||||
* having been processed.
|
||||
*/
|
||||
|
||||
bck[i] = newec;
|
||||
fwd[newec] = i;
|
||||
newec = i;
|
||||
/* Set flag so we don't reprocess. */
|
||||
cclflags[j] = 1;
|
||||
|
||||
/* Get next equivalence class member. */
|
||||
/* continue 2 */
|
||||
goto next_pt;
|
||||
}
|
||||
}
|
||||
|
||||
/* Symbol isn't in character class. Put it in the old
|
||||
* equivalence class.
|
||||
*/
|
||||
|
||||
bck[i] = oldec;
|
||||
|
||||
if ( oldec != NIL )
|
||||
fwd[oldec] = i;
|
||||
|
||||
oldec = i;
|
||||
|
||||
next_pt: ;
|
||||
}
|
||||
|
||||
if ( bck[cclm] != NIL || oldec != bck[cclm] )
|
||||
{
|
||||
bck[cclm] = NIL;
|
||||
fwd[oldec] = NIL;
|
||||
}
|
||||
|
||||
fwd[newec] = NIL;
|
||||
|
||||
/* Find next ccl member to process. */
|
||||
|
||||
for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp )
|
||||
{
|
||||
/* Reset "doesn't need processing" flag. */
|
||||
cclflags[cclp] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* mkechar - create equivalence class for single character */
|
||||
|
||||
void mkechar( tch, fwd, bck )
|
||||
int tch, fwd[], bck[];
|
||||
{
|
||||
/* If until now the character has been a proper subset of
|
||||
* an equivalence class, break it away to create a new ec
|
||||
*/
|
||||
|
||||
if ( fwd[tch] != NIL )
|
||||
bck[fwd[tch]] = bck[tch];
|
||||
|
||||
if ( bck[tch] != NIL )
|
||||
fwd[bck[tch]] = fwd[tch];
|
||||
|
||||
fwd[tch] = NIL;
|
||||
bck[tch] = NIL;
|
||||
}
|
||||
4099
to.do/unicode/flex.1
Normal file
4099
to.do/unicode/flex.1
Normal file
File diff suppressed because it is too large
Load Diff
1542
to.do/unicode/flex.skl
Normal file
1542
to.do/unicode/flex.skl
Normal file
File diff suppressed because it is too large
Load Diff
1062
to.do/unicode/flexdef.h
Normal file
1062
to.do/unicode/flexdef.h
Normal file
File diff suppressed because it is too large
Load Diff
1650
to.do/unicode/gen.c
Normal file
1650
to.do/unicode/gen.c
Normal file
File diff suppressed because it is too large
Load Diff
1228
to.do/unicode/main.c
Normal file
1228
to.do/unicode/main.c
Normal file
File diff suppressed because it is too large
Load Diff
894
to.do/unicode/misc.c
Normal file
894
to.do/unicode/misc.c
Normal file
@ -0,0 +1,894 @@
|
||||
/* misc - miscellaneous flex routines */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
/* $Header$ */
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
|
||||
void action_define( defname, value )
|
||||
char *defname;
|
||||
int value;
|
||||
{
|
||||
char buf[MAXLINE];
|
||||
|
||||
if ( (int) strlen( defname ) > MAXLINE / 2 )
|
||||
{
|
||||
format_pinpoint_message( _( "name \"%s\" ridiculously long" ),
|
||||
defname );
|
||||
return;
|
||||
}
|
||||
|
||||
sprintf( buf, "#define %s %d\n", defname, value );
|
||||
add_action( buf );
|
||||
}
|
||||
|
||||
|
||||
void add_action( new_text )
|
||||
char *new_text;
|
||||
{
|
||||
int len = strlen( new_text );
|
||||
|
||||
while ( len + action_index >= action_size - 10 /* slop */ )
|
||||
{
|
||||
int new_size = action_size * 2;
|
||||
|
||||
if ( new_size <= 0 )
|
||||
/* Increase just a little, to try to avoid overflow
|
||||
* on 16-bit machines.
|
||||
*/
|
||||
action_size += action_size / 8;
|
||||
else
|
||||
action_size = new_size;
|
||||
|
||||
action_array =
|
||||
reallocate_character_array( action_array, action_size );
|
||||
}
|
||||
|
||||
strcpy( &action_array[action_index], new_text );
|
||||
|
||||
action_index += len;
|
||||
}
|
||||
|
||||
|
||||
/* allocate_array - allocate memory for an array of the given size
 *
 * Requests size * element_size bytes from flex_alloc().  A failed
 * allocation is reported through flexfatal().
 */

void *allocate_array( size, element_size )
int size;
size_t element_size;
	{
	size_t wanted = element_size * size;
	void *block = flex_alloc( wanted );

	if ( ! block )
		flexfatal(
			_( "memory allocation failed in allocate_array()" ) );

	return block;
	}
|
||||
|
||||
|
||||
/* all_lower - true if a string is all lower-case */
|
||||
|
||||
int all_lower( str )
|
||||
register char *str;
|
||||
{
|
||||
while ( *str )
|
||||
{
|
||||
if ( ! isascii( (Char) *str ) || ! islower( *str ) )
|
||||
return 0;
|
||||
++str;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* all_upper - true if a string is all upper-case */
|
||||
|
||||
int all_upper( str )
|
||||
register char *str;
|
||||
{
|
||||
while ( *str )
|
||||
{
|
||||
if ( ! isascii( (Char) *str ) || ! isupper( *str ) )
|
||||
return 0;
|
||||
++str;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* bubble - bubble sort an integer array in increasing order
 *
 * synopsis
 *   int v[n], n;
 *   void bubble( v, n );
 *
 * description
 *   sorts elements v[1] through v[n] into increasing order, in place.
 *   v[0] is neither read nor written.
 *
 * passed
 *   v - the array to be sorted
 *   n - the number of elements of 'v' to be sorted
 */

void bubble( v, n )
int v[], n;
	{
	int last, pos;

	/* After each pass the largest remaining value sits at v[last]. */
	for ( last = n; last > 1; --last )
		{
		pos = 1;

		while ( pos < last )
			{
			if ( v[pos] > v[pos + 1] )
				{ /* out of order - exchange the pair */
				int held = v[pos + 1];

				v[pos + 1] = v[pos];
				v[pos] = held;
				}

			++pos;
			}
		}
	}
|
||||
|
||||
|
||||
/* check_char - checks a character to make sure it's within the range
|
||||
* we're expecting. If not, generates fatal error message
|
||||
* and exits.
|
||||
*/
|
||||
|
||||
void check_char( c )
|
||||
int c;
|
||||
{
|
||||
if ( c >= CSIZE )
|
||||
lerrsf( _( "bad character '%s' detected in check_char()" ),
|
||||
readable_form( c ) );
|
||||
|
||||
if ( c >= csize )
|
||||
{
|
||||
if ( c < 256 )
|
||||
lerrsf(
|
||||
_( "scanner requires -8 flag to use the character %s" ),
|
||||
readable_form( c ) );
|
||||
else
|
||||
lerrsf(
|
||||
_( "scanner requires -U flag to use the character %s" ),
|
||||
readable_form( c ) );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* clower - replace upper-case letter to lower-case */
|
||||
|
||||
Char clower( c )
|
||||
register int c;
|
||||
{
|
||||
return (Char) ((isascii( c ) && isupper( c )) ? tolower( c ) : c);
|
||||
}
|
||||
|
||||
|
||||
/* copy_string - returns a dynamically allocated copy of a string
 *
 * The copy is obtained from flex_alloc() and includes the terminating
 * NUL.  An allocation failure is reported through flexfatal().
 */

char *copy_string( str )
register const char *str;
	{
	char *copy;
	unsigned int size;

	/* room for the characters plus the terminating NUL */
	size = (strlen( str ) + 1) * sizeof( char );
	copy = (char *) flex_alloc( size );

	if ( copy == NULL )
		flexfatal( _( "dynamic memory failure in copy_string()" ) );

	strcpy( copy, str );

	return copy;
	}
|
||||
|
||||
|
||||
/* copy_unsigned_string -
|
||||
* returns a dynamically allocated copy of a (potentially) unsigned string
|
||||
*/
|
||||
|
||||
Char *copy_unsigned_string( str )
|
||||
register Char *str;
|
||||
{
|
||||
register Char *c;
|
||||
Char *copy;
|
||||
|
||||
/* find length */
|
||||
for ( c = str; *c; ++c )
|
||||
;
|
||||
|
||||
copy = allocate_Character_array( c - str + 1 );
|
||||
|
||||
for ( c = copy; (*c++ = *str++) != 0; )
|
||||
;
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
||||
/* cshell - shell sort a character array in increasing order
 *
 * synopsis
 *
 *   wchar_t v[n];
 *   int n, special_case_0;
 *   cshell( v, n, special_case_0 );
 *
 * description
 *   Does a shell sort of v[0] through v[n - 1], in place.
 *   If special_case_0 is true, then any element equal to 0
 *   is instead assumed to have infinite weight, so zeros end up last.
 *
 * passed
 *   v - array to be sorted
 *   n - number of elements of v to be sorted
 *   special_case_0 - nonzero to treat 0 as larger than everything else
 */

void cshell( v, n, special_case_0 )
wchar_t v[];
int n, special_case_0;
	{
	int gap, i, lo, hi;
	int in_order;
	wchar_t tmp;

	for ( gap = n / 2; gap > 0; gap /= 2 )
		for ( i = gap; i < n; ++i )
			for ( lo = i - gap; lo >= 0; lo -= gap )
				{
				hi = lo + gap;

				if ( ! special_case_0 )
					in_order = v[lo] <= v[hi];

				else if ( v[hi] == 0 )
					/* "infinite" upper element */
					in_order = 1;

				else
					in_order = v[lo] != 0 &&
						v[lo] <= v[hi];

				if ( in_order )
					break;

				tmp = v[hi];
				v[hi] = v[lo];
				v[lo] = tmp;
				}
	}
|
||||
|
||||
|
||||
/* dataend - finish up a block of data declarations */
|
||||
|
||||
void dataend()
|
||||
{
|
||||
if ( datapos > 0 )
|
||||
dataflush();
|
||||
|
||||
/* add terminator for initialization; { for vi */
|
||||
outn( " } ;\n" );
|
||||
|
||||
dataline = 0;
|
||||
datapos = 0;
|
||||
}
|
||||
|
||||
|
||||
/* dataflush - flush generated data statements */
|
||||
|
||||
void dataflush()
|
||||
{
|
||||
outc( '\n' );
|
||||
|
||||
if ( ++dataline >= NUMDATALINES )
|
||||
{
|
||||
/* Put out a blank line so that the table is grouped into
|
||||
* large blocks that enable the user to find elements easily.
|
||||
*/
|
||||
outc( '\n' );
|
||||
dataline = 0;
|
||||
}
|
||||
|
||||
/* Reset the number of characters written on the current line. */
|
||||
datapos = 0;
|
||||
}
|
||||
|
||||
|
||||
/* flexerror - report an error message and terminate */
|
||||
|
||||
void flexerror( msg )
|
||||
const char msg[];
|
||||
{
|
||||
fprintf( stderr, "%s: %s\n", program_name, msg );
|
||||
flexend( 1 );
|
||||
}
|
||||
|
||||
|
||||
/* flexfatal - report a fatal error message and terminate */
|
||||
|
||||
void flexfatal( msg )
|
||||
const char msg[];
|
||||
{
|
||||
fprintf( stderr, _( "%s: fatal internal error, %s\n" ),
|
||||
program_name, msg );
|
||||
exit( 1 );
|
||||
}
|
||||
|
||||
|
||||
/* htoi - convert a hexadecimal digit string to an integer value */
|
||||
|
||||
int htoi( str )
|
||||
Char str[];
|
||||
{
|
||||
unsigned int result;
|
||||
|
||||
(void) sscanf( (char *) str, "%x", &result );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* lerrif - report an error message formatted with one integer argument */
|
||||
|
||||
void lerrif( msg, arg )
|
||||
const char msg[];
|
||||
int arg;
|
||||
{
|
||||
char errmsg[MAXLINE];
|
||||
(void) sprintf( errmsg, msg, arg );
|
||||
flexerror( errmsg );
|
||||
}
|
||||
|
||||
|
||||
/* lerrsf - report an error message formatted with one string argument */
|
||||
|
||||
void lerrsf( msg, arg )
|
||||
const char msg[], arg[];
|
||||
{
|
||||
char errmsg[MAXLINE];
|
||||
|
||||
(void) sprintf( errmsg, msg, arg );
|
||||
flexerror( errmsg );
|
||||
}
|
||||
|
||||
|
||||
/* line_directive_out - spit out a "#line" statement */
|
||||
|
||||
void line_directive_out( output_file, do_infile )
|
||||
FILE *output_file;
|
||||
int do_infile;
|
||||
{
|
||||
char directive[MAXLINE], filename[MAXLINE];
|
||||
char *s1, *s2, *s3;
|
||||
static char line_fmt[] = "#line %d \"%s\"\n";
|
||||
|
||||
if ( ! gen_line_dirs )
|
||||
return;
|
||||
|
||||
if ( (do_infile && ! infilename) || (! do_infile && ! outfilename) )
|
||||
/* don't know the filename to use, skip */
|
||||
return;
|
||||
|
||||
s1 = do_infile ? infilename : outfilename;
|
||||
s2 = filename;
|
||||
s3 = &filename[sizeof( filename ) - 2];
|
||||
|
||||
while ( s2 < s3 && *s1 )
|
||||
{
|
||||
if ( *s1 == '\\' )
|
||||
/* Escape the '\' */
|
||||
*s2++ = '\\';
|
||||
|
||||
*s2++ = *s1++;
|
||||
}
|
||||
|
||||
*s2 = '\0';
|
||||
|
||||
if ( do_infile )
|
||||
sprintf( directive, line_fmt, linenum, filename );
|
||||
else
|
||||
{
|
||||
if ( output_file == stdout )
|
||||
/* Account for the line directive itself. */
|
||||
++out_linenum;
|
||||
|
||||
sprintf( directive, line_fmt, out_linenum, filename );
|
||||
}
|
||||
|
||||
/* If output_file is nil then we should put the directive in
|
||||
* the accumulated actions.
|
||||
*/
|
||||
if ( output_file )
|
||||
{
|
||||
fputs( directive, output_file );
|
||||
}
|
||||
else
|
||||
add_action( directive );
|
||||
}
|
||||
|
||||
|
||||
/* mark_defs1 - mark the current position in the action array as
|
||||
* representing where the user's section 1 definitions end
|
||||
* and the prolog begins
|
||||
*/
|
||||
void mark_defs1()
|
||||
{
|
||||
defs1_offset = 0;
|
||||
action_array[action_index++] = '\0';
|
||||
action_offset = prolog_offset = action_index;
|
||||
action_array[action_index] = '\0';
|
||||
}
|
||||
|
||||
|
||||
/* mark_prolog - mark the current position in the action array as
|
||||
* representing the end of the action prolog
|
||||
*/
|
||||
void mark_prolog()
|
||||
{
|
||||
action_array[action_index++] = '\0';
|
||||
action_offset = action_index;
|
||||
action_array[action_index] = '\0';
|
||||
}
|
||||
|
||||
|
||||
/* mk2data - generate a data statement for a two-dimensional array
|
||||
*
|
||||
* Generates a data statement initializing the current 2-D array to "value".
|
||||
*/
|
||||
void mk2data( value )
|
||||
int value;
|
||||
{
|
||||
if ( datapos >= NUMDATAITEMS )
|
||||
{
|
||||
outc( ',' );
|
||||
dataflush();
|
||||
}
|
||||
|
||||
if ( datapos == 0 )
|
||||
/* Indent. */
|
||||
out( " " );
|
||||
|
||||
else
|
||||
outc( ',' );
|
||||
|
||||
++datapos;
|
||||
|
||||
out_dec( "%5d", value );
|
||||
}
|
||||
|
||||
|
||||
/* mkdata - generate a data statement
|
||||
*
|
||||
* Generates a data statement initializing the current array element to
|
||||
* "value".
|
||||
*/
|
||||
void mkdata( value )
|
||||
int value;
|
||||
{
|
||||
if ( datapos >= NUMDATAITEMS )
|
||||
{
|
||||
outc( ',' );
|
||||
dataflush();
|
||||
}
|
||||
|
||||
if ( datapos == 0 )
|
||||
/* Indent. */
|
||||
out( " " );
|
||||
else
|
||||
outc( ',' );
|
||||
|
||||
++datapos;
|
||||
|
||||
out_dec( "%5d", value );
|
||||
}
|
||||
|
||||
|
||||
/* myctoi - return the integer represented by a string of digits
 *
 * Returns 0 if array does not begin with a decimal number.
 */

int myctoi( array )
char array[];
	{
	int parsed;

	if ( sscanf( array, "%d", &parsed ) != 1 )
		/* nothing converted */
		return 0;

	return parsed;
	}
|
||||
|
||||
|
||||
/* myesc - return character corresponding to escape sequence */
|
||||
|
||||
int myesc( array )
|
||||
Char array[];
|
||||
{
|
||||
Char c;
|
||||
unsigned int esc_char;
|
||||
|
||||
switch ( array[1] )
|
||||
{
|
||||
case 'b': return '\b';
|
||||
case 'f': return '\f';
|
||||
case 'n': return '\n';
|
||||
case 'r': return '\r';
|
||||
case 't': return '\t';
|
||||
|
||||
#if __STDC__
|
||||
case 'a': return '\a';
|
||||
case 'v': return '\v';
|
||||
#else
|
||||
case 'a': return '\007';
|
||||
case 'v': return '\013';
|
||||
#endif
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
{ /* \<octal> */
|
||||
int sptr = 1;
|
||||
|
||||
while ( isascii( array[sptr] ) &&
|
||||
isdigit( array[sptr] ) )
|
||||
/* Don't increment inside loop control
|
||||
* because if isdigit() is a macro it might
|
||||
* expand into multiple increments ...
|
||||
*/
|
||||
++sptr;
|
||||
|
||||
c = array[sptr];
|
||||
array[sptr] = '\0';
|
||||
|
||||
esc_char = otoi( array + 1 );
|
||||
|
||||
array[sptr] = c;
|
||||
|
||||
return esc_char;
|
||||
}
|
||||
|
||||
case 'x':
|
||||
{ /* \x<hex> */
|
||||
int sptr = 2;
|
||||
|
||||
while ( isascii( array[sptr] ) &&
|
||||
isxdigit( (char) array[sptr] ) )
|
||||
/* Don't increment inside loop control
|
||||
* because if isdigit() is a macro it might
|
||||
* expand into multiple increments ...
|
||||
*/
|
||||
++sptr;
|
||||
|
||||
c = array[sptr];
|
||||
array[sptr] = '\0';
|
||||
|
||||
esc_char = htoi( array + 2 );
|
||||
|
||||
array[sptr] = c;
|
||||
|
||||
return esc_char;
|
||||
}
|
||||
|
||||
default:
|
||||
return array[1];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* otoi - convert an octal digit string to an integer value */
|
||||
|
||||
int otoi( str )
|
||||
Char str[];
|
||||
{
|
||||
unsigned int result;
|
||||
|
||||
(void) sscanf( (char *) str, "%o", &result );
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* out - various flavors of outputting a (possibly formatted) string for the
 *	 generated scanner, keeping track of the line count.
 *
 * out() itself writes str to stdout unformatted and counts its newlines.
 */

void out( str )
const char str[];
	{
	(void) fputs( str, stdout );

	out_line_count( str );
	}
|
||||
|
||||
/* out_dec - write one int to stdout using printf format fmt; only the
 * newlines in fmt itself are counted.
 */
void out_dec( fmt, n )
const char fmt[];
int n;
	{
	(void) printf( fmt, n );

	out_line_count( fmt );
	}
|
||||
|
||||
/* out_dec2 - write two ints to stdout using printf format fmt; only the
 * newlines in fmt itself are counted.
 */
void out_dec2( fmt, n1, n2 )
const char fmt[];
int n1, n2;
	{
	(void) printf( fmt, n1, n2 );

	out_line_count( fmt );
	}
|
||||
|
||||
/* out_hex - write one unsigned int to stdout using printf format fmt; only
 * the newlines in fmt itself are counted.
 */
void out_hex( fmt, x )
const char fmt[];
unsigned int x;
	{
	(void) printf( fmt, x );

	out_line_count( fmt );
	}
|
||||
|
||||
void out_line_count( str )
|
||||
const char str[];
|
||||
{
|
||||
register int i;
|
||||
|
||||
for ( i = 0; str[i]; ++i )
|
||||
if ( str[i] == '\n' )
|
||||
++out_linenum;
|
||||
}
|
||||
|
||||
/* out_str - write one string to stdout using printf format fmt, counting
 * the newlines in both fmt and str.
 */
void out_str( fmt, str )
const char fmt[], str[];
	{
	(void) printf( fmt, str );

	out_line_count( fmt );
	out_line_count( str );
	}
|
||||
|
||||
/* out_str3 - write three strings to stdout using printf format fmt,
 * counting the newlines in fmt and in each of the strings.
 */
void out_str3( fmt, s1, s2, s3 )
const char fmt[], s1[], s2[], s3[];
	{
	(void) printf( fmt, s1, s2, s3 );

	out_line_count( fmt );
	out_line_count( s1 );
	out_line_count( s2 );
	out_line_count( s3 );
	}
|
||||
|
||||
/* out_str_dec - write a string and an int to stdout using printf format
 * fmt, counting the newlines in fmt and str.
 */
void out_str_dec( fmt, str, n )
const char fmt[], str[];
int n;
	{
	(void) printf( fmt, str, n );

	out_line_count( fmt );
	out_line_count( str );
	}
|
||||
|
||||
void outc( c )
|
||||
int c;
|
||||
{
|
||||
putc( c, stdout );
|
||||
|
||||
if ( c == '\n' )
|
||||
++out_linenum;
|
||||
}
|
||||
|
||||
void outn( str )
|
||||
const char str[];
|
||||
{
|
||||
puts( str );
|
||||
out_line_count( str );
|
||||
++out_linenum;
|
||||
}
|
||||
|
||||
|
||||
/* readable_form - return the human-readable form of a character
 *
 * Control characters and values of 127 and above are rendered as a C
 * escape or as a backslash followed by at least three octal digits; a
 * space is rendered as "' '"; anything else is returned as a
 * one-character string.
 *
 * The returned string is in static storage (or is a string literal) and
 * is overwritten by the next call.
 */

char *readable_form( c )
register int c;
	{
	/* Large enough for a backslash plus any unsigned int in octal;
	 * the former 10-byte buffer overflowed for large values of c.
	 */
	static char rform[20];

	if ( (c >= 0 && c < 32) || c >= 127 )
		{
		switch ( c )
			{
			case '\b': return "\\b";
			case '\f': return "\\f";
			case '\n': return "\\n";
			case '\r': return "\\r";
			case '\t': return "\\t";

#if __STDC__
			case '\a': return "\\a";
			case '\v': return "\\v";
#endif

			default:
				(void) snprintf( rform, sizeof( rform ),
						"\\%.3o",
						(unsigned int) c );
				return rform;
			}
		}

	else if ( c == ' ' )
		return "' '";

	else
		{
		rform[0] = c;
		rform[1] = '\0';

		return rform;
		}
	}
|
||||
|
||||
|
||||
/* reallocate_array - increase the size of a dynamic array
 *
 * passed
 *   array        - the existing array
 *   size         - the new number of elements
 *   element_size - the size in bytes of one element
 *
 * Returns the resized array.  Calls flexfatal() if the request is
 * negative, if the byte count would overflow a size_t, or if
 * flex_realloc() fails.
 */

void *reallocate_array( array, size, element_size )
void *array;
int size;
size_t element_size;
	{
	register void *new_array;
	size_t num_bytes;

	/* Reject sizes whose byte count cannot be represented, rather
	 * than silently allocating a wrapped-around (too small) amount.
	 */
	if ( size < 0 ||
	     (element_size != 0 &&
	      (size_t) size > (size_t) -1 / element_size) )
		flexfatal( _( "attempt to increase array size failed" ) );

	num_bytes = element_size * (size_t) size;

	new_array = flex_realloc( array, num_bytes );
	if ( ! new_array )
		flexfatal( _( "attempt to increase array size failed" ) );

	return new_array;
	}
|
||||
|
||||
|
||||
/* skelout - write out one section of the skeleton file
|
||||
*
|
||||
* Description
|
||||
* Copies skelfile or skel array to stdout until a line beginning with
|
||||
* "%%" or EOF is found.
|
||||
*/
|
||||
void skelout()
|
||||
{
|
||||
char buf_storage[MAXLINE];
|
||||
char *buf = buf_storage;
|
||||
int do_copy = 1;
|
||||
|
||||
/* Loop pulling lines either from the skelfile, if we're using
|
||||
* one, or from the skel[] array.
|
||||
*/
|
||||
while ( skelfile ?
|
||||
(fgets( buf, MAXLINE, skelfile ) != NULL) :
|
||||
((buf = (char *) skel[skel_ind++]) != 0) )
|
||||
{ /* copy from skel array */
|
||||
if ( buf[0] == '%' )
|
||||
{ /* control line */
|
||||
switch ( buf[1] )
|
||||
{
|
||||
case '%':
|
||||
return;
|
||||
|
||||
case '+':
|
||||
do_copy = C_plus_plus;
|
||||
break;
|
||||
|
||||
case '-':
|
||||
do_copy = ! C_plus_plus;
|
||||
break;
|
||||
|
||||
case '*':
|
||||
do_copy = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
flexfatal(
|
||||
_( "bad line in skeleton file" ) );
|
||||
}
|
||||
}
|
||||
|
||||
else if ( do_copy )
|
||||
{
|
||||
if ( skelfile )
|
||||
/* Skeleton file reads include final
|
||||
* newline, skel[] array does not.
|
||||
*/
|
||||
out( buf );
|
||||
else
|
||||
outn( buf );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* transition_struct_out - output a yy_trans_info structure
|
||||
*
|
||||
* outputs the yy_trans_info structure with the two elements, element_v and
|
||||
* element_n. Formats the output with spaces and carriage returns.
|
||||
*/
|
||||
|
||||
void transition_struct_out( element_v, element_n )
|
||||
int element_v, element_n;
|
||||
{
|
||||
out_dec2( " {%4d,%4d },", element_v, element_n );
|
||||
|
||||
datapos += TRANS_STRUCT_PRINT_LENGTH;
|
||||
|
||||
if ( datapos >= 79 - TRANS_STRUCT_PRINT_LENGTH )
|
||||
{
|
||||
outc( '\n' );
|
||||
|
||||
if ( ++dataline % 10 == 0 )
|
||||
outc( '\n' );
|
||||
|
||||
datapos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* yy_flex_xmalloc - allocate size bytes through flex_alloc(), calling
 * flexfatal() if the allocation fails.
 *
 * The following is only needed when building flex's parser using certain
 * broken versions of bison.
 */
void *yy_flex_xmalloc( size )
int size;
	{
	void *mem;

	mem = flex_alloc( (size_t) size );

	if ( mem == 0 )
		flexfatal(
		_( "memory allocation failed in yy_flex_xmalloc()" ) );

	return mem;
	}
|
||||
|
||||
|
||||
/* zero_out - set a region of memory to 0
 *
 * Sets region_ptr[0] through region_ptr[size_in_bytes - 1] to zero.
 * Does nothing when size_in_bytes is 0.
 */

void zero_out( region_ptr, size_in_bytes )
char *region_ptr;
size_t size_in_bytes;
	{
	register size_t i;

	for ( i = 0; i < size_in_bytes; ++i )
		region_ptr[i] = 0;
	}
|
||||
710
to.do/unicode/scan.l
Normal file
710
to.do/unicode/scan.l
Normal file
@ -0,0 +1,710 @@
|
||||
/* scan.l - scanner for flex input */
|
||||
|
||||
%{
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
/* $Header$ */
|
||||
|
||||
#include "flexdef.h"
|
||||
#include "parse.h"
|
||||
|
||||
#define ACTION_ECHO add_action( yytext )
|
||||
#define ACTION_IFDEF(def, should_define) \
|
||||
{ \
|
||||
if ( should_define ) \
|
||||
action_define( def, 1 ); \
|
||||
}
|
||||
|
||||
#define MARK_END_OF_PROLOG mark_prolog();
|
||||
|
||||
#define YY_DECL \
|
||||
int flexscan()
|
||||
|
||||
#define RETURNCHAR \
|
||||
yylval = (unsigned char) yytext[0]; \
|
||||
return CHAR;
|
||||
|
||||
#define RETURNNAME \
|
||||
strcpy( nmstr, yytext ); \
|
||||
return NAME;
|
||||
|
||||
#define PUT_BACK_STRING(str, start) \
|
||||
for ( i = strlen( str ) - 1; i >= start; --i ) \
|
||||
unput((str)[i])
|
||||
|
||||
#define CHECK_REJECT(str) \
|
||||
if ( all_upper( str ) ) \
|
||||
reject = true;
|
||||
|
||||
#define CHECK_YYMORE(str) \
|
||||
if ( all_lower( str ) ) \
|
||||
yymore_used = true;
|
||||
%}
|
||||
|
||||
%option caseless nodefault outfile="scan.c" stack noyy_top_state
|
||||
%option nostdinit
|
||||
|
||||
%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
|
||||
%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
|
||||
%x OPTION LINEDIR
|
||||
|
||||
WS [[:blank:]]+
|
||||
OPTWS [[:blank:]]*
|
||||
NOT_WS [^[:blank:]\n]
|
||||
|
||||
NL \r?\n
|
||||
|
||||
NAME ([[:alpha:]_][[:alnum:]_-]*)
|
||||
NOT_NAME [^[:alpha:]_*\n]+
|
||||
|
||||
SCNAME {NAME}
|
||||
|
||||
ESCSEQ (\\([^\n]|[0-7]{1,6}|x[[:xdigit:]]{1,4}))
|
||||
|
||||
FIRST_CCL_CHAR ([^\\\n]|{ESCSEQ})
|
||||
CCL_CHAR ([^\\\n\]]|{ESCSEQ})
|
||||
CCL_EXPR ("[:"[[:alpha:]]+":]")
|
||||
|
||||
LEXOPT [aceknopr]
|
||||
|
||||
%%
|
||||
static int bracelevel, didadef, indented_code;
|
||||
static int doing_rule_action = false;
|
||||
static int option_sense;
|
||||
|
||||
int doing_codeblock = false;
|
||||
int i;
|
||||
Char nmdef[MAXLINE];
|
||||
|
||||
|
||||
<INITIAL>{
|
||||
^{WS} indented_code = true; BEGIN(CODEBLOCK);
|
||||
^"/*" ACTION_ECHO; yy_push_state( COMMENT );
|
||||
^#{OPTWS}line{WS} yy_push_state( LINEDIR );
|
||||
^"%s"{NAME}? return SCDECL;
|
||||
^"%x"{NAME}? return XSCDECL;
|
||||
^"%{".*{NL} {
|
||||
++linenum;
|
||||
line_directive_out( (FILE *) 0, 1 );
|
||||
indented_code = false;
|
||||
BEGIN(CODEBLOCK);
|
||||
}
|
||||
|
||||
{WS} /* discard */
|
||||
|
||||
^"%%".* {
|
||||
sectnum = 2;
|
||||
bracelevel = 0;
|
||||
mark_defs1();
|
||||
line_directive_out( (FILE *) 0, 1 );
|
||||
BEGIN(SECT2PROLOG);
|
||||
return SECTEND;
|
||||
}
|
||||
|
||||
^"%pointer".*{NL} yytext_is_array = false; ++linenum;
|
||||
^"%array".*{NL} yytext_is_array = true; ++linenum;
|
||||
|
||||
^"%option" BEGIN(OPTION); return OPTION_OP;
|
||||
|
||||
^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL} ++linenum; /* ignore */
|
||||
^"%"{LEXOPT}{WS}.*{NL} ++linenum; /* ignore */
|
||||
|
||||
^"%"[^sxaceknopr{}].* synerr( _( "unrecognized '%' directive" ) );
|
||||
|
||||
^{NAME} {
|
||||
strcpy( nmstr, yytext );
|
||||
didadef = false;
|
||||
BEGIN(PICKUPDEF);
|
||||
}
|
||||
|
||||
{SCNAME} RETURNNAME;
|
||||
^{OPTWS}{NL} ++linenum; /* allows blank lines in section 1 */
|
||||
{OPTWS}{NL} ACTION_ECHO; ++linenum; /* maybe end of comment line */
|
||||
}
|
||||
|
||||
|
||||
<COMMENT>{
|
||||
"*/" ACTION_ECHO; yy_pop_state();
|
||||
"*" ACTION_ECHO;
|
||||
[^*\n]+ ACTION_ECHO;
|
||||
[^*\n]*{NL} ++linenum; ACTION_ECHO;
|
||||
}
|
||||
|
||||
<LINEDIR>{
|
||||
\n yy_pop_state();
|
||||
[[:digit:]]+ linenum = myctoi( yytext );
|
||||
|
||||
\"[^"\n]*\" {
|
||||
flex_free( (void *) infilename );
|
||||
infilename = copy_string( yytext + 1 );
|
||||
infilename[strlen( infilename ) - 1] = '\0';
|
||||
}
|
||||
. /* ignore spurious characters */
|
||||
}
|
||||
|
||||
<CODEBLOCK>{
|
||||
^"%}".*{NL} ++linenum; BEGIN(INITIAL);
|
||||
|
||||
{NAME}|{NOT_NAME}|. ACTION_ECHO;
|
||||
|
||||
{NL} {
|
||||
++linenum;
|
||||
ACTION_ECHO;
|
||||
if ( indented_code )
|
||||
BEGIN(INITIAL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
<PICKUPDEF>{
|
||||
{WS} /* separates name and definition */
|
||||
|
||||
{NOT_WS}.* {
|
||||
strcpy( (char *) nmdef, yytext );
|
||||
|
||||
/* Skip trailing whitespace. */
|
||||
for ( i = strlen( (char *) nmdef ) - 1;
|
||||
i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
|
||||
--i )
|
||||
;
|
||||
|
||||
nmdef[i + 1] = '\0';
|
||||
|
||||
ndinstal( nmstr, nmdef );
|
||||
didadef = true;
|
||||
}
|
||||
|
||||
{NL} {
|
||||
if ( ! didadef )
|
||||
synerr( _( "incomplete name definition" ) );
|
||||
BEGIN(INITIAL);
|
||||
++linenum;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
<OPTION>{
|
||||
{NL} ++linenum; BEGIN(INITIAL);
|
||||
{WS} option_sense = true;
|
||||
|
||||
"=" return '=';
|
||||
|
||||
no option_sense = ! option_sense;
|
||||
|
||||
7bit csize = option_sense ? 128 : 256;
|
||||
8bit csize = option_sense ? 256 : 128;
|
||||
16bit csize = option_sense ? 65536 : 256;
|
||||
|
||||
align long_align = option_sense;
|
||||
always-interactive {
|
||||
action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
|
||||
}
|
||||
array yytext_is_array = option_sense;
|
||||
backup backing_up_report = option_sense;
|
||||
batch interactive = ! option_sense;
|
||||
"c++" C_plus_plus = option_sense;
|
||||
caseful|case-sensitive caseins = ! option_sense;
|
||||
caseless|case-insensitive caseins = option_sense;
|
||||
debug ddebug = option_sense;
|
||||
default spprdflt = ! option_sense;
|
||||
ecs useecs = option_sense;
|
||||
fast {
|
||||
useecs = usemecs = false;
|
||||
use_read = fullspd = true;
|
||||
}
|
||||
full {
|
||||
useecs = usemecs = false;
|
||||
use_read = fulltbl = true;
|
||||
}
|
||||
input ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
|
||||
interactive interactive = option_sense;
|
||||
lex-compat lex_compat = option_sense;
|
||||
main {
|
||||
action_define( "YY_MAIN", option_sense );
|
||||
do_yywrap = ! option_sense;
|
||||
}
|
||||
meta-ecs usemecs = option_sense;
|
||||
never-interactive {
|
||||
action_define( "YY_NEVER_INTERACTIVE", option_sense );
|
||||
}
|
||||
perf-report performance_report += option_sense ? 1 : -1;
|
||||
pointer yytext_is_array = ! option_sense;
|
||||
read use_read = option_sense;
|
||||
reject reject_really_used = option_sense;
|
||||
stack action_define( "YY_STACK_USED", option_sense );
|
||||
stdinit do_stdinit = option_sense;
|
||||
stdout use_stdout = option_sense;
|
||||
unput ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
|
||||
verbose printstats = option_sense;
|
||||
warn nowarn = ! option_sense;
|
||||
yylineno do_yylineno = option_sense;
|
||||
yymore yymore_really_used = option_sense;
|
||||
yywrap do_yywrap = option_sense;
|
||||
|
||||
yy_push_state ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
|
||||
yy_pop_state ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
|
||||
yy_top_state ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
|
||||
|
||||
yy_scan_buffer ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
|
||||
yy_scan_bytes ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
|
||||
yy_scan_string ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
|
||||
|
||||
outfile return OPT_OUTFILE;
|
||||
prefix return OPT_PREFIX;
|
||||
yyclass return OPT_YYCLASS;
|
||||
|
||||
\"[^"\n]*\" {
|
||||
strcpy( nmstr, yytext + 1 );
|
||||
nmstr[strlen( nmstr ) - 1] = '\0';
|
||||
return NAME;
|
||||
}
|
||||
|
||||
(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|. {
|
||||
format_synerr( _( "unrecognized %%option: %s" ),
|
||||
yytext );
|
||||
BEGIN(RECOVER);
|
||||
}
|
||||
}
|
||||
|
||||
<RECOVER>.*{NL} ++linenum; BEGIN(INITIAL);
|
||||
|
||||
|
||||
<SECT2PROLOG>{
|
||||
^"%{".* ++bracelevel; yyless( 2 ); /* eat only %{ */
|
||||
^"%}".* --bracelevel; yyless( 2 ); /* eat only %} */
|
||||
|
||||
^{WS}.* ACTION_ECHO; /* indented code in prolog */
|
||||
|
||||
^{NOT_WS}.* { /* non-indented code */
|
||||
if ( bracelevel <= 0 )
|
||||
{ /* not in %{ ... %} */
|
||||
yyless( 0 ); /* put it all back */
|
||||
yy_set_bol( 1 );
|
||||
mark_prolog();
|
||||
BEGIN(SECT2);
|
||||
}
|
||||
else
|
||||
ACTION_ECHO;
|
||||
}
|
||||
|
||||
.* ACTION_ECHO;
|
||||
{NL} ++linenum; ACTION_ECHO;
|
||||
|
||||
<<EOF>> {
|
||||
mark_prolog();
|
||||
sectnum = 0;
|
||||
yyterminate(); /* to stop the parser */
|
||||
}
|
||||
}
|
||||
|
||||
<SECT2>{
|
||||
^{OPTWS}{NL} ++linenum; /* allow blank lines in section 2 */
|
||||
|
||||
^{OPTWS}"%{" {
|
||||
indented_code = false;
|
||||
doing_codeblock = true;
|
||||
bracelevel = 1;
|
||||
BEGIN(PERCENT_BRACE_ACTION);
|
||||
}
|
||||
|
||||
^{OPTWS}"<" BEGIN(SC); return '<';
|
||||
^{OPTWS}"^" return '^';
|
||||
\" BEGIN(QUOTE); return '"';
|
||||
"{"/[[:digit:]] BEGIN(NUM); return '{';
|
||||
"$"/([[:blank:]]|{NL}) return '$';
|
||||
|
||||
{WS}"%{" {
|
||||
bracelevel = 1;
|
||||
BEGIN(PERCENT_BRACE_ACTION);
|
||||
|
||||
if ( in_rule )
|
||||
{
|
||||
doing_rule_action = true;
|
||||
in_rule = false;
|
||||
return '\n';
|
||||
}
|
||||
}
|
||||
{WS}"|".*{NL} continued_action = true; ++linenum; return '\n';
|
||||
|
||||
^{WS}"/*" {
|
||||
yyless( yyleng - 2 ); /* put back '/', '*' */
|
||||
bracelevel = 0;
|
||||
continued_action = false;
|
||||
BEGIN(ACTION);
|
||||
}
|
||||
|
||||
^{WS} /* allow indented rules */
|
||||
|
||||
{WS} {
|
||||
/* This rule is separate from the one below because
|
||||
* otherwise we get variable trailing context, so
|
||||
* we can't build the scanner using -{f,F}.
|
||||
*/
|
||||
bracelevel = 0;
|
||||
continued_action = false;
|
||||
BEGIN(ACTION);
|
||||
|
||||
if ( in_rule )
|
||||
{
|
||||
doing_rule_action = true;
|
||||
in_rule = false;
|
||||
return '\n';
|
||||
}
|
||||
}
|
||||
|
||||
{OPTWS}{NL} {
|
||||
bracelevel = 0;
|
||||
continued_action = false;
|
||||
BEGIN(ACTION);
|
||||
unput( '\n' ); /* so <ACTION> sees it */
|
||||
|
||||
if ( in_rule )
|
||||
{
|
||||
doing_rule_action = true;
|
||||
in_rule = false;
|
||||
return '\n';
|
||||
}
|
||||
}
|
||||
|
||||
^{OPTWS}"<<EOF>>" |
|
||||
"<<EOF>>" return EOF_OP;
|
||||
|
||||
^"%%".* {
|
||||
sectnum = 3;
|
||||
BEGIN(SECT3);
|
||||
yyterminate(); /* to stop the parser */
|
||||
}
|
||||
|
||||
"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})* {
|
||||
int cclval;
|
||||
|
||||
strcpy( nmstr, yytext );
|
||||
|
||||
/* Check to see if we've already encountered this
|
||||
* ccl.
|
||||
*/
|
||||
if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
|
||||
{
|
||||
if ( input() != ']' )
|
||||
synerr( _( "bad character class" ) );
|
||||
|
||||
yylval = cclval;
|
||||
++cclreuse;
|
||||
return PREVCCL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We fudge a bit. We know that this ccl will
|
||||
* soon be numbered as lastccl + 1 by cclinit.
|
||||
*/
|
||||
cclinstal( (Char *) nmstr, lastccl + 1 );
|
||||
|
||||
/* Push back everything but the leading bracket
|
||||
* so the ccl can be rescanned.
|
||||
*/
|
||||
yyless( 1 );
|
||||
|
||||
BEGIN(FIRSTCCL);
|
||||
return '[';
|
||||
}
|
||||
}
|
||||
|
||||
"{"{NAME}"}" {
|
||||
register Char *nmdefptr;
|
||||
|
||||
strcpy( nmstr, yytext + 1 );
|
||||
nmstr[yyleng - 2] = '\0'; /* chop trailing brace */
|
||||
|
||||
if ( (nmdefptr = ndlookup( nmstr )) == 0 )
|
||||
format_synerr(
|
||||
_( "undefined definition {%s}" ),
|
||||
nmstr );
|
||||
|
||||
else
|
||||
{ /* push back name surrounded by ()'s */
|
||||
int len = strlen( (char *) nmdefptr );
|
||||
|
||||
if ( lex_compat || nmdefptr[0] == '^' ||
|
||||
(len > 0 && nmdefptr[len - 1] == '$') )
|
||||
{ /* don't use ()'s after all */
|
||||
PUT_BACK_STRING((char *) nmdefptr, 0);
|
||||
|
||||
if ( nmdefptr[0] == '^' )
|
||||
BEGIN(CARETISBOL);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
unput(')');
|
||||
PUT_BACK_STRING((char *) nmdefptr, 0);
|
||||
unput('(');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[/|*+?.(){}] return (unsigned char) yytext[0];
|
||||
. RETURNCHAR;
|
||||
}
|
||||
|
||||
|
||||
<SC>{
|
||||
[,*] return (unsigned char) yytext[0];
|
||||
">" BEGIN(SECT2); return '>';
|
||||
">"/^ BEGIN(CARETISBOL); return '>';
|
||||
{SCNAME} RETURNNAME;
|
||||
. {
|
||||
format_synerr( _( "bad <start condition>: %s" ),
|
||||
yytext );
|
||||
}
|
||||
}
|
||||
|
||||
<CARETISBOL>"^" BEGIN(SECT2); return '^';
|
||||
|
||||
|
||||
<QUOTE>{
|
||||
[^"\n] RETURNCHAR;
|
||||
\" BEGIN(SECT2); return '"';
|
||||
|
||||
{NL} {
|
||||
synerr( _( "missing quote" ) );
|
||||
BEGIN(SECT2);
|
||||
++linenum;
|
||||
return '"';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
<FIRSTCCL>{
|
||||
"^"/[^-\]\n] BEGIN(CCL); return '^';
|
||||
"^"/("-"|"]") return '^';
|
||||
. BEGIN(CCL); RETURNCHAR;
|
||||
}
|
||||
|
||||
<CCL>{
|
||||
-/[^\]\n] return '-';
|
||||
[^\]\n] RETURNCHAR;
|
||||
"]" BEGIN(SECT2); return ']';
|
||||
.|{NL} {
|
||||
synerr( _( "bad character class" ) );
|
||||
BEGIN(SECT2);
|
||||
return ']';
|
||||
}
|
||||
}
|
||||
|
||||
<FIRSTCCL,CCL>{
|
||||
"[:alnum:]" BEGIN(CCL); return CCE_ALNUM;
|
||||
"[:alpha:]" BEGIN(CCL); return CCE_ALPHA;
|
||||
"[:blank:]" BEGIN(CCL); return CCE_BLANK;
|
||||
"[:cntrl:]" BEGIN(CCL); return CCE_CNTRL;
|
||||
"[:digit:]" BEGIN(CCL); return CCE_DIGIT;
|
||||
"[:graph:]" BEGIN(CCL); return CCE_GRAPH;
|
||||
"[:lower:]" BEGIN(CCL); return CCE_LOWER;
|
||||
"[:print:]" BEGIN(CCL); return CCE_PRINT;
|
||||
"[:punct:]" BEGIN(CCL); return CCE_PUNCT;
|
||||
"[:space:]" BEGIN(CCL); return CCE_SPACE;
|
||||
"[:upper:]" BEGIN(CCL); return CCE_UPPER;
|
||||
"[:xdigit:]" BEGIN(CCL); return CCE_XDIGIT;
|
||||
{CCL_EXPR} {
|
||||
format_synerr(
|
||||
_( "bad character class expression: %s" ),
|
||||
yytext );
|
||||
BEGIN(CCL); return CCE_ALNUM;
|
||||
}
|
||||
}
|
||||
|
||||
<NUM>{
|
||||
[[:digit:]]+ {
|
||||
yylval = myctoi( yytext );
|
||||
return NUMBER;
|
||||
}
|
||||
|
||||
"," return ',';
|
||||
"}" BEGIN(SECT2); return '}';
|
||||
|
||||
. {
|
||||
synerr( _( "bad character inside {}'s" ) );
|
||||
BEGIN(SECT2);
|
||||
return '}';
|
||||
}
|
||||
|
||||
{NL} {
|
||||
synerr( _( "missing }" ) );
|
||||
BEGIN(SECT2);
|
||||
++linenum;
|
||||
return '}';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
<PERCENT_BRACE_ACTION>{
|
||||
{OPTWS}"%}".* bracelevel = 0;
|
||||
|
||||
<ACTION>"/*" ACTION_ECHO; yy_push_state( COMMENT );
|
||||
|
||||
<CODEBLOCK,ACTION>{
|
||||
"reject" {
|
||||
ACTION_ECHO;
|
||||
CHECK_REJECT(yytext);
|
||||
}
|
||||
"yymore" {
|
||||
ACTION_ECHO;
|
||||
CHECK_YYMORE(yytext);
|
||||
}
|
||||
}
|
||||
|
||||
{NAME}|{NOT_NAME}|. ACTION_ECHO;
|
||||
{NL} {
|
||||
++linenum;
|
||||
ACTION_ECHO;
|
||||
if ( bracelevel == 0 ||
|
||||
(doing_codeblock && indented_code) )
|
||||
{
|
||||
if ( doing_rule_action )
|
||||
add_action( "\tYY_BREAK\n" );
|
||||
|
||||
doing_rule_action = doing_codeblock = false;
|
||||
BEGIN(SECT2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
|
||||
<ACTION>{
|
||||
"{" ACTION_ECHO; ++bracelevel;
|
||||
"}" ACTION_ECHO; --bracelevel;
|
||||
[^[:alpha:]_{}"'/\n]+ ACTION_ECHO;
|
||||
{NAME} ACTION_ECHO;
|
||||
"'"([^'\\\n]|\\.)*"'" ACTION_ECHO; /* character constant */
|
||||
\" ACTION_ECHO; BEGIN(ACTION_STRING);
|
||||
{NL} {
|
||||
++linenum;
|
||||
ACTION_ECHO;
|
||||
if ( bracelevel == 0 )
|
||||
{
|
||||
if ( doing_rule_action )
|
||||
add_action( "\tYY_BREAK\n" );
|
||||
|
||||
doing_rule_action = false;
|
||||
BEGIN(SECT2);
|
||||
}
|
||||
}
|
||||
. ACTION_ECHO;
|
||||
}
|
||||
|
||||
<ACTION_STRING>{
|
||||
[^"\\\n]+ ACTION_ECHO;
|
||||
\\. ACTION_ECHO;
|
||||
{NL} ++linenum; ACTION_ECHO;
|
||||
\" ACTION_ECHO; BEGIN(ACTION);
|
||||
. ACTION_ECHO;
|
||||
}
|
||||
|
||||
<COMMENT,ACTION,ACTION_STRING><<EOF>> {
|
||||
synerr( _( "EOF encountered inside an action" ) );
|
||||
yyterminate();
|
||||
}
|
||||
|
||||
|
||||
<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ} {
|
||||
yylval = myesc( (Char *) yytext );
|
||||
|
||||
if ( YY_START == FIRSTCCL )
|
||||
BEGIN(CCL);
|
||||
|
||||
return CHAR;
|
||||
}
|
||||
|
||||
|
||||
<SECT3>{
|
||||
.*(\n?) ECHO;
|
||||
<<EOF>> sectnum = 0; yyterminate();
|
||||
}
|
||||
|
||||
<*>.|\n format_synerr( _( "bad character: %s" ), yytext );
|
||||
|
||||
%%
|
||||
|
||||
|
||||
int yywrap()
|
||||
{
|
||||
if ( --num_input_files > 0 )
|
||||
{
|
||||
set_input_file( *++input_files );
|
||||
return 0;
|
||||
}
|
||||
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* set_input_file - open the given file (if NULL, stdin) for scanning */
|
||||
|
||||
void set_input_file( file )
|
||||
char *file;
|
||||
{
|
||||
if ( file && strcmp( file, "-" ) )
|
||||
{
|
||||
infilename = copy_string( file );
|
||||
yyin = fopen( infilename, "r" );
|
||||
|
||||
if ( yyin == NULL )
|
||||
lerrsf( _( "can't open %s" ), file );
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
yyin = stdin;
|
||||
infilename = copy_string( "<stdin>" );
|
||||
}
|
||||
|
||||
linenum = 1;
|
||||
}
|
||||
|
||||
|
||||
/* Wrapper routines for accessing the scanner's malloc routines. */
|
||||
|
||||
/* flex_alloc - return whatever malloc() yields for a request of "size" bytes */

void *flex_alloc( size )
size_t size;
	{
	void *mem = malloc( size );

	return mem;
	}
|
||||
|
||||
/* flex_realloc - return whatever realloc() yields for resizing "ptr" to
 * "size" bytes
 */

void *flex_realloc( ptr, size )
void *ptr;
size_t size;
	{
	void *resized = realloc( ptr, size );

	return resized;
	}
|
||||
|
||||
/* flex_free - pass "ptr" to free(); a null pointer is ignored */

void flex_free( ptr )
void *ptr;
	{
	if ( ptr == NULL )
		return;

	free( ptr );
	}
|
||||
887
to.do/unicode/tblcmp.c
Normal file
887
to.do/unicode/tblcmp.c
Normal file
@ -0,0 +1,887 @@
|
||||
/* tblcmp - table compression routines */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1990 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Vern Paxson.
|
||||
*
|
||||
* The United States Government has rights in this work pursuant
|
||||
* to contract no. DE-AC03-76SF00098 between the United States
|
||||
* Department of Energy and the University of California.
|
||||
*
|
||||
* Redistribution and use in source and binary forms are permitted provided
|
||||
* that: (1) source distributions retain this entire copyright notice and
|
||||
* comment, and (2) distributions including binaries display the following
|
||||
* acknowledgement: ``This product includes software developed by the
|
||||
* University of California, Berkeley and its contributors'' in the
|
||||
* documentation or other materials provided with the distribution and in
|
||||
* all advertising materials mentioning features or use of this software.
|
||||
* Neither the name of the University nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
/* $Header$ */
|
||||
|
||||
#include "flexdef.h"
|
||||
|
||||
|
||||
/* declarations for functions that have forward references */
|
||||
|
||||
void mkentry PROTO((register int*, int, int, int, int));
|
||||
void mkprot PROTO((int[], int, int));
|
||||
void mktemplate PROTO((int[], int, int));
|
||||
void mv2front PROTO((int));
|
||||
int tbldiff PROTO((int[], int, int[]));
|
||||
|
||||
|
||||
/* bldtbl - build table entries for dfa state
|
||||
*
|
||||
* synopsis
|
||||
* int state[numecs], statenum, totaltrans, comstate, comfreq;
|
||||
* bldtbl( state, statenum, totaltrans, comstate, comfreq );
|
||||
*
|
||||
* State is the statenum'th dfa state. It is indexed by equivalence class and
|
||||
* gives the number of the state to enter for a given equivalence class.
|
||||
* totaltrans is the total number of transitions out of the state. Comstate
|
||||
* is that state which is the destination of the most transitions out of State.
|
||||
* Comfreq is how many transitions there are out of State to Comstate.
|
||||
*
|
||||
* A note on terminology:
|
||||
* "protos" are transition tables which have a high probability of
|
||||
* either being redundant (a state processed later will have an identical
|
||||
* transition table) or nearly redundant (a state processed later will have
|
||||
* many of the same out-transitions). A "most recently used" queue of
|
||||
* protos is kept around with the hope that most states will find a proto
|
||||
* which is similar enough to be usable, and therefore compacting the
|
||||
* output tables.
|
||||
* "templates" are a special type of proto. If a transition table is
|
||||
* homogeneous or nearly homogeneous (all transitions go to the same
|
||||
* destination) then the odds are good that future states will also go
|
||||
* to the same destination state on basically the same character set.
|
||||
* These homogeneous states are so common when dealing with large rule
|
||||
* sets that they merit special attention. If the transition table were
|
||||
* simply made into a proto, then (typically) each subsequent, similar
|
||||
* state will differ from the proto for two out-transitions. One of these
|
||||
* out-transitions will be that character on which the proto does not go
|
||||
* to the common destination, and one will be that character on which the
|
||||
* state does not go to the common destination. Templates, on the other
|
||||
* hand, go to the common state on EVERY transition character, and therefore
|
||||
* cost only one difference.
|
||||
*/
|
||||
|
||||
void bldtbl( state, statenum, totaltrans, comstate, comfreq )
|
||||
int state[], statenum, totaltrans, comstate, comfreq;
|
||||
{
|
||||
int extptr, extrct[2][CSIZE + 1];
|
||||
int mindiff, minprot, i, d;
|
||||
|
||||
/* If extptr is 0 then the first array of extrct holds the result
|
||||
* of the "best difference" to date, which is those transitions
|
||||
* which occur in "state" but not in the proto which, to date,
|
||||
* has the fewest differences between itself and "state". If
|
||||
* extptr is 1 then the second array of extrct hold the best
|
||||
* difference. The two arrays are toggled between so that the
|
||||
* best difference to date can be kept around and also a difference
|
||||
* just created by checking against a candidate "best" proto.
|
||||
*/
|
||||
|
||||
extptr = 0;
|
||||
|
||||
/* If the state has too few out-transitions, don't bother trying to
|
||||
* compact its tables.
|
||||
*/
|
||||
|
||||
if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) )
|
||||
mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
|
||||
|
||||
else
|
||||
{
|
||||
/* "checkcom" is true if we should only check "state" against
|
||||
* protos which have the same "comstate" value.
|
||||
*/
|
||||
int checkcom =
|
||||
comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
|
||||
|
||||
minprot = firstprot;
|
||||
mindiff = totaltrans;
|
||||
|
||||
if ( checkcom )
|
||||
{
|
||||
/* Find first proto which has the same "comstate". */
|
||||
for ( i = firstprot; i != NIL; i = protnext[i] )
|
||||
if ( protcomst[i] == comstate )
|
||||
{
|
||||
minprot = i;
|
||||
mindiff = tbldiff( state, minprot,
|
||||
extrct[extptr] );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/* Since we've decided that the most common destination
|
||||
* out of "state" does not occur with a high enough
|
||||
* frequency, we set the "comstate" to zero, assuring
|
||||
* that if this state is entered into the proto list,
|
||||
* it will not be considered a template.
|
||||
*/
|
||||
comstate = 0;
|
||||
|
||||
if ( firstprot != NIL )
|
||||
{
|
||||
minprot = firstprot;
|
||||
mindiff = tbldiff( state, minprot,
|
||||
extrct[extptr] );
|
||||
}
|
||||
}
|
||||
|
||||
/* We now have the first interesting proto in "minprot". If
|
||||
* it matches within the tolerances set for the first proto,
|
||||
* we don't want to bother scanning the rest of the proto list
|
||||
* to see if we have any other reasonable matches.
|
||||
*/
|
||||
|
||||
if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE )
|
||||
{
|
||||
/* Not a good enough match. Scan the rest of the
|
||||
* protos.
|
||||
*/
|
||||
for ( i = minprot; i != NIL; i = protnext[i] )
|
||||
{
|
||||
d = tbldiff( state, i, extrct[1 - extptr] );
|
||||
if ( d < mindiff )
|
||||
{
|
||||
extptr = 1 - extptr;
|
||||
mindiff = d;
|
||||
minprot = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if the proto we've decided on as our best bet is close
|
||||
* enough to the state we want to match to be usable.
|
||||
*/
|
||||
|
||||
if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE )
|
||||
{
|
||||
/* No good. If the state is homogeneous enough,
|
||||
* we make a template out of it. Otherwise, we
|
||||
* make a proto.
|
||||
*/
|
||||
|
||||
if ( comfreq * 100 >=
|
||||
totaltrans * TEMPLATE_SAME_PERCENTAGE )
|
||||
mktemplate( state, statenum, comstate );
|
||||
|
||||
else
|
||||
{
|
||||
mkprot( state, statenum, comstate );
|
||||
mkentry( state, numecs, statenum,
|
||||
JAMSTATE, totaltrans );
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{ /* use the proto */
|
||||
mkentry( extrct[extptr], numecs, statenum,
|
||||
prottbl[minprot], mindiff );
|
||||
|
||||
/* If this state was sufficiently different from the
|
||||
* proto we built it from, make it, too, a proto.
|
||||
*/
|
||||
|
||||
if ( mindiff * 100 >=
|
||||
totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
|
||||
mkprot( state, statenum, comstate );
|
||||
|
||||
/* Since mkprot added a new proto to the proto queue,
|
||||
* it's possible that "minprot" is no longer on the
|
||||
* proto queue (if it happened to have been the last
|
||||
* entry, it would have been bumped off). If it's
|
||||
* not there, then the new proto took its physical
|
||||
* place (though logically the new proto is at the
|
||||
* beginning of the queue), so in that case the
|
||||
* following call will do nothing.
|
||||
*/
|
||||
|
||||
mv2front( minprot );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* cmptmps - compress template table entries
|
||||
*
|
||||
* Template tables are compressed by using the 'template equivalence
|
||||
* classes', which are collections of transition character equivalence
|
||||
* classes which always appear together in templates - really meta-equivalence
|
||||
* classes.
|
||||
*/
|
||||
|
||||
void cmptmps()
|
||||
{
|
||||
int tmpstorage[CSIZE + 1];
|
||||
register int *tmp = tmpstorage, i, j;
|
||||
int totaltrans, trans;
|
||||
|
||||
peakpairs = numtemps * numecs + tblend;
|
||||
|
||||
if ( usemecs )
|
||||
{
|
||||
/* Create equivalence classes based on data gathered on
|
||||
* template transitions.
|
||||
*/
|
||||
nummecs = cre8ecs( tecfwd, tecbck, numecs );
|
||||
}
|
||||
|
||||
else
|
||||
nummecs = numecs;
|
||||
|
||||
while ( lastdfa + numtemps + 1 >= current_max_dfas )
|
||||
increase_max_dfas();
|
||||
|
||||
/* Loop through each template. */
|
||||
|
||||
for ( i = 1; i <= numtemps; ++i )
|
||||
{
|
||||
/* Number of non-jam transitions out of this template. */
|
||||
totaltrans = 0;
|
||||
|
||||
for ( j = 1; j <= numecs; ++j )
|
||||
{
|
||||
trans = tnxt[numecs * i + j];
|
||||
|
||||
if ( usemecs )
|
||||
{
|
||||
/* The absolute value of tecbck is the
|
||||
* meta-equivalence class of a given
|
||||
* equivalence class, as set up by cre8ecs().
|
||||
*/
|
||||
if ( tecbck[j] > 0 )
|
||||
{
|
||||
tmp[tecbck[j]] = trans;
|
||||
|
||||
if ( trans > 0 )
|
||||
++totaltrans;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
tmp[j] = trans;
|
||||
|
||||
if ( trans > 0 )
|
||||
++totaltrans;
|
||||
}
|
||||
}
|
||||
|
||||
/* It is assumed (in a rather subtle way) in the skeleton
|
||||
* that if we're using meta-equivalence classes, the def[]
|
||||
* entry for all templates is the jam template, i.e.,
|
||||
* templates never default to other non-jam table entries
|
||||
* (e.g., another template)
|
||||
*/
|
||||
|
||||
/* Leave room for the jam-state after the last real state. */
|
||||
mkentry( tmp, nummecs, lastdfa + i + 1, JAMSTATE, totaltrans );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* expand_nxt_chk - expand the next check arrays */
|
||||
|
||||
void expand_nxt_chk()
|
||||
{
|
||||
register int old_max = current_max_xpairs;
|
||||
|
||||
current_max_xpairs += MAX_XPAIRS_INCREMENT;
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
nxt = reallocate_integer_array( nxt, current_max_xpairs );
|
||||
chk = reallocate_integer_array( chk, current_max_xpairs );
|
||||
|
||||
zero_out( (char *) (chk + old_max),
|
||||
(size_t) (MAX_XPAIRS_INCREMENT * sizeof( int )) );
|
||||
}
|
||||
|
||||
|
||||
/* find_table_space - finds a space in the table for a state to be placed
|
||||
*
|
||||
* synopsis
|
||||
* int *state, numtrans, block_start;
|
||||
* int find_table_space();
|
||||
*
|
||||
* block_start = find_table_space( state, numtrans );
|
||||
*
|
||||
* State is the state to be added to the full speed transition table.
|
||||
* Numtrans is the number of out-transitions for the state.
|
||||
*
|
||||
* find_table_space() returns the position of the start of the first block (in
|
||||
* chk) able to accommodate the state
|
||||
*
|
||||
* In determining if a state will or will not fit, find_table_space() must take
|
||||
* into account the fact that an end-of-buffer state will be added at [0],
|
||||
* and an action number will be added in [-1].
|
||||
*/
|
||||
|
||||
int find_table_space( state, numtrans )
|
||||
int *state, numtrans;
|
||||
{
|
||||
/* Firstfree is the position of the first possible occurrence of two
|
||||
* consecutive unused records in the chk and nxt arrays.
|
||||
*/
|
||||
register int i;
|
||||
register int *state_ptr, *chk_ptr;
|
||||
register int *ptr_to_last_entry_in_state;
|
||||
|
||||
/* If there are too many out-transitions, put the state at the end of
|
||||
* nxt and chk.
|
||||
*/
|
||||
if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT )
|
||||
{
|
||||
/* If table is empty, return the first available spot in
|
||||
* chk/nxt, which should be 1.
|
||||
*/
|
||||
if ( tblend < 2 )
|
||||
return 1;
|
||||
|
||||
/* Start searching for table space near the end of
|
||||
* chk/nxt arrays.
|
||||
*/
|
||||
i = tblend - numecs;
|
||||
}
|
||||
|
||||
else
|
||||
/* Start searching for table space from the beginning
|
||||
* (skipping only the elements which will definitely not
|
||||
* hold the new state).
|
||||
*/
|
||||
i = firstfree;
|
||||
|
||||
while ( 1 ) /* loops until a space is found */
|
||||
{
|
||||
while ( i + numecs >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
/* Loops until space for end-of-buffer and action number
|
||||
* are found.
|
||||
*/
|
||||
while ( 1 )
|
||||
{
|
||||
/* Check for action number space. */
|
||||
if ( chk[i - 1] == 0 )
|
||||
{
|
||||
/* Check for end-of-buffer space. */
|
||||
if ( chk[i] == 0 )
|
||||
break;
|
||||
|
||||
else
|
||||
/* Since i != 0, there is no use
|
||||
* checking to see if (++i) - 1 == 0,
|
||||
* because that's the same as i == 0,
|
||||
* so we skip a space.
|
||||
*/
|
||||
i += 2;
|
||||
}
|
||||
|
||||
else
|
||||
++i;
|
||||
|
||||
while ( i + numecs >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
}
|
||||
|
||||
/* If we started search from the beginning, store the new
|
||||
* firstfree for the next call of find_table_space().
|
||||
*/
|
||||
if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT )
|
||||
firstfree = i + 1;
|
||||
|
||||
/* Check to see if all elements in chk (and therefore nxt)
|
||||
* that are needed for the new state have not yet been taken.
|
||||
*/
|
||||
|
||||
state_ptr = &state[1];
|
||||
ptr_to_last_entry_in_state = &chk[i + numecs + 1];
|
||||
|
||||
for ( chk_ptr = &chk[i + 1];
|
||||
chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr )
|
||||
if ( *(state_ptr++) != 0 && *chk_ptr != 0 )
|
||||
break;
|
||||
|
||||
if ( chk_ptr == ptr_to_last_entry_in_state )
|
||||
return i;
|
||||
|
||||
else
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* inittbl - initialize transition tables
|
||||
*
|
||||
* Initializes "firstfree" to be one beyond the end of the table. Initializes
|
||||
* all "chk" entries to be zero.
|
||||
*/
|
||||
void inittbl()
|
||||
{
|
||||
register int i;
|
||||
|
||||
zero_out( (char *) chk, (size_t) (current_max_xpairs * sizeof( int )) );
|
||||
|
||||
tblend = 0;
|
||||
firstfree = tblend + 1;
|
||||
numtemps = 0;
|
||||
|
||||
if ( usemecs )
|
||||
{
|
||||
/* Set up doubly-linked meta-equivalence classes; these
|
||||
* are sets of equivalence classes which all have identical
|
||||
* transitions out of TEMPLATES.
|
||||
*/
|
||||
|
||||
tecbck[1] = NIL;
|
||||
|
||||
for ( i = 2; i <= numecs; ++i )
|
||||
{
|
||||
tecbck[i] = i - 1;
|
||||
tecfwd[i - 1] = i;
|
||||
}
|
||||
|
||||
tecfwd[numecs] = NIL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* mkdeftbl - make the default, "jam" table entries */
|
||||
|
||||
void mkdeftbl()
|
||||
{
|
||||
int i;
|
||||
|
||||
jamstate = lastdfa + 1;
|
||||
|
||||
++tblend; /* room for transition on end-of-buffer character */
|
||||
|
||||
while ( tblend + numecs >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
/* Add in default end-of-buffer transition. */
|
||||
nxt[tblend] = end_of_buffer_state;
|
||||
chk[tblend] = jamstate;
|
||||
|
||||
for ( i = 1; i <= numecs; ++i )
|
||||
{
|
||||
nxt[tblend + i] = 0;
|
||||
chk[tblend + i] = jamstate;
|
||||
}
|
||||
|
||||
jambase = tblend;
|
||||
|
||||
base[jamstate] = jambase;
|
||||
def[jamstate] = 0;
|
||||
|
||||
tblend += numecs;
|
||||
++numtemps;
|
||||
}
|
||||
|
||||
|
||||
/* mkentry - create base/def and nxt/chk entries for transition array
|
||||
*
|
||||
* synopsis
|
||||
* int state[numchars + 1], numchars, statenum, deflink, totaltrans;
|
||||
* mkentry( state, numchars, statenum, deflink, totaltrans );
|
||||
*
|
||||
* "state" is a transition array "numchars" characters in size, "statenum"
|
||||
* is the offset to be used into the base/def tables, and "deflink" is the
|
||||
* entry to put in the "def" table entry. If "deflink" is equal to
|
||||
* "JAMSTATE", then no attempt will be made to fit zero entries of "state"
|
||||
* (i.e., jam entries) into the table. It is assumed that by linking to
|
||||
* "JAMSTATE" they will be taken care of. In any case, entries in "state"
|
||||
* marking transitions to "SAME_TRANS" are treated as though they will be
|
||||
* taken care of by whereever "deflink" points. "totaltrans" is the total
|
||||
* number of transitions out of the state. If it is below a certain threshold,
|
||||
* the tables are searched for an interior spot that will accommodate the
|
||||
* state array.
|
||||
*/
|
||||
|
||||
void mkentry( state, numchars, statenum, deflink, totaltrans )
|
||||
register int *state;
|
||||
int numchars, statenum, deflink, totaltrans;
|
||||
{
|
||||
register int minec, maxec, i, baseaddr;
|
||||
int tblbase, tbllast;
|
||||
|
||||
if ( totaltrans == 0 )
|
||||
{ /* there are no out-transitions */
|
||||
if ( deflink == JAMSTATE )
|
||||
base[statenum] = JAMSTATE;
|
||||
else
|
||||
base[statenum] = 0;
|
||||
|
||||
def[statenum] = deflink;
|
||||
return;
|
||||
}
|
||||
|
||||
for ( minec = 1; minec <= numchars; ++minec )
|
||||
{
|
||||
if ( state[minec] != SAME_TRANS )
|
||||
if ( state[minec] != 0 || deflink != JAMSTATE )
|
||||
break;
|
||||
}
|
||||
|
||||
if ( totaltrans == 1 )
|
||||
{
|
||||
/* There's only one out-transition. Save it for later to fill
|
||||
* in holes in the tables.
|
||||
*/
|
||||
stack1( statenum, minec, state[minec], deflink );
|
||||
return;
|
||||
}
|
||||
|
||||
for ( maxec = numchars; maxec > 0; --maxec )
|
||||
{
|
||||
if ( state[maxec] != SAME_TRANS )
|
||||
if ( state[maxec] != 0 || deflink != JAMSTATE )
|
||||
break;
|
||||
}
|
||||
|
||||
/* Whether we try to fit the state table in the middle of the table
|
||||
* entries we have already generated, or if we just take the state
|
||||
* table at the end of the nxt/chk tables, we must make sure that we
|
||||
* have a valid base address (i.e., non-negative). Note that
|
||||
* negative base addresses dangerous at run-time (because indexing
|
||||
* the nxt array with one and a low-valued character will access
|
||||
* memory before the start of the array.
|
||||
*/
|
||||
|
||||
/* Find the first transition of state that we need to worry about. */
|
||||
if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
|
||||
{
|
||||
/* Attempt to squeeze it into the middle of the tables. */
|
||||
baseaddr = firstfree;
|
||||
|
||||
while ( baseaddr < minec )
|
||||
{
|
||||
/* Using baseaddr would result in a negative base
|
||||
* address below; find the next free slot.
|
||||
*/
|
||||
for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
|
||||
;
|
||||
}
|
||||
|
||||
while ( baseaddr + maxec - minec + 1 >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
for ( i = minec; i <= maxec; ++i )
|
||||
if ( state[i] != SAME_TRANS &&
|
||||
(state[i] != 0 || deflink != JAMSTATE) &&
|
||||
chk[baseaddr + i - minec] != 0 )
|
||||
{ /* baseaddr unsuitable - find another */
|
||||
for ( ++baseaddr;
|
||||
baseaddr < current_max_xpairs &&
|
||||
chk[baseaddr] != 0; ++baseaddr )
|
||||
;
|
||||
|
||||
while ( baseaddr + maxec - minec + 1 >=
|
||||
current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
/* Reset the loop counter so we'll start all
|
||||
* over again next time it's incremented.
|
||||
*/
|
||||
|
||||
i = minec - 1;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
/* Ensure that the base address we eventually generate is
|
||||
* non-negative.
|
||||
*/
|
||||
baseaddr = MAX( tblend + 1, minec );
|
||||
}
|
||||
|
||||
tblbase = baseaddr - minec;
|
||||
tbllast = tblbase + maxec;
|
||||
|
||||
while ( tbllast + 1 >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
base[statenum] = tblbase;
|
||||
def[statenum] = deflink;
|
||||
|
||||
for ( i = minec; i <= maxec; ++i )
|
||||
if ( state[i] != SAME_TRANS )
|
||||
if ( state[i] != 0 || deflink != JAMSTATE )
|
||||
{
|
||||
nxt[tblbase + i] = state[i];
|
||||
chk[tblbase + i] = statenum;
|
||||
}
|
||||
|
||||
if ( baseaddr == firstfree )
|
||||
/* Find next free slot in tables. */
|
||||
for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
|
||||
;
|
||||
|
||||
tblend = MAX( tblend, tbllast );
|
||||
}
|
||||
|
||||
|
||||
/* mk1tbl - create table entries for a state (or state fragment) which
|
||||
* has only one out-transition
|
||||
*/
|
||||
|
||||
void mk1tbl( state, sym, onenxt, onedef )
|
||||
int state, sym, onenxt, onedef;
|
||||
{
|
||||
if ( firstfree < sym )
|
||||
firstfree = sym;
|
||||
|
||||
while ( chk[firstfree] != 0 )
|
||||
if ( ++firstfree >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
|
||||
base[state] = firstfree - sym;
|
||||
def[state] = onedef;
|
||||
chk[firstfree] = state;
|
||||
nxt[firstfree] = onenxt;
|
||||
|
||||
if ( firstfree > tblend )
|
||||
{
|
||||
tblend = firstfree++;
|
||||
|
||||
if ( firstfree >= current_max_xpairs )
|
||||
expand_nxt_chk();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* mkprot - create new proto entry */
|
||||
|
||||
void mkprot( state, statenum, comstate )
|
||||
int state[], statenum, comstate;
|
||||
{
|
||||
int i, slot, tblbase;
|
||||
|
||||
if ( ++numprots >= MSP || numecs * numprots >= PROT_SAVE_SIZE )
|
||||
{
|
||||
/* Gotta make room for the new proto by dropping last entry in
|
||||
* the queue.
|
||||
*/
|
||||
slot = lastprot;
|
||||
lastprot = protprev[lastprot];
|
||||
protnext[lastprot] = NIL;
|
||||
}
|
||||
|
||||
else
|
||||
slot = numprots;
|
||||
|
||||
protnext[slot] = firstprot;
|
||||
|
||||
if ( firstprot != NIL )
|
||||
protprev[firstprot] = slot;
|
||||
|
||||
firstprot = slot;
|
||||
prottbl[slot] = statenum;
|
||||
protcomst[slot] = comstate;
|
||||
|
||||
/* Copy state into save area so it can be compared with rapidly. */
|
||||
tblbase = numecs * (slot - 1);
|
||||
|
||||
for ( i = 1; i <= numecs; ++i )
|
||||
protsave[tblbase + i] = state[i];
|
||||
}
|
||||
|
||||
|
||||
/* mktemplate - create a template entry based on a state, and connect the state
|
||||
* to it
|
||||
*/
|
||||
|
||||
void mktemplate( state, statenum, comstate )
|
||||
int state[], statenum, comstate;
|
||||
{
|
||||
int i, numdiff, tmpbase, tmp[CSIZE + 1];
|
||||
wchar_t transset[CSIZE + 1];
|
||||
int tsptr;
|
||||
|
||||
++numtemps;
|
||||
|
||||
tsptr = 0;
|
||||
|
||||
/* Calculate where we will temporarily store the transition table
|
||||
* of the template in the tnxt[] array. The final transition table
|
||||
* gets created by cmptmps().
|
||||
*/
|
||||
|
||||
tmpbase = numtemps * numecs;
|
||||
|
||||
if ( tmpbase + numecs >= current_max_template_xpairs )
|
||||
{
|
||||
current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;
|
||||
|
||||
++num_reallocs;
|
||||
|
||||
tnxt = reallocate_integer_array( tnxt,
|
||||
current_max_template_xpairs );
|
||||
}
|
||||
|
||||
for ( i = 1; i <= numecs; ++i )
|
||||
if ( state[i] == 0 )
|
||||
tnxt[tmpbase + i] = 0;
|
||||
else
|
||||
{
|
||||
transset[tsptr++] = i;
|
||||
tnxt[tmpbase + i] = comstate;
|
||||
}
|
||||
|
||||
if ( usemecs )
|
||||
mkeccl( transset, tsptr, tecfwd, tecbck, numecs, 0 );
|
||||
|
||||
mkprot( tnxt + tmpbase, -numtemps, comstate );
|
||||
|
||||
/* We rely on the fact that mkprot adds things to the beginning
|
||||
* of the proto queue.
|
||||
*/
|
||||
|
||||
numdiff = tbldiff( state, firstprot, tmp );
|
||||
mkentry( tmp, numecs, statenum, -numtemps, numdiff );
|
||||
}
|
||||
|
||||
|
||||
/* mv2front - move proto queue element to front of queue */
|
||||
|
||||
void mv2front( qelm )
|
||||
int qelm;
|
||||
{
|
||||
if ( firstprot != qelm )
|
||||
{
|
||||
if ( qelm == lastprot )
|
||||
lastprot = protprev[lastprot];
|
||||
|
||||
protnext[protprev[qelm]] = protnext[qelm];
|
||||
|
||||
if ( protnext[qelm] != NIL )
|
||||
protprev[protnext[qelm]] = protprev[qelm];
|
||||
|
||||
protprev[qelm] = NIL;
|
||||
protnext[qelm] = firstprot;
|
||||
protprev[firstprot] = qelm;
|
||||
firstprot = qelm;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* place_state - place a state into full speed transition table
|
||||
*
|
||||
* State is the statenum'th state. It is indexed by equivalence class and
|
||||
* gives the number of the state to enter for a given equivalence class.
|
||||
* Transnum is the number of out-transitions for the state.
|
||||
*/
|
||||
|
||||
void place_state( state, statenum, transnum )
|
||||
int *state, statenum, transnum;
|
||||
{
|
||||
register int i;
|
||||
register int *state_ptr;
|
||||
int position = find_table_space( state, transnum );
|
||||
|
||||
/* "base" is the table of start positions. */
|
||||
base[statenum] = position;
|
||||
|
||||
/* Put in action number marker; this non-zero number makes sure that
|
||||
* find_table_space() knows that this position in chk/nxt is taken
|
||||
* and should not be used for another accepting number in another
|
||||
* state.
|
||||
*/
|
||||
chk[position - 1] = 1;
|
||||
|
||||
/* Put in end-of-buffer marker; this is for the same purposes as
|
||||
* above.
|
||||
*/
|
||||
chk[position] = 1;
|
||||
|
||||
/* Place the state into chk and nxt. */
|
||||
state_ptr = &state[1];
|
||||
|
||||
for ( i = 1; i <= numecs; ++i, ++state_ptr )
|
||||
if ( *state_ptr != 0 )
|
||||
{
|
||||
chk[position + i] = i;
|
||||
nxt[position + i] = *state_ptr;
|
||||
}
|
||||
|
||||
if ( position + numecs > tblend )
|
||||
tblend = position + numecs;
|
||||
}
|
||||
|
||||
|
||||
/* stack1 - save states with only one out-transition to be processed later
|
||||
*
|
||||
* If there's room for another state on the "one-transition" stack, the
|
||||
* state is pushed onto it, to be processed later by mk1tbl. If there's
|
||||
* no room, we process the sucker right now.
|
||||
*/
|
||||
|
||||
void stack1( statenum, sym, nextstate, deflink )
|
||||
int statenum, sym, nextstate, deflink;
|
||||
{
|
||||
if ( onesp >= ONE_STACK_SIZE - 1 )
|
||||
mk1tbl( statenum, sym, nextstate, deflink );
|
||||
|
||||
else
|
||||
{
|
||||
++onesp;
|
||||
onestate[onesp] = statenum;
|
||||
onesym[onesp] = sym;
|
||||
onenext[onesp] = nextstate;
|
||||
onedef[onesp] = deflink;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* tbldiff - compute differences between two state tables
|
||||
*
|
||||
* "state" is the state array which is to be extracted from the pr'th
|
||||
* proto. "pr" is both the number of the proto we are extracting from
|
||||
* and an index into the save area where we can find the proto's complete
|
||||
* state table. Each entry in "state" which differs from the corresponding
|
||||
* entry of "pr" will appear in "ext".
|
||||
*
|
||||
* Entries which are the same in both "state" and "pr" will be marked
|
||||
* as transitions to "SAME_TRANS" in "ext". The total number of differences
|
||||
* between "state" and "pr" is returned as function value. Note that this
|
||||
* number is "numecs" minus the number of "SAME_TRANS" entries in "ext".
|
||||
*/
|
||||
|
||||
int tbldiff( state, pr, ext )
|
||||
int state[], pr, ext[];
|
||||
{
|
||||
register int i, *sp = state, *ep = ext, *protp;
|
||||
register int numdiff = 0;
|
||||
|
||||
protp = &protsave[numecs * (pr - 1)];
|
||||
|
||||
for ( i = numecs; i > 0; --i )
|
||||
{
|
||||
if ( *++protp == *++sp )
|
||||
*++ep = SAME_TRANS;
|
||||
else
|
||||
{
|
||||
*++ep = *sp;
|
||||
++numdiff;
|
||||
}
|
||||
}
|
||||
|
||||
return numdiff;
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user