adding the rest of vern's files

2026-01-26 15:39:06 +00:00 · 2001-05-01 20:47:11 +00:00 · 2001-05-01 20:47:11 +00:00 · 2eae880030
commit 2eae880030
parent 26e78464e7
52 changed files with 19026 additions and 0 deletions
--- a/examples/README
+++ b/examples/README
@ -0,0 +1,15 @@
+This directory contains some examples of what you can do with
+flex. These files are not tested regularly so you might have to tinker
+a bit before they work for you. Updates, new files and patches are welcome.
+
+	- debflex.awk, an awk script for annotating flex debug output.
+	  It presently only works with gawk and mawk, not with "old"
+	  or "new" awk.
+
+	- testxxLexer.l, a sample C++ program that uses flex's scanner
+	  class option ("-+").
+
+	- fastwc/, a subdirectory containing examples of how to use flex
+	  to write progressively higher-performance versions of the Unix
+	  "wc" utility.  This certainly should work with 2.5, but hasn't
+	  been tested.
--- a/examples/debflex.awk
+++ b/examples/debflex.awk
@ -0,0 +1,119 @@
+# Clarify the flex debug trace by substituting first line of each rule.
+# Francois Pinard <pinard@iro.umontreal.ca>, July 1990.
+#
+# Rewritten to process correctly \n's in scanner input.
+# BEGIN section modified to correct a collection of rules.
+# Michal Jaegermann <michal@phys.ualberta.ca>, December 1993
+#
+# Sample usage:
+#	flex -d PROGRAM.l
+#	gcc -o PROGRAM PROGRAM.c -lfl
+#	PROGRAM 2>&1 | gawk -f debflex.awk PROGRAM.l
+#
+# (VP's note: this script presently does not work with either "old" or
+#  "new" awk; fixes so it does will be welcome)
+
+# Startup: validate the command line, then read the flex source named by
+# the single argument and remember the text of every line in the rules
+# section (section 2) that starts in column one.  Those are the lines the
+# flex debug trace can refer to by line number.
+BEGIN {
+    # Ensure proper usage.
+
+    if (ARGC != 2) {
+	print "usage: gawk -f debflex.awk FLEX_SOURCE <DEBUG_OUTPUT";
+	exit (1);
+    }
+
+    # Remove and save the name of flex source, so that the main
+    # input is taken from standard input.
+
+    source = ARGV[1];
+    ARGC--;
+
+    # Swallow the flex source file.  getline returns 1 for a record,
+    # 0 at end of file and -1 on error, so it must be compared with 0
+    # explicitly: a bare "while (getline <source)" never terminates
+    # when the file cannot be read.
+
+    line = 0;
+    section = 1;
+    while ((status = (getline <source)) > 0) {
+
+	# Count the lines.
+
+	line++;
+
+	# Count the sections.  When encountering section 3,
+	# stop reading the flex source.
+
+	if (match ($0, /^%%/)) {
+	    section++;
+	    if (section == 3) {
+		break;
+	    }
+	}
+	else {
+	    # Only the lines in section 2 which do not begin in a
+	    # tab or space might be referred to by the flex debug
+	    # trace.  Save only those lines.
+
+	    if (section == 2 && match ($0, /^[^ \t]/)) {
+		rules[line] = $0;
+	    }
+	}
+    }
+    if (status < 0) {
+	print "debflex.awk: cannot read " source;
+	exit (1);
+    }
+    close(source);
+
+    dashes = "-----------------------------------------------------------";
+    collect = "";
+    line = 0;
+}
+
+# collect complete rule output from a scanner
+$0 !~ /^--/ {
+    collect = collect "\n" $0;
+    next;
+}
+# otherwise we have a new rule - process what we got so far
+{
+    process();
+}
+# and the same thing if we hit EOF
+END {
+    process();
+}
+
+# Process the trace text gathered in "collect" for one scanner step.
+# Uses the globals collect, line, rules, source and dashes; on return
+# "collect" is restarted with the current input record ($0).
+function process() {
+
+    # splitting this way we lose some double dashes and
+    # left parentheses from echoed input - a small price to pay
+    n = split(collect, field, "\n--|[(]");
+
+    # this loop kicks in only when we already collected something
+    for (i = 1; i <= n; i++) {
+	if (0 != line) {
+	    # A rule line number was seen in an earlier field, so this
+	    # field holds the matched text.
+	    # we do not care for traces of newlines.
+	    if (0 == match(field[i], /\"\n+\"[)]/)) {
+		if (rules[line]) {
+		    # glue the remaining fields back into the matched text
+		    text = field[i];
+		    while ( ++i <= n) {
+			text = text field[i];
+		    }
+		    printf("%s:%d: %-8s -- %s\n",
+			   source, line, text, rules[line]);
+		}
+		else {
+		    # no rule text was saved for this line number
+		    print;
+		    printf "%s:%d: *** No such rule.\n", source, line;
+		}
+	    }
+	    line = 0;
+	    break;
+	}
+	if ("" != field[i]) {
+	    if ("end of buffer or a NUL)" == field[i]) {
+		print dashes;  # Simplify trace of buffer reloads
+		continue;
+	    }
+	    if (match(field[i], /accepting rule at line /)) {
+		# force interpretation of line as a number
+		# (presumably the phrase starts the field, so that
+		# RLENGTH indexes its trailing blank - TODO confirm)
+		line = 0 + substr(field[i], RLENGTH);
+		continue;
+	    }
+	    # echo everything else
+	    printf("--%s\n", field[i]);
+	}
+    }
+    collect = "\n" $0;  # ... and start next trace
+}
--- a/examples/manual/ChangeLog
+++ b/examples/manual/ChangeLog
@ -0,0 +1,24 @@
+Tue Oct  5 21:51:59 1993  Vern Paxson
+
+	* Removed FILTER/ subdirectory.
+
+	* Removed alloca.c.
+
+	* Changed Makefile definition of CC to just "gcc -g", removed
+	  assumption of alloca being present.
+
+	* Added pointer to MISC/fastwc/ to wc.lex.
+
+Tue Jun  8 15:47:39 1993  Gavin Thomas Nicol  (nick at sillybugs)
+
+	* Changed main() in wc.lex extensively. The old version would not
+	  work correctly without the YY_NEW_FILE. (lex handles the older
+	  version OK though).
+
+	* Added a rule to expr.lex to handle whitespace. The old version
+ 	  reported an illegal character.
+
+	* Added -traditional to the gcc flags because the flex definition
+	  for free() clashes with some systems that have old header files.
+	  
+
--- a/examples/manual/Makefile
+++ b/examples/manual/Makefile
@ -0,0 +1,88 @@
+#############################################################
+#
+# Makefile     : Makefile for Flex examples.
+# Author       : G.T.Nicol
+# Last Updated : 1993/10/05
+#
+# If you use bison, you may have to supply an alloca
+#
+#############################################################
+
+CC       = gcc -g
+LEX      = flex -i -I 
+YACC     = bison -d -y
+ALLOCA   =
+
+############################################################
+#
+# DO NOT CHANGE ANYTHING FROM HERE ON !!!!!!!!! 
+#
+############################################################
+
+all: expr front myname eof wc replace user_act string1\
+     string2 yymore numbers dates cat
+
+expr:   expr.y expr.lex
+	$(YACC) expr.y
+	$(LEX)  expr.lex
+	$(CC) -o expr lex.yy.c y.tab.c $(ALLOCA) -ll -lm
+
+front:  front.y front.lex
+	$(YACC) front.y
+	$(LEX)  front.lex
+	$(CC) -o front lex.yy.c y.tab.c $(ALLOCA) -ll -lm
+
+numbers: numbers.lex
+	$(LEX)  numbers.lex
+	$(CC)   lex.yy.c -o numbers
+
+dates: dates.lex
+	$(LEX)  dates.lex
+	$(CC)   lex.yy.c -o dates -ll
+
+yymore: yymore.lex
+	$(LEX)  yymore.lex
+	$(CC)   lex.yy.c -o yymore -ll
+
+string1: string1.lex
+	$(LEX)  string1.lex
+	$(CC)   lex.yy.c -o string1 -ll
+
+string2: string2.lex
+	$(LEX)  string2.lex
+	$(CC)   lex.yy.c -o string2 -ll
+
+myname: myname.lex
+	$(LEX)  myname.lex
+	$(CC)   lex.yy.c -o myname -ll
+
+myname2: myname2.lex
+	$(LEX)  myname2.lex
+	$(CC)   lex.yy.c -o myname2 -ll
+
+eof:    eof_rules.lex
+	$(LEX)  eof_rules.lex
+	$(CC)   lex.yy.c -o eof -ll
+
+wc:     wc.lex
+	$(LEX)  wc.lex
+	$(CC)   lex.yy.c -o wc -ll
+
+cat:    cat.lex
+	$(LEX)  cat.lex
+	$(CC)   lex.yy.c -o cat -ll
+
+replace: replace.lex
+	$(LEX)  replace.lex
+	$(CC)   lex.yy.c -o replace -ll
+
+# user_act is built from user_act.lex alone, so that file is its only
+# prerequisite.
+user_act: user_act.lex
+	$(LEX)  user_act.lex
+	$(CC) -o user_act lex.yy.c -ll 
+
+# Remove editor backups, intermediate files and every example binary
+# that a target in this Makefile can build (including myname2).
+clean:
+	rm -f *.BAK *.o core *~* *.a 
+	rm -f *.tab.h *.tab.c
+	rm -f myname myname2 expr lex.yy.c *.out eof wc yymore
+	rm -f replace front user_act string1 string2
+	rm -f dates numbers cat
--- a/examples/manual/README
+++ b/examples/manual/README
@ -0,0 +1,17 @@
+This directory contains the example programs from the manual, and a few 
+other things as well. To make all the programs, simply type "make",
+and assuming you have flex and gcc, all will be well.
+
+To build the programs individually, type
+
+   make program_name
+
+For example:
+
+   make expr
+
+
+The subdirectory FILTER (since removed; see ChangeLog) contained a
+collection of the silly filters that have appeared on the Internet over
+the years. The author of the flex manual included them for fun, but did
+not guarantee they would work with flex, or even work at all.
--- a/examples/manual/cat.lex
+++ b/examples/manual/cat.lex
@ -0,0 +1,45 @@
+/*
+ * cat.lex: A demonstration of YY_NEW_FILE.
+ */
+
+%{
+#include <stdio.h>
+
+char **names = NULL;
+int  current = 1;
+%}
+
+%%
+<<EOF>> {
+           /* The current file is exhausted: close it so that one
+            * stream is not leaked per input file, then move on to
+            * the next name (argv is NULL-terminated).
+            */
+           fclose(yyin);
+           current += 1;
+           if(names[current] != NULL){
+              yyin = fopen(names[current],"r");
+              if(yyin == NULL){
+                fprintf(stderr,"cat: unable to open %s\n",
+                        names[current]);
+                yyterminate();
+              }
+              YY_NEW_FILE;
+           } else {
+             yyterminate();
+           }
+        }
+%%
+
+/*
+ * Concatenate the files named on the command line to the scanner's
+ * output.  Returns 0 on success; exits with status 1 when no file is
+ * given or the first file cannot be opened.
+ */
+int main(int argc, char **argv)
+{
+    if(argc < 2){
+       fprintf(stderr,"Usage: cat files....\n");
+       exit(1);
+    }
+    names = argv;
+
+    yyin = fopen(names[current],"r");
+    if(yyin == NULL){
+      fprintf(stderr,"cat: unable to open %s\n",
+              names[current]);
+      /* yyterminate() is only meaningful inside yylex(); here it
+       * would just return 0 from main and report success.
+       */
+      exit(1);
+    }
+
+    yylex();
+    return 0;
+}
--- a/examples/manual/dates.lex
+++ b/examples/manual/dates.lex
@ -0,0 +1,106 @@
+/*
+ * dates.lex: An example of using start states to
+ *            distinguish between different date formats.
+ */
+
+%{
+#include <ctype.h>
+
+char month[20],dow[20],day[20],year[20];
+
+%}
+
+skip        of|the|[ \t,]*
+
+mon	    (mon(day)?)
+tue	    (tue(sday)?)
+wed	    (wed(nesday)?)
+thu	    (thu(rsday)?)
+fri	    (fri(day)?)
+sat	    (sat(urday)?)
+sun	    (sun(day)?)
+
+day_of_the_week	({mon}|{tue}|{wed}|{thu}|{fri}|{sat}|{sun})
+
+jan	    (jan(uary)?)
+feb	    (feb(ruary)?)
+mar	    (mar(ch)?)
+apr	    (apr(il)?)
+may	    (may)
+jun	    (jun(e)?)
+jul	    (jul(y)?)
+aug	    (aug(ust)?)
+sep	    (sep(tember)?)
+oct	    (oct(ober)?)
+nov	    (nov(ember)?)
+dec	    (dec(ember)?)
+
+first_half  ({jan}|{feb}|{mar}|{apr}|{may}|{jun})
+second_half ({jul}|{aug}|{sep}|{oct}|{nov}|{dec})
+month 	    {first_half}|{second_half}
+
+nday         [1-9]|[1-2][0-9]|3[0-1]
+nmonth       [1-9]|1[0-2]
+nyear        [0-9]{1,4}
+
+year_ext    (ad|AD|bc|BC)?
+day_ext     (st|nd|rd|th)?
+
+%s LONG SHORT
+%s DAY DAY_FIRST YEAR_FIRST YEAR_LAST YFMONTH YLMONTH
+
+%%
+
+  /* the default is month-day-year */
+
+<LONG>{day_of_the_week}    strcpy(dow,yytext); 
+<LONG>{month}              strcpy(month,yytext); BEGIN(DAY);
+
+  /* handle the form: day-month-year */
+
+<LONG>{nday}{day_ext}      strcpy(day,yytext);   BEGIN(DAY_FIRST);
+<DAY_FIRST>{month}         strcpy(month,yytext); BEGIN(LONG);
+<DAY>{nday}{day_ext}       strcpy(day,yytext);   BEGIN(LONG);  
+
+<LONG>{nyear}{year_ext}  {
+                           printf("Long:\n");
+                           printf("  DOW   : %s \n",dow);
+                           printf("  Day   : %s \n",day);
+                           printf("  Month : %s \n",month);
+                           printf("  Year  : %s \n",yytext);
+                           strcpy(dow,"");
+                           strcpy(day,"");
+                           strcpy(month,"");
+                         }
+
+  /* handle dates of the form: day-month-year */
+
+<SHORT>{nday}              strcpy(day,yytext);  BEGIN(YEAR_LAST);
+<YEAR_LAST>{nmonth}        strcpy(month,yytext);BEGIN(YLMONTH);
+<YLMONTH>{nyear}           strcpy(year,yytext); BEGIN(SHORT);
+
+  /* handle dates of the form: year-month-day */
+
+<SHORT>{nyear}             strcpy(year,yytext); BEGIN(YEAR_FIRST);
+<YEAR_FIRST>{nmonth}       strcpy(month,yytext);BEGIN(YFMONTH);
+<YFMONTH>{nday}            strcpy(day,yytext);  BEGIN(SHORT);
+
+
+<SHORT>\n                {
+                           printf("Short:\n");
+                           printf("  Day   : %s \n",day);
+                           printf("  Month : %s \n",month);
+                           printf("  Year  : %s \n",year);
+                           strcpy(year,""); 
+                           strcpy(day,"");
+                           strcpy(month,"");
+                         }
+
+long\n                      BEGIN(LONG);
+short\n                     BEGIN(SHORT);
+
+{skip}*
+\n
+.
+
+
--- a/examples/manual/datetest.dat
+++ b/examples/manual/datetest.dat
@ -0,0 +1,28 @@
+short
+1989:12:23
+1989:11:12
+23:12:1989
+11:12:1989
+1989/12/23
+1989/11/12
+23/12/1989
+11/12/1989
+1989-12-23
+1989-11-12
+23-12-1989
+11-12-1989
+long
+Friday the 5th of January, 1989
+Friday, 5th of January, 1989
+Friday, January 5th, 1989
+Fri, January 5th, 1989
+Fri, Jan 5th, 1989
+Fri, Jan 5, 1989
+FriJan 5, 1989
+FriJan5, 1989
+FriJan51989
+Jan51989
+
+
+
+
--- a/examples/manual/eof_rules.lex
+++ b/examples/manual/eof_rules.lex
@ -0,0 +1,65 @@
+/*
+ * eof_rules.lex : An example of using multiple buffers
+ *                 EOF rules, and start states
+ */
+
+%{
+                              
+#define MAX_NEST 10                   
+
+YY_BUFFER_STATE include_stack[MAX_NEST];
+int             include_count = -1;
+
+%}
+
+
+%x INCLUDE
+
+%%
+
+^"#include"[ \t]*\"  BEGIN(INCLUDE);
+<INCLUDE>\"          BEGIN(INITIAL); 
+<INCLUDE>[^\"]+ {      /* get the include file name */
+          /* include_count is the index of the most recently saved
+           * buffer (-1 when nothing is saved), so the last usable
+           * slot of include_stack is MAX_NEST - 1.
+           */
+          if ( include_count >= MAX_NEST - 1 ){
+             fprintf( stderr, "Too many include files" );
+             exit( 1 );
+          }
+
+          /* remember where we are in the including file */
+          include_stack[++include_count] = YY_CURRENT_BUFFER;
+
+          yyin = fopen( yytext, "r" );
+          if ( ! yyin ){
+             fprintf( stderr, "Unable to open \"%s\"\n",yytext);
+             exit( 1 );
+          }
+
+          yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
+
+          BEGIN(INITIAL);
+        }
+<INCLUDE><<EOF>> {
+            /* the action has to start on the same line as the pattern */
+            fprintf( stderr, "EOF in include" );
+            yyterminate();
+        }
+<<EOF>> {
+          if ( include_count < 0 ){
+            /* end of the outermost file */
+            yyterminate();
+          } else {
+            /* End of an included file: discard its buffer and resume
+             * the including file.  That file is positioned just
+             * before the closing quote of the #include line, so
+             * re-enter INCLUDE to let the quote be consumed.
+             */
+            yy_delete_buffer( YY_CURRENT_BUFFER );
+            yy_switch_to_buffer(include_stack[include_count--] );
+            BEGIN(INCLUDE);
+          }
+        }
+[a-z]+               ECHO;
+.|\n                 ECHO;
+
+
+
+
+
+
+
+
+
+
--- a/examples/manual/eof_test01.txt
+++ b/examples/manual/eof_test01.txt
@ -0,0 +1,17 @@
+This is test file #1
+
+-------------------------------------------------
+
+We will now include test #2 in a standard way.
+
+#include "eof_test02.txt"
+
+-------------------------------------------------
+
+And now we will include test # 2 with a different
+format
+
+#include "eof_test02.txt"
+-------------------------------------------------
+
+and this is the end of the test.
--- a/examples/manual/eof_test02.txt
+++ b/examples/manual/eof_test02.txt
@ -0,0 +1,8 @@
+INCLUDE #2
+
+This is the second file that will
+be included.
+
+>>> Foo are GNU?
+
+#include "eof_test03.txt"
--- a/examples/manual/eof_test03.txt
+++ b/examples/manual/eof_test03.txt
@ -0,0 +1,7 @@
+INCLUDE #3
+
+This is the third file that will
+be included.
+
+>>> echo "I am `whoami`!!"
+
--- a/examples/manual/expr.lex
+++ b/examples/manual/expr.lex
@ -0,0 +1,35 @@
+/*
+ * expr.lex : Scanner for a simple
+ *            expression parser.
+ */
+
+%{
+#include "y.tab.h"
+
+%}
+
+%%
+
+[0-9]+     { yylval.val = atol(yytext);
+             return(NUMBER);
+           }
+[0-9]+\.[0-9]+ { 
+             sscanf(yytext,"%f",&yylval.val);
+             return(NUMBER);
+           }
+"+"        return(PLUS);
+"-"        return(MINUS);
+"*"        return(MULT);
+"/"        return(DIV);
+"^"        return(EXPON);
+"("        return(LB);
+")"        return(RB);
+\n         return(EOL);
+[\t ]*     /* throw away whitespace */
+.          { yyerror("Illegal character"); 
+             return(EOL);
+           }
+%%
+
+
+
--- a/examples/manual/expr.y
+++ b/examples/manual/expr.y
@ -0,0 +1,64 @@
+/*
+ * expr.y : A simple yacc expression parser
+ *          Based on the Bison manual example. 
+ */
+
+%{
+#include <stdio.h>
+#include <math.h>
+
+%}
+
+%union {
+   float val;
+}
+
+%token NUMBER
+%token PLUS MINUS MULT DIV EXPON
+%token EOL
+%token LB RB
+
+%left  MINUS PLUS
+%left  MULT DIV
+%right EXPON
+
+%type  <val> exp NUMBER
+
+%%
+input   :
+        | input line
+        ;
+
+line    : EOL
+        | exp EOL { printf("%g\n",$1);}
+
+exp     : NUMBER                 { $$ = $1;        }
+        | exp PLUS  exp          { $$ = $1 + $3;   }
+        | exp MINUS exp          { $$ = $1 - $3;   }
+        | exp MULT  exp          { $$ = $1 * $3;   }
+        | exp DIV   exp          { $$ = $1 / $3;   }
+        | MINUS  exp %prec MINUS { $$ = -$2;       }
+        | exp EXPON exp          { $$ = pow($1,$3);}
+        | LB exp RB                      { $$ = $2;        }
+        ;
+
+%%
+
+/*
+ * Error callback used by the parser and the scanner: print the message
+ * on standard output.  Always returns 0.
+ */
+int yyerror(char *message)
+{
+  printf("%s\n",message);
+  return(0);
+}
+
+int main(int argc, char *argv[])
+{
+  yyparse();
+  return(0);
+}
+
+
+
+
+
+
+
+
--- a/examples/manual/front.lex
+++ b/examples/manual/front.lex
@ -0,0 +1,40 @@
+%{
+#include <stdio.h>
+#include <string.h>
+#include "y.tab.h"                 /* this comes from bison        */
+
+#define TRUE  1
+#define FALSE 0
+
+/*
+ * Copy the matched text into yylval.name and return the given token.
+ * The copy is bounded by the size of yylval.name and always
+ * NUL-terminated, so an over-long token is truncated rather than
+ * written past the end of the array.
+ */
+#define copy_and_return(token_type) { strncpy(yylval.name,yytext,sizeof(yylval.name)-1); \
+                                      yylval.name[sizeof(yylval.name)-1] = '\0'; \
+                                      return(token_type); }
+
+int             yylexlinenum = 0;  /* so we can count lines        */
+%}
+
+%%
+                            /* Lexical scanning rules begin from here.  */
+
+MEN|WOMEN|STOCKS|TREES      copy_and_return(NOUN)
+MISTAKES|GNUS|EMPLOYEES     copy_and_return(NOUN)
+LOSERS|USERS|CARS|WINDOWS   copy_and_return(NOUN)
+
+DATABASE|NETWORK|FSF|GNU    copy_and_return(PROPER_NOUN)
+COMPANY|HOUSE|OFFICE|LPF    copy_and_return(PROPER_NOUN)
+
+THE|THIS|THAT|THOSE         copy_and_return(DECLARATIVE)
+
+ALL|FIRST|LAST              copy_and_return(CONDITIONAL)
+
+FIND|SEARCH|SORT|ERASE|KILL copy_and_return(VERB)
+ADD|REMOVE|DELETE|PRINT     copy_and_return(VERB)
+
+QUICKLY|SLOWLY|CAREFULLY    copy_and_return(ADVERB)
+
+IN|AT|ON|AROUND|INSIDE|ON   copy_and_return(POSITIONAL)
+
+"."                         return(PERIOD);                             
+"\n"                        yylexlinenum++; return(NEWLINE);            
+.                                                                       
+%%
+
--- a/examples/manual/front.y
+++ b/examples/manual/front.y
@ -0,0 +1,118 @@
+/* C code supplied at the beginning of the file.  */
+
+%{
+
+#include <stdio.h>                          
+#include <string.h>
+
+extern int  yylexlinenum;                   /* these are in YYlex      */
+extern char *yytext;                        /* current token           */
+
+
+%}
+
+/* Keywords and reserved words begin here.  */
+
+%union{                                     /* this is the data union  */
+    char   name[128];                       /* names                   */
+}
+
+/*-------------------- the reserved words -----------------------------*/
+
+%token PERIOD
+%token NEWLINE
+%token POSITIONAL
+
+%token VERB
+%token ADVERB
+
+%token PROPER_NOUN
+%token NOUN
+
+%token DECLARATIVE
+%token CONDITIONAL
+
+
+%type  <name> declarative
+%type  <name> verb_phrase
+%type  <name> noun_phrase
+%type  <name> position_phrase
+%type  <name> adverb
+
+%type  <name> POSITIONAL VERB ADVERB PROPER_NOUN 
+%type  <name> NOUN DECLARATIVE CONDITIONAL
+
+%%
+
+sentence_list : sentence
+              | sentence_list NEWLINE sentence
+              ;
+
+
+sentence : verb_phrase noun_phrase position_phrase adverb period 
+           {
+             printf("I understand that sentence.\n");
+             printf("VP = %s \n",$1);
+             printf("NP = %s \n",$2);
+             printf("PP = %s \n",$3);
+             printf("AD = %s \n",$4);
+           }
+         | { yyerror("That's a strange sentence !!");  }
+         ;
+
+position_phrase : POSITIONAL  declarative PROPER_NOUN 
+                  {
+                    sprintf($$,"%s %s %s",$1,$2,$3);
+                  }   
+                | /* empty */ { strcpy($$,""); }
+                ;
+               
+
+verb_phrase : VERB { strcpy($$,$1); strcat($$," "); }
+            | adverb VERB  
+              {
+                sprintf($$,"%s %s",$1,$2);
+              }
+            ;
+
+adverb : ADVERB      { strcpy($$,$1); }
+       | /* empty */ { strcpy($$,""); }
+       ;
+
+noun_phrase : DECLARATIVE NOUN 
+              {
+                sprintf($$,"%s %s",$1,$2);
+              }
+            | CONDITIONAL declarative NOUN 
+                  {
+                    sprintf($$,"%s %s %s",$1,$2,$3);
+                  }   
+            | NOUN { strcpy($$,$1); strcat($$," "); }
+            ;
+
+declarative : DECLARATIVE { strcpy($$,$1); }
+            | /* empty */ { strcpy($$,""); }
+            ;
+
+period : /* empty */
+       | PERIOD
+       ;
+
+
+%%
+
+/* Supplied main() and yyerror() functions.  */
+
+int main(int argc, char *argv[])
+{
+  yyparse();   /* parse the file          */
+  return(0);
+}
+
+/*
+ * Report an error on the scanner's output stream (yyout), together
+ * with the line counter kept by the scanner.  Always returns 0.
+ */
+int yyerror(char *message)
+{
+  extern FILE *yyout;
+
+  fprintf(yyout,"\nError at line %5d. (%s) \n",
+                     yylexlinenum,message);
+  return(0);
+}
--- a/examples/manual/j2t.lex
+++ b/examples/manual/j2t.lex
@ -0,0 +1,442 @@
+/*
+ * j2t.lex : An example of the use (possibly abuse!)
+ *           of start states.
+ */
+
+%{
+#define MAX_STATES 1024
+#define TRUE  1
+#define FALSE 0
+
+#define CHAPTER   "@chapter"
+#define SECTION   "@section"
+#define SSECTION  "@subsection"
+#define SSSECTION "@subsubsection"
+
+int  states[MAX_STATES];
+int  statep = 0;
+
+int  need_closing = FALSE;
+
+char buffer[YY_BUF_SIZE];
+
+extern char *yytext;
+
+/*
+ * set up the head of the *.texinfo file the program
+ * will produce. This is a standard texinfo header.
+ */
+
+void print_header(void)
+{
+   /* the header text, written out verbatim in this order */
+   static const char *const header_text[] = {
+      "\\input texinfo @c -*-texinfo-*-\n",
+      "@c           %**start of header\n",
+      "@setfilename       jargon.info\n",
+      "@settitle          The New Hackers Dictionary\n",
+      "@synindex          fn cp\n",
+      "@synindex          vr cp\n",
+      "@c           %**end of header\n",
+      "@setchapternewpage odd\n",
+      "@finalout\n",
+      "@c @smallbook\n",
+      "\n",
+      "@c ==========================================================\n\n",
+      "@c  This file was produced by j2t. Any mistakes are *not* the\n",
+      "@c  fault of the jargon file editors.                        \n",
+      "@c ==========================================================\n\n",
+      "@titlepage\n",
+      "@title     The New Hackers Dictionary\n",
+      "@subtitle  Version 2.9.10\n",
+      "@subtitle  Generated by j2t\n",
+      "@author    Eric S. Raymond, Guy L. Steel, Mark Crispin et al.\n",
+      "@end titlepage\n",
+      "@page\n",
+      "\n@c ==========================================================\n",
+      "\n\n",
+      "@unnumbered Preface\n",
+      "@c          *******\n"
+   };
+   unsigned int entry;
+
+   /* "%s" keeps the '%' characters in the table literal */
+   for(entry = 0; entry < sizeof(header_text)/sizeof(header_text[0]); entry++){
+      printf("%s",header_text[entry]);
+   }
+}
+
+/*
+ * create the tail of the texinfo file produced.
+ */
+
+void print_trailer(void)
+{
+   /* separator line, the table of contents request, then the end marker */
+   printf("\n@c ==========================================================\n"
+          "@contents\n"
+          "@bye\n\n");
+}
+
+/*
+ * write an underline under a section
+ * or chapter so we can find it later. 
+ */
+
+void write_underline(int len, int space, char ch)
+{
+  int column = 3;
+
+  /* the comment marker itself takes up the first three columns */
+  printf("@c ");
+
+  /* pad with blanks up to the requested column */
+  while(column < space){
+    printf(" ");
+    column++;
+  }
+
+  /* then draw len copies of the underline character */
+  for( ; len != 0; len--){
+    printf("%c",ch);
+  }
+  printf("\n\n");
+}
+
+/*
+ * check for texinfo special characters
+ * and escape them
+ */
+
+/*
+ * Copy string into the global buffer, prefixing each of the texinfo
+ * special characters '@', '{' and '}' with '@'.  Escaping can double
+ * the length of the text, so copying stops once the next character
+ * would no longer fit; the result is always NUL-terminated.
+ * Returns buffer.
+ */
+char *check_and_convert(char *string)
+{
+  int  buffpos = 0;
+  int  limit   = (int)sizeof(buffer) - 1;   /* keep room for the '\0' */
+  int  loop;
+
+  for(loop=0; string[loop] != '\0'; loop++){
+    int special = (string[loop] == '@' || string[loop] == '{' ||
+                   string[loop] == '}');
+
+    if(buffpos + (special ? 2 : 1) > limit){
+      break;
+    }
+    if(special){
+      buffer[buffpos++] = '@';
+    }
+    buffer[buffpos++] = string[loop];
+  }
+  buffer[buffpos] = '\0';
+  return(buffer);
+}
+
+/*
+ * write out a chapter,section, or subsection
+ * header
+ */
+
+/*
+ * Print "<type> <title>" followed by an underline of asterisks, where
+ * the title is the first line of the current token (yytext) with the
+ * texinfo special characters escaped.
+ */
+void write_block_header(char *type)
+{
+  int loop;
+
+  (void)check_and_convert(yytext);
+  /* cut the text at the first newline; also stop at the terminator
+   * so the scan cannot run off the end when there is no newline
+   */
+  for(loop=0; buffer[loop] != '\n' && buffer[loop] != '\0'; loop++)
+         ;
+  buffer[loop] = '\0';
+  printf("%s %s\n",type,buffer);
+  write_underline(strlen(buffer),strlen(type)+1,'*');
+}
+
+%}
+
+/*
+ * the flex description starts here 
+ */
+
+%x HEADING EXAMPLE ENUM EXAMPLE2
+%x BITEM BITEM_ITEM 
+%s LITEM LITEM2 
+
+%%
+
+^#[^#]*"#"  /* skip the header & trailer */
+                     /* chapters have asterisks under them 
+                      * and are terminated by a colon
+                      */
+^[^\n:]+\n[*]+\n      write_block_header(CHAPTER); 
+
+^"= "[A-Z]" ="\n"="*  { /* we create a section for each category */
+                        if(need_closing == TRUE){
+                          printf("@end table\n\n\n");
+                        }
+                        need_closing = TRUE;
+                        write_block_header(SECTION);
+                        printf("\n\n@table @b\n");
+                      }
+
+"Examples:"[^\.]+     ECHO;
+
+"*"[^*\n]+"*"         { /* @emph{}(emphasized) text */
+                        yytext[yyleng-1] = '\0';
+                        (void)check_and_convert(&yytext[1]);
+                        printf("@i{%s}",buffer);
+                      }
+
+"{{"[^}]+"}}"         { /* special emphasis */
+                        yytext[yyleng-2] = '\0';
+                        (void)check_and_convert(&yytext[2]);
+                        printf("@b{%s}",buffer);
+                      }
+
+"{"[^}]+"}"           { /* special emphasis */
+                        yytext[yyleng-1] = '\0';
+                        (void)check_and_convert(&yytext[1]);
+                        printf("@b{%s}",buffer);
+                      }
+  
+ /* escape some special texinfo characters */
+<INITIAL,LITEM,LITEM2,BITEM,ENUM,EXAMPLE,EXAMPLE2>"@"  printf("@@");
+<INITIAL,LITEM,LITEM2,BITEM,ENUM,EXAMPLE,EXAMPLE2>"{"  printf("@{");
+<INITIAL,LITEM,LITEM2,BITEM,ENUM,EXAMPLE,EXAMPLE2>"}"  printf("@}");
+
+ /*
+  * reproduce @example code
+  */
+
+":"\n+[^\n0-9*]+\n"     "[^ ]   {
+                        int loop;
+                        int len;
+                        int cnt;
+
+                        printf(":\n\n@example \n");
+                        strcpy(buffer,yytext);
+                        len = strlen(buffer);
+                        cnt = 0;
+                        for(loop=len; loop > 0;loop--){
+                          if(buffer[loop] == '\n')
+                             cnt++;
+                          if(cnt == 2)
+                              break;
+                        }
+                        yyless(loop+1);
+                        statep++;
+                        states[statep] = EXAMPLE2;
+                        BEGIN(EXAMPLE2);
+                      }
+<EXAMPLE,EXAMPLE2>^\n  {
+                      printf("@end example\n\n");
+                      statep--; 
+                      BEGIN(states[statep]);
+                    }
+
+ /*
+  * reproduce @enumerate lists
+  */
+
+":"\n+[ \t]*[0-9]+"."   {
+                      int loop;
+                      int len;
+
+                      printf(":\n\n@enumerate \n");
+                      strcpy(buffer,yytext);
+                      len = strlen(buffer);
+                      for(loop=len; loop > 0;loop--){
+                        if(buffer[loop] == '\n')
+                           break;
+                      }
+                      yyless(loop);
+                      statep++;
+                      states[statep] = ENUM;
+                      BEGIN(ENUM);
+                    }
+
+<ENUM>"@"           printf("@@");
+<ENUM>":"\n+"     "[^0-9]    {
+                    printf(":\n\n@example\n");
+                    statep++;
+                    states[statep] = EXAMPLE;
+                    BEGIN(EXAMPLE); 
+                  }
+
+
+<ENUM>\n[ \t]+[0-9]+"." {
+                    printf("\n\n@item ");
+                   }
+<ENUM>^[^ ] | 
+<ENUM>\n\n\n[ \t]+[^0-9] {
+                    printf("\n\n@end enumerate\n\n");
+                    statep--;
+                    BEGIN(states[statep]);
+                  }
+ 
+ /* 
+  * reproduce one kind of @itemize list
+  */
+
+":"\n+":"         {
+                    int loop;
+                    int len;
+
+                    printf(":\n\n@itemize @bullet \n");
+                    yyless(2);
+                    statep++;
+                    states[statep] = LITEM2;
+                    BEGIN(LITEM2);
+                  }
+<LITEM2>^":".+":" {
+                    (void)check_and_convert(&yytext[1]);
+                    buffer[strlen(buffer)-1]='\0';
+                    printf("@item @b{%s:}\n",buffer);
+                  }
+ 
+<LITEM2>\n\n\n+[^:\n] {
+                    printf("\n\n@end itemize\n\n");
+                    ECHO;
+                    statep--;
+                    BEGIN(states[statep]);
+                  }
+ 
+ /*
+  * create a list out of the revision history part.
+  * We need the "Version" for this because it 
+  * clashes with other rules otherwise.
+  */
+
+:[\n]+"Version"[^:\n*]+":" {
+                    int loop;
+                    int len;
+
+                    printf(":\n\n@itemize @bullet \n");
+                    strcpy(buffer,yytext);
+                    len = strlen(buffer);
+                    for(loop=len; loop > 0;loop--){
+                      if(buffer[loop] == '\n')
+                         break;
+                    }
+                    yyless(loop);
+                    statep++;
+                    states[statep] = LITEM;
+                    BEGIN(LITEM);
+                  }
+<LITEM>^.+":"     {
+                    (void)check_and_convert(yytext);
+                    buffer[strlen(buffer)-1]='\0';
+                    printf("@item @b{%s}\n\n",buffer);
+                  }
+ 
+<LITEM>^[^:\n]+\n\n[^:\n]+\n  {
+                    int loop;
+
+                    strcpy(buffer,yytext);
+                    for(loop=0; buffer[loop] != '\n'; loop++);
+                    buffer[loop] = '\0';
+                    printf("%s\n",buffer);
+                    printf("@end itemize\n\n");
+                    printf("%s",&buffer[loop+1]);
+                    statep--;
+                    BEGIN(states[statep]);
+                  }
+ 
+ /*
+  * reproduce @itemize @bullet lists
+  */
+
+":"\n[ ]*"*"      {
+                    int loop;
+                    int len;
+
+                    printf(":\n\n@itemize @bullet \n");
+                    /* NOTE(review): unlike the other list rules, this
+                     * one does not copy yytext into buffer first, so
+                     * len and loop are computed from whatever an
+                     * earlier action left in buffer -- confirm whether
+                     * strcpy(buffer,yytext) was intended here.
+                     */
+                    len = strlen(buffer);
+                    for(loop=0; loop < len;loop++){
+                      if(buffer[loop] == '\n')
+                         break;
+                    }
+                    yyless((len-loop)+2);
+                    /* push the new state so it can be popped later */
+                    statep++;
+                    states[statep] = BITEM;
+                    BEGIN(BITEM);
+                  }
+
+<BITEM>^" "*"*"   {
+                    printf("@item");
+                    statep++;
+                    states[statep] = BITEM_ITEM;
+                    BEGIN(BITEM_ITEM);
+                  }
+<BITEM>"@"          printf("@@");
+<BITEM>^\n        { 
+                    printf("@end itemize\n\n");
+                    statep--;
+                    BEGIN(states[statep]);
+                  } 
+<BITEM_ITEM>[^\:]* {
+                     printf(" @b{%s}\n\n",check_and_convert(yytext));
+                   }
+<BITEM_ITEM>":"   { 
+                    statep--; 
+                    BEGIN(states[statep]);
+                  }
+
+ /* 
+  * recreate @chapter, @section etc.
+  */
+
+^:[^:]*           { 
+                    (void)check_and_convert(&yytext[1]); 
+                    statep++;
+                    states[statep] = HEADING;
+                    BEGIN(HEADING); 
+                  }
+<HEADING>:[^\n]   {
+                    printf("@item @b{%s}\n",buffer); 
+                    write_underline(strlen(buffer),6,'~');
+                    statep--; 
+                    BEGIN(states[statep]);
+                  }
+<HEADING>:\n"*"*  { 
+                    if(need_closing == TRUE){
+                      printf("@end table\n\n\n");
+                      need_closing = FALSE;
+                    }
+                    printf("@chapter %s\n",buffer); 
+                    write_underline(strlen(buffer),9,'*');
+                    statep--; 
+                    BEGIN(states[statep]);
+                  }
+<HEADING>:\n"="*  { 
+                    if(need_closing == TRUE){
+                     printf("@end table\n\n\n");
+                      need_closing = FALSE;
+                    }
+                    printf("@section %s\n",buffer); 
+                    write_underline(strlen(buffer),9,'=');
+                    statep--; 
+                    BEGIN(states[statep]);
+                  }
+<HEADING>"@"        printf("@@");
+<HEADING>:\n"-"*  { 
+                    if(need_closing == TRUE){
+                      printf("@end table\n\n\n");
+                      need_closing = FALSE;
+                    }
+                    printf("@subsection %s\n",buffer); 
+                    write_underline(strlen(buffer),12,'-');
+                    statep--; 
+                    BEGIN(states[statep]);
+                  }
+
+ /*
+  * recreate @example text
+  */
+
+^"     "          {
+                    printf("@example\n");
+                    statep++;
+                    states[statep] = EXAMPLE;
+                    BEGIN(EXAMPLE); 
+                  }
+<EXAMPLE>^"     "
+.                 ECHO;
+
+%%
+
+/*
+ * initialise and go.
+ */
+
+int main(int argc, char *argv[])
+{
+  states[0] = INITIAL;
+  statep    = 0;
+  print_header();
+  yylex();
+  print_trailer();
+  return(0);
+}
+
+
+
--- a/examples/manual/myname.lex
+++ b/examples/manual/myname.lex
@ -0,0 +1,15 @@
+/*
+ *
+ * myname.lex : A sample Flex program
+ *              that does token replacement.
+ *
+ * Each %TOKEN is replaced by the value of the matching environment
+ * variable.  getenv() returns NULL for a variable that is not set
+ * (HOST and HOSTTYPE are frequently not exported), and handing NULL
+ * to a %s conversion is undefined, so in that case the token is
+ * echoed unchanged instead.
+ */
+
+%{
+#include <stdio.h>
+#include <stdlib.h>   /* getenv() */
+%}
+
+%%
+
+%NAME     {
+            const char *value = getenv("LOGNAME");
+            if(value != NULL) printf("%s",value); else ECHO;
+          }
+%HOST     {
+            const char *value = getenv("HOST");
+            if(value != NULL) printf("%s",value); else ECHO;
+          }
+%HOSTTYPE {
+            const char *value = getenv("HOSTTYPE");
+            if(value != NULL) printf("%s",value); else ECHO;
+          }
+%HOME     {
+            const char *value = getenv("HOME");
+            if(value != NULL) printf("%s",value); else ECHO;
+          }
+
+%%
+
+
--- a/examples/manual/myname.txt
+++ b/examples/manual/myname.txt
@ -0,0 +1,6 @@
+Hello, my name is %NAME. Actually 
+"%NAME" isn't my real name, it is the
+alias I use when I'm on %HOST, which
+is the %HOSTTYPE I use. My HOME 
+directory is %HOME. 
+
--- a/examples/manual/myname2.lex
+++ b/examples/manual/myname2.lex
@ -0,0 +1,19 @@
+/*
+ * myname2.lex : A sample Flex program
+ *               that does token replacement.                      
+ *
+ * Text inside double quotes is copied through untouched (exclusive
+ * start state STRING); elsewhere each %TOKEN is replaced by the
+ * value of the matching environment variable.  getenv() returns
+ * NULL for a variable that is not set, and handing NULL to a %s
+ * conversion is undefined, so in that case the token is echoed
+ * unchanged instead.
+ */
+
+%{
+#include <stdio.h>
+#include <stdlib.h>   /* getenv() */
+%}
+
+%x STRING
+%%
+\"                ECHO; BEGIN(STRING); 
+<STRING>[^\"\n]*  ECHO;
+<STRING>\"        ECHO; BEGIN(INITIAL);
+
+%NAME     {
+            const char *value = getenv("LOGNAME");
+            if(value != NULL) printf("%s",value); else ECHO;
+          }
+%HOST     {
+            const char *value = getenv("HOST");
+            if(value != NULL) printf("%s",value); else ECHO;
+          }
+%HOSTTYPE {
+            const char *value = getenv("HOSTTYPE");
+            if(value != NULL) printf("%s",value); else ECHO;
+          }
+%HOME     {
+            const char *value = getenv("HOME");
+            if(value != NULL) printf("%s",value); else ECHO;
+          }
--- a/examples/manual/numbers.lex
+++ b/examples/manual/numbers.lex
@ -0,0 +1,145 @@
+/*
+ * numbers.lex : An example of the definitions and techniques
+ *               for scanning numbers
+ */
+
+%{
+#include <stdio.h>
+
+#define UNSIGNED_LONG_SYM   1
+#define SIGNED_LONG_SYM     2
+#define UNSIGNED_SYM        3
+#define SIGNED_SYM          4
+#define LONG_DOUBLE_SYM     5
+#define FLOAT_SYM           6
+
+union _yylval {
+  long double    ylong_double;
+  float          yfloat; 
+  unsigned long  yunsigned_long;
+  unsigned       yunsigned;
+  long           ysigned_long;
+  int            ysigned;
+} yylval;
+
+%}
+
+digit             [0-9]
+hex_digit         [0-9a-fA-F]
+oct_digit         [0-7]
+
+exponent          [eE][+-]?{digit}+
+i                 {digit}+
+float_constant    ({i}\.{i}?|{i}?\.{i}){exponent}?
+hex_constant      0[xX]{hex_digit}+
+oct_constant      0{oct_digit}*
+int_constant      {digit}+
+long_ext          [lL]
+unsigned_ext      [uU]
+float_ext         [fF]
+ulong_ext         {long_ext}{unsigned_ext}|{unsigned_ext}{long_ext}
+
+%%
+
+ /*
+  * Every rule converts the matched text with sscanf() into the union
+  * member that corresponds to the token code it returns.  The
+  * conversion specifier has to match the type of that member exactly:
+  * %lu for unsigned long, %u for unsigned, %Lf for long double.
+  * sscanf() stops at the first character that is not part of the
+  * number, so the l/u/f suffixes need no special treatment.
+  */
+
+{hex_constant}{ulong_ext} {  /* we need to skip the "0x" part */
+                             sscanf(&yytext[2],"%lx",&yylval.yunsigned_long); 
+                             return(UNSIGNED_LONG_SYM);
+                          }
+{hex_constant}{long_ext}  {  
+                             sscanf(&yytext[2],"%lx",&yylval.ysigned_long); 
+                             return(SIGNED_LONG_SYM);
+                          }
+{hex_constant}{unsigned_ext}  { 
+                             sscanf(&yytext[2],"%x",&yylval.yunsigned); 
+                             return(UNSIGNED_SYM);
+                          }
+{hex_constant}            { /* use %lx to protect against overflow */
+                             sscanf(&yytext[2],"%lx",&yylval.ysigned_long); 
+                             return(SIGNED_LONG_SYM);
+                          }
+{oct_constant}{ulong_ext} {
+                             sscanf(yytext,"%lo",&yylval.yunsigned_long); 
+                             return(UNSIGNED_LONG_SYM);
+                          }
+{oct_constant}{long_ext}  {
+                             sscanf(yytext,"%lo",&yylval.ysigned_long); 
+                             return(SIGNED_LONG_SYM);
+                          }
+{oct_constant}{unsigned_ext}  {
+                             sscanf(yytext,"%o",&yylval.yunsigned); 
+                             return(UNSIGNED_SYM);
+                          }
+{oct_constant}            { /* use %lo to protect against overflow */
+                             sscanf(yytext,"%lo",&yylval.ysigned_long); 
+                             return(SIGNED_LONG_SYM);
+                          }
+{int_constant}{ulong_ext} {  /* unsigned long target: %lu, not %ld */
+                             sscanf(yytext,"%lu",&yylval.yunsigned_long); 
+                             return(UNSIGNED_LONG_SYM);
+                          }
+{int_constant}{long_ext}  {
+                             sscanf(yytext,"%ld",&yylval.ysigned_long); 
+                             return(SIGNED_LONG_SYM);
+                          }
+{int_constant}{unsigned_ext}  {  /* unsigned target: %u, not %d */
+                             sscanf(yytext,"%u",&yylval.yunsigned); 
+                             return(UNSIGNED_SYM);
+                          }
+{int_constant}            { /* use %ld to protect against overflow */
+                             sscanf(yytext,"%ld",&yylval.ysigned_long); 
+                             return(SIGNED_LONG_SYM);
+                          }
+{float_constant}{long_ext}  {  /* long double target: %Lf (%lf is double) */
+                             sscanf(yytext,"%Lf",&yylval.ylong_double); 
+                             return(LONG_DOUBLE_SYM);
+                          }
+{float_constant}{float_ext}  {
+                             sscanf(yytext,"%f",&yylval.yfloat); 
+                             return(FLOAT_SYM);
+                          }
+{float_constant}          { /* use long double (%Lf) to protect against overflow */
+                             sscanf(yytext,"%Lf",&yylval.ylong_double); 
+                             return(LONG_DOUBLE_SYM);
+                          }
+%%
+
+/*
+ * Driver: call yylex() until it returns 0 and, for every token,
+ * print the matched text, the kind of number that was recognised
+ * and the converted value taken from the matching yylval member.
+ */
+int main(void)
+{
+  int code;
+
+  while((code = yylex())){
+    printf("yytext          : %s\n",yytext);
+    switch(code){
+    case UNSIGNED_LONG_SYM:
+       printf("Type of number  : UNSIGNED LONG\n");
+       printf("Value of number : %lu\n",yylval.yunsigned_long);
+       break;
+    case SIGNED_LONG_SYM:  
+       printf("Type of number  : SIGNED LONG\n");
+       printf("Value of number : %ld\n",yylval.ysigned_long);
+       break;
+    case UNSIGNED_SYM:     
+       printf("Type of number  : UNSIGNED\n");
+       printf("Value of number : %u\n",yylval.yunsigned);
+       break;
+    case SIGNED_SYM:       
+       printf("Type of number  : SIGNED\n");
+       printf("Value of number : %d\n",yylval.ysigned);
+       break;
+    case LONG_DOUBLE_SYM:  
+       printf("Type of number  : LONG DOUBLE\n");
+       /* a long double argument needs %Lf; %lf means double */
+       printf("Value of number : %Lf\n",yylval.ylong_double);
+       break;
+    case FLOAT_SYM:        
+       printf("Type of number  : FLOAT\n");
+       /* the float is promoted to double when passed, so %f is right */
+       printf("Value of number : %f\n",yylval.yfloat);
+       break;
+    default:
+       printf("Type of number  : UNDEFINED\n");
+       printf("Value of number : UNDEFINED\n");
+       break;
+    }
+  }
+  return(0);
+}
+
--- a/examples/manual/pas_include.lex
+++ b/examples/manual/pas_include.lex
@ -0,0 +1,78 @@
+/*
+ * pas_include.lex : An example of using multiple buffers,
+ *                   EOF rules, and start states
+ */
+
+%{
+                              
+#define MAX_NEST 10                   
+
+YY_BUFFER_STATE include_stack[MAX_NEST];
+int             include_count = -1;
+
+%}
+
+
+%x INCLUDE
+%x COMMENT
+
+
+%%
+
+ /*
+  * Pascal-style include handling: a comment of the form
+  * { $include (file) } switches the scanner to the named file.
+  * include_stack[0..include_count] holds the buffers of the files
+  * that are waiting for an include to finish; include_count is the
+  * index of the most recently saved one (-1 when none is saved).
+  */
+
+"{"                          BEGIN(COMMENT);
+
+<COMMENT>"}"                 BEGIN(INITIAL); 
+<COMMENT>"$include"[ \t]*"(" BEGIN(INCLUDE);
+<COMMENT>[ \t]*              /* skip whitespace */
+
+<INCLUDE>")"                 BEGIN(COMMENT); 
+<INCLUDE>[ \t]*              /* skip whitespace */
+<INCLUDE>[^ \t\n() ]+ {      /* get the include file name */
+          /* include_count is pre-incremented below, so the last
+           * usable slot is MAX_NEST - 1 */
+          if ( include_count >= MAX_NEST - 1 ){
+             fprintf( stderr, "Too many include files" );
+             exit( 1 );
+          }
+
+          /* remember where we are in the including file */
+          include_stack[++include_count] = YY_CURRENT_BUFFER;
+
+          yyin = fopen( yytext, "r" );
+          if ( ! yyin ){
+             fprintf( stderr, "Unable to open %s",yytext);
+             exit( 1 );
+          }
+
+          yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
+
+          BEGIN(INITIAL);
+        }
+ /* an action has to start on the same line as its pattern */
+<INCLUDE><<EOF>> {
+            fprintf( stderr, "EOF in include" );
+            yyterminate();
+        }
+<COMMENT><<EOF>> {
+            fprintf( stderr, "EOF in comment" );
+            yyterminate();
+        }
+<<EOF>> {
+          if ( include_count < 0 ){
+            /* nothing saved: this was the outermost file */
+            yyterminate();
+          } else {
+            /* throw away the buffer of the file that just ended,
+             * go back to the file that included it and carry on
+             * after the file name, where the ')' is expected */
+            yy_delete_buffer( YY_CURRENT_BUFFER );
+            yy_switch_to_buffer( include_stack[include_count--] );
+            BEGIN(INCLUDE);
+          }
+        }
+[a-z]+               ECHO;
+.|\n                 ECHO;
+
+
+
+
+
+
+
+
+
+
--- a/examples/manual/pascal.lex
+++ b/examples/manual/pascal.lex
@ -0,0 +1,120 @@
+/*
+ * pascal.lex: An example PASCAL scanner
+ *
+ */
+
+%{
+#include <stdio.h>
+#include "y.tab.h"
+
+int line_number = 0;
+
+void yyerror(char *message);
+
+%}
+
+%x COMMENT1 COMMENT2
+
+white_space       [ \t]*
+digit             [0-9]
+alpha             [A-Za-z_]
+alpha_num         ({alpha}|{digit})
+hex_digit         [0-9A-F]
+identifier        {alpha}{alpha_num}*
+unsigned_integer  {digit}+
+hex_integer       ${hex_digit}{hex_digit}*
+exponent          e[+-]?{digit}+
+i                 {unsigned_integer}
+real              ({i}\.{i}?|{i}?\.{i}){exponent}?
+string            \'([^'\n]|\'\')+\'
+bad_string        \'([^'\n]|\'\')+
+
+%%
+
+"{"                  BEGIN(COMMENT1);
+<COMMENT1>[^}\n]+
+<COMMENT1>\n            ++line_number;
+<COMMENT1><<EOF>>    yyerror("EOF in comment");
+<COMMENT1>"}"        BEGIN(INITIAL);
+
+"(*"                 BEGIN(COMMENT2);
+<COMMENT2>[^)*\n]+
+<COMMENT2>\n            ++line_number;
+<COMMENT2><<EOF>>    yyerror("EOF in comment");
+<COMMENT2>"*)"       BEGIN(INITIAL);
+<COMMENT2>[*)]
+
+ /* note that FILE and BEGIN are already 
+  * defined in FLEX or C so they can't  
+  * be used. This can be overcome in                               
+  * a cleaner way by defining all the
+  * tokens to start with TOK_ or some
+  * other prefix.
+  */
+
+and                  return(AND);
+array                return(ARRAY);
+begin                return(_BEGIN);
+case                 return(CASE);
+const                return(CONST);
+div                  return(DIV);
+do                   return(DO);
+downto               return(DOWNTO);
+else                 return(ELSE);
+end                  return(END);
+file                 return(_FILE);
+for                  return(FOR);
+function             return(FUNCTION);
+goto                 return(GOTO);
+if                   return(IF);
+in                   return(IN);
+label                return(LABEL);
+mod                  return(MOD);
+nil                  return(NIL);
+not                  return(NOT);
+of                   return(OF);
+packed               return(PACKED);
+procedure            return(PROCEDURE);
+program              return(PROGRAM);
+record               return(RECORD);
+repeat               return(REPEAT);
+set                  return(SET);
+then                 return(THEN);
+to                   return(TO);
+type                 return(TYPE);
+until                return(UNTIL);
+var                  return(VAR);
+while                return(WHILE);
+with                 return(WITH);
+
+"<="|"=<"            return(LEQ);
+"=>"|">="            return(GEQ);
+"<>"                 return(NEQ);
+"="                  return(EQ);
+
+".."                 return(DOUBLEDOT);
+
+ /* numeric and string literals; an unterminated string is fatal */
+{unsigned_integer}   return(UNSIGNED_INTEGER);
+{real}               return(REAL);
+{hex_integer}        return(HEX_INTEGER);
+{string}             return(STRING);
+{bad_string}         yyerror("Unterminated string");
+
+{identifier}         return(IDENTIFIER);
+
+[*/+\-,^.;:()\[\]]   return(yytext[0]);
+
+{white_space}        /* do nothing */
+\n                   line_number += 1;
+.                    yyerror("Illegal input");
+
+%%
+
+/*
+ * Report a scanning error on stderr - the message, the current value
+ * of line_number and the text of the current token - and then end
+ * the program with exit status 1.  This function does not return.
+ */
+void yyerror(char *message)
+{
+   fprintf(stderr,"Error: \"%s\" in line %d. Token = %s\n",
+           message,line_number,yytext);
+   exit(1);
+}
+
+
--- a/examples/manual/reject.lex
+++ b/examples/manual/reject.lex
@ -0,0 +1,12 @@
+/* 
+ * reject.lex: An example of REJECT and unput()
+ *             misuse.
+ */
+
+%%
+UNIX       { 
+                unput('U'); unput('N'); unput('G'); unput('\0');
+                REJECT;
+           } 
+GNU        printf("GNU is Not Unix!\n"); 
+%%
--- a/examples/manual/replace.lex
+++ b/examples/manual/replace.lex
@ -0,0 +1,33 @@
+/*
+ * replace.lex : A simple filter for renaming
+ *               parts of flex or bison generated
+ *               scanners or parsers.
+ */
+
+%{
+#include <stdio.h>
+
+char lower_replace[1024];
+char upper_replace[1024];
+
+%}
+
+%%
+
+"yy"   printf("%s",lower_replace); 
+"YY"   printf("%s",upper_replace);
+,      ECHO;
+
+%%
+
+/*
+ * usage: replace lower UPPER
+ *
+ * argv[1] replaces every "yy" and argv[2] every "YY" in the text
+ * read by the scanner.  Returns 0 on success, 1 when an argument is
+ * missing or does not fit into its replacement buffer.
+ */
+int main(int argc, char *argv[])
+{
+   int lower_len,upper_len;
+
+   /* both replacement strings are needed, i.e. argv[1] and argv[2] */
+   if(argc < 3){
+     printf("Usage %s lower UPPER\n",argv[0]);
+     return(1);
+   }
+
+   /* bounded copies; snprintf reports the length it wanted to write,
+    * so an argument that does not fit is detected, not truncated */
+   lower_len = snprintf(lower_replace,sizeof(lower_replace),"%s",argv[1]);
+   upper_len = snprintf(upper_replace,sizeof(upper_replace),"%s",argv[2]);
+   if(lower_len < 0 || lower_len >= (int) sizeof(lower_replace) ||
+      upper_len < 0 || upper_len >= (int) sizeof(upper_replace)){
+     fprintf(stderr,"%s: replacement string too long\n",argv[0]);
+     return(1);
+   }
+
+   yylex();
+   return(0);
+}
--- a/examples/manual/string1.lex
+++ b/examples/manual/string1.lex
@ -0,0 +1,98 @@
+/* 
+ * string1.lex: Handling strings by using input()
+ */
+
+%{
+#include <ctype.h>
+#include <malloc.h>
+
+#define ALLOC_SIZE 32 /* for (re)allocating the buffer */                   
+
+#define isodigit(x) ((x) >= '0' && (x) <= '7') 
+#define hextoint(x) (isdigit((x)) ? (x) - '0' : ((x) - 'A') + 10)  
+
+void yyerror(char *message)
+{
+  printf("\nError: %s\n",message);
+}
+
+%}
+
+%%
+
+\" {
+     /*
+      * Read the body of a double-quoted string one character at a
+      * time with input(), translating backslash escapes, until the
+      * closing quote, a newline or EOF is reached.  The text is
+      * collected in a heap buffer that grows ALLOC_SIZE bytes at a
+      * time, printed, and released again.
+      */
+     int  inch,count,max_size;
+     char *buffer,*bigger;
+     int  temp;
+
+     buffer   = malloc(ALLOC_SIZE);
+     if(buffer == NULL){
+       yyerror("Out of memory.");
+       yyterminate();
+     }
+     max_size = ALLOC_SIZE;
+     inch     = input();
+     count    = 0;
+     while(inch != EOF && inch != '"' && inch != '\n'){
+        if(inch == '\\'){
+          inch = input();
+          switch(inch){
+          case '\n': /* line continuation: drop the backslash-newline
+                      * pair and look at the following character from
+                      * the top of the loop, so that a closing quote,
+                      * a newline, EOF or another escape right after
+                      * it is still recognised instead of being
+                      * stored as ordinary text */
+                     inch = input();
+                     continue;
+          case 'b' : inch = '\b';    break;
+          case 't' : inch = '\t';    break;
+          case 'n' : inch = '\n';    break;
+          case 'v' : inch = '\v';    break;
+          case 'f' : inch = '\f';    break;
+          case 'r' : inch = '\r';    break;
+          case 'X' :
+          case 'x' : /* one or two hex digits; without any digit the
+                      * escape stands for a plain x */
+                     inch = input();
+                     if(isxdigit(inch)){
+                       temp = hextoint(toupper(inch));
+                       inch = input();
+                       if(isxdigit(inch)){
+                         temp = (temp << 4) + hextoint(toupper(inch));
+                       } else {
+                         unput(inch);
+                       }
+                       inch = temp; 
+                     } else {
+                       unput(inch);
+                       inch = 'x';
+                     }
+             break;
+          default:
+             /* one to three octal digits; any other character
+              * stands for itself */
+             if(isodigit(inch)){
+                temp = inch - '0';
+                inch = input();
+                if(isodigit(inch)){
+                  temp = (temp << 3) + (inch - '0');
+                } else {
+                  unput(inch);
+                  goto done;
+                }
+                inch = input();
+                if(isodigit(inch)){
+                  temp = (temp << 3) + (inch - '0');
+                } else {
+                  unput(inch);
+                }
+             done:
+                inch = temp; 
+             }
+          } 
+        }
+        buffer[count++] = inch;
+        if(count >= max_size){
+           /* grow through a temporary so that the old block can
+            * still be released when realloc fails */
+           bigger = realloc(buffer,max_size + ALLOC_SIZE);
+           if(bigger == NULL){
+             yyerror("Out of memory.");
+             free(buffer);
+             yyterminate();
+           }
+           buffer    = bigger;
+           max_size += ALLOC_SIZE;
+        }           
+        inch = input();
+     }
+     if(inch == EOF || inch == '\n'){
+       yyerror("Unterminated string.");
+     }
+     /* count is always below max_size here, so the terminator fits */
+     buffer[count] = '\0';
+     printf("String = \"%s\"\n",buffer);
+     free(buffer);
+   }
+.
+\n
+%%
+
+
--- a/examples/manual/string2.lex
+++ b/examples/manual/string2.lex
@ -0,0 +1,94 @@
+/*
+ * string2.lex: An example of using scanning strings
+ *              by using start states.
+ */
+
+%{
+#include <ctype.h>
+#include <malloc.h>
+
+#define isodigit(x) ((x) >= '0' && (x) <= '7') 
+#define hextoint(x) (isdigit((x)) ? (x) - '0' : ((x) - 'A') + 10)  
+
+char *buffer      = NULL;
+int  buffer_size  = 0;
+
+void yyerror(char *message)
+{
+  printf("\nError: %s\n",message);
+}
+
+%}
+
+%x STRING
+
+hex (x|X)[0-9a-fA-F]{1,2}
+oct [0-7]{1,3}
+
+%%
+
+\"                 { 
+                     buffer      = malloc(1); 
+                     buffer_size = 1; strcpy(buffer,"");
+                     BEGIN(STRING);
+                   }
+<STRING>\n         {
+                      yyerror("Unterminated string");       
+                      free(buffer);
+                      BEGIN(INITIAL);
+                   }
+<STRING><<EOF>>    {
+                      yyerror("EOF in string");       
+                      free(buffer);
+                      BEGIN(INITIAL);
+                   }
+<STRING>[^\\\n"]   {
+                     buffer = realloc(buffer,buffer_size+yyleng+1);
+                     buffer_size += yyleng;
+                     strcat(buffer,yytext);
+                   }
+<STRING>\\\n       /* ignore this */
+<STRING>\\{hex}    {
+                     /* yytext is a backslash, an x or X and one or
+                      * two hex digits, so yyleng-2 is the number of
+                      * digits; yytext[yyleng-loop] visits them from
+                      * left to right while temp collects the value */
+                     int temp =0,loop = 0;
+                     for(loop=yyleng-2; loop>0; loop--){
+                       temp  <<= 4;
+                       temp  += hextoint(toupper(yytext[yyleng-loop]));
+                     } 
+                     /* buffer_size counts the terminating NUL: the
+                      * new character overwrites the old terminator
+                      * and a new one is written behind it */
+                     buffer = realloc(buffer,buffer_size+1);
+                     buffer[buffer_size-1] = temp;
+                     buffer[buffer_size]   = '\0';
+                     buffer_size += 1;
+                   }
+<STRING>\\{oct}    {
+                     /* yytext is a backslash and one to three octal
+                      * digits, so yyleng-1 is the number of digits;
+                      * yytext[yyleng-loop] visits them from left to
+                      * right while temp collects the value */
+                     int temp =0,loop = 0;
+                     for(loop=yyleng-1; loop>0; loop--){
+                       temp  <<= 3;
+                       temp  += (yytext[yyleng-loop] - '0');
+                     } 
+                     /* buffer_size counts the terminating NUL: the
+                      * new character overwrites the old terminator
+                      * and a new one is written behind it */
+                     buffer = realloc(buffer,buffer_size+1);
+                     buffer[buffer_size-1] = temp;
+                     buffer[buffer_size]   = '\0';
+                     buffer_size += 1;
+                   }
+<STRING>\\[^\n]    {
+                     buffer = realloc(buffer,buffer_size+1);
+                     switch(yytext[yyleng-1]){
+                     case 'b' : buffer[buffer_size-1] = '\b';  break;
+                     case 't' : buffer[buffer_size-1] = '\t';  break;
+                     case 'n' : buffer[buffer_size-1] = '\n';  break;
+                     case 'v' : buffer[buffer_size-1] = '\v';  break;
+                     case 'f' : buffer[buffer_size-1] = '\f';  break;
+                     case 'r' : buffer[buffer_size-1] = '\r';  break;
+                     default  : buffer[buffer_size-1] = yytext[yyleng-1];
+                     }
+                     buffer[buffer_size] = '\0';
+                     buffer_size += 1;
+                   }
+<STRING>\"         {
+                     printf("string = \"%s\"",buffer); 
+                     free(buffer);
+                     BEGIN(INITIAL);
+                   }
+%%
+
+
--- a/examples/manual/strtest.dat
+++ b/examples/manual/strtest.dat
@ -0,0 +1,21 @@
+"This is a string"
+"The next string will be empty"
+""
+"This is a string with a \b(\\b) in it"
+"This is a string with a \t(\\t) in it"
+"This is a string with a \n(\\n) in it"
+"This is a string with a \v(\\v) in it"
+"This is a string with a \f(\\f) in it"
+"This is a string with a \r(\\r) in it"
+"This is a string with a \"(\\\") in it"
+"This is a string with a \z(\\z) in it"
+"This is a string with a \X4a(\\X4a) in it"
+"This is a string with a \x4a(\\x4a) in it"
+"This is a string with a \x7(\\x7) in it"
+"This is a string with a \112(\\112) in it"
+"This is a string with a \043(\\043) in it"
+"This is a string with a \7(\\7) in it"
+"This is a multi-line \
+string"
+"This is an unterminated string
+"This is an unterminated string too
--- a/examples/manual/unput.lex
+++ b/examples/manual/unput.lex
@ -0,0 +1,32 @@
+/*
+ * unput.lex : An example of what *not*
+ *             to do with unput().
+ */
+
+
+%{
+#include <stdio.h>
+
+void putback_yytext(void);
+%}
+
+%%
+foobar   putback_yytext();
+raboof   putback_yytext();
+%%
+
+/*
+ * Print the current token and push every character of it back onto
+ * the input with unput().  Per the header of this file this is a
+ * demonstration of what NOT to do.
+ */
+void putback_yytext(void)
+{
+    int   i;
+    int   l = strlen(yytext);
+    char  buffer[YY_BUF_SIZE];
+
+    /* work from a private copy of the token - presumably because
+     * unput() may overwrite yytext; verify against the flex manual */
+    strcpy(buffer,yytext);
+    printf("Got: %s\n",yytext);
+    /* NOTE(review): the characters are pushed back first to last;
+     * if the last one pushed is the first one re-read, the token
+     * comes back reversed, and the two rules (foobar and raboof) are
+     * reverses of each other - confirm against the flex manual */
+    for(i=0; i<l; i++){
+       unput(buffer[i]);
+    }
+}
+
+ 
+           
--- a/examples/manual/user_act.lex
+++ b/examples/manual/user_act.lex
@ -0,0 +1,31 @@
+%{
+
+#include <ctype.h>
+
+void user_action(void);
+
+#define YY_USER_ACTION user_action();
+
+%}
+
+%%
+
+.*         ECHO;
+\n         ECHO;
+
+%%
+
+/*
+ * Installed as YY_USER_ACTION: convert the lower-case letters of
+ * the current token in yytext to upper case, in place.
+ */
+void user_action(void)
+{
+  int loop;
+  
+  for(loop=0; loop<yyleng; loop++){
+    /* the <ctype.h> functions are only defined for EOF and values
+     * representable as unsigned char; a plain char may be negative */
+    unsigned char ch = (unsigned char) yytext[loop];
+
+    if(islower(ch)){
+       yytext[loop] = (char) toupper(ch);
+    }
+  }
+}
+
+
+
+
--- a/examples/manual/userinit.lex
+++ b/examples/manual/userinit.lex
@ -0,0 +1,30 @@
+%{
+#define YY_USER_INIT open_input_file()
+
+extern FILE *yyin;
+
+/*
+ * Installed as YY_USER_INIT: ask on stdout for the name of the input
+ * file and open it as yyin, asking again until a file can be opened.
+ * When no more names can be read from stdin, stdin itself becomes
+ * the scanner input.
+ */
+void open_input_file(void)
+{
+  char   *file_name,buffer[1024];
+  size_t length;
+
+  yyin      = NULL; 
+
+  while(yyin == NULL){
+    printf("Input file: ");
+    fflush(stdout);  /* the prompt has no newline of its own */
+    file_name = fgets(buffer,1024,stdin);
+    if(file_name){
+      /* strip the newline kept by fgets, but only if there is one:
+       * the last line of the input may lack it, and an over-long
+       * line is cut off before it */
+      length = strlen(file_name);
+      if(length > 0 && file_name[length-1] == '\n'){
+        file_name[length-1] = '\0';
+      }
+      yyin = fopen(file_name,"r");
+      if(yyin == NULL){
+        printf("Unable to open \"%s\"\n",file_name);
+      }
+    } else {
+      printf("stdin\n");
+      yyin = stdin;
+      break;
+    }
+  }
+}
+
+%}
+%%
--- a/examples/manual/wc.lex
+++ b/examples/manual/wc.lex
@ -0,0 +1,122 @@
+%{
+
+/*
+ * wc.lex : A simple example of using FLEX
+ *          to create a wc-like utility.
+ *
+ *	    See examples/fastwc/ in the flex distribution for examples
+ *	    of how to write this scanner for maximum performance.
+ */   
+
+int  numchars = 0;
+int  numwords = 0;
+int  numlines = 0;
+int  totchars = 0;
+int  totwords = 0;
+int  totlines = 0;
+
+/*
+ * rules start from here
+ */
+
+%}
+
+%%
+
+[\n]        { numchars++;  numlines++;         }
+[\r]        { numchars++;                      }
+[^ \t\n]+   { numwords++;  numchars += yyleng; }
+.           { numchars++;                      }
+
+%%
+
+/*
+ * additional C code starts from here. This supplies
+ * all the argument processing etc.
+ */
+
+/*
+ * wc-like driver.  The -l, -w and -c switches select which counts
+ * are shown (all three when none is given); every other argument is
+ * a file that is scanned and reported on its own line, followed by
+ * a line with the totals.
+ */
+int main(int argc, char *argv[])
+{
+  int  arg;
+  int  dash;
+  int  show_lines     = 0; /* -l given                             */
+  int  show_words     = 0; /* -w given                             */
+  int  show_chars     = 0; /* -c given                             */
+  int  have_file      = 0; /* at least one file name was seen      */
+  int  scanned_before = 0; /* a file has been opened successfully  */
+
+  /* first pass over the arguments: option switches only */
+  for(arg=1; arg<argc; arg++){
+    if(argv[arg][0] != '-'){
+      continue;
+    }
+    if(argv[arg][1] == 'l'){
+      show_lines = 1;
+    } else if(argv[arg][1] == 'w'){
+      show_words = 1;
+    } else if(argv[arg][1] == 'c'){
+      show_chars = 1;
+    } else {
+      fprintf(stderr,"unknown option -%c\n",
+              argv[arg][1]);
+    }
+  }
+  if(!show_lines && !show_words && !show_chars){
+    show_lines = show_words = show_chars = 1; /* default to all on */
+  }
+
+  /* second pass over the arguments: file names only */
+  for(arg=1; arg<argc; arg++){
+    if(argv[arg][0] == '-'){
+      continue;
+    }
+    have_file = 1;
+    numlines = numchars = numwords = 0;  
+
+    yyin = fopen(argv[arg],"rb");
+    if(yyin == NULL){
+      fprintf(stderr,"wc : file not found %s\n",argv[arg]);
+      continue;
+    }
+
+    /* every file after the first successfully opened one needs
+     * the scanner restarted on the new yyin */
+    if(scanned_before){
+      YY_NEW_FILE;
+    }
+    scanned_before = 1;
+
+    (void) yylex();
+    fclose(yyin);
+
+    totwords += numwords;
+    totchars += numchars;
+    totlines += numlines;
+
+    printf("file  : %25s :",argv[arg]) ;
+    if(show_lines){
+      fprintf(stdout,"lines %5d ",numlines); 
+    }
+    if(show_chars){
+      fprintf(stdout,"characters %5d ",numchars); 
+    }
+    if(show_words){
+      fprintf(stdout,"words %5d ",numwords); 
+    }
+    fprintf(stdout,"\n");
+  }
+
+  if(!have_file){
+    fprintf(stderr,"usage : wc [-l -w -c] file [file...]\n");
+    fprintf(stderr,"-l = count lines\n");
+    fprintf(stderr,"-c = count characters\n");
+    fprintf(stderr,"-w = count words\n");
+    exit(1);
+  }
+
+  /* separator line, then the totals over all files */
+  for(dash=0; dash<79; dash++){
+    fprintf(stdout,"-");
+  }
+  fprintf(stdout,"\n");
+  fprintf(stdout,"total : %25s  ","") ;
+  if(show_lines){
+    fprintf(stdout,"lines %5d ",totlines); 
+  }
+  if(show_chars){
+    fprintf(stdout,"characters %5d ",totchars); 
+  }
+  if(show_words){
+    fprintf(stdout,"words %5d ",totwords); 
+  }
+  fprintf(stdout,"\n");
+  return(0);
+}
--- a/examples/manual/yymore.lex
+++ b/examples/manual/yymore.lex
@ -0,0 +1,29 @@
+/*
+ * yymore.lex: An example of using yymore()
+ *             to good effect.
+ */
+
+%{
+#include <memory.h>
+
+void yyerror(char *message)
+{
+  printf("Error: %s\n",message);
+}
+
+%}
+
+%x STRING
+
+%%
+\"   BEGIN(STRING);
+
+<STRING>[^\\\n"]*  yymore();
+<STRING><<EOF>>    yyerror("EOF in string.");       BEGIN(INITIAL);
+<STRING>\n         yyerror("Unterminated string."); BEGIN(INITIAL);
+<STRING>\\\n       yymore();
+<STRING>\"        {
+                     yytext[yyleng-1] = '\0';
+                     printf("string = \"%s\"",yytext); BEGIN(INITIAL);
+                  }
+%%
--- a/examples/manual/yymore2.lex
+++ b/examples/manual/yymore2.lex
@ -0,0 +1,33 @@
+/*
+ * yymore2.lex: An example of using yymore()
+ *              to good effect.
+ */
+
+%{
+#include <memory.h>
+
+void yyerror(char *message)
+{
+  printf("Error: %s\n",message);
+}
+
+%}
+
+%x STRING
+
+%%
+\"   BEGIN(STRING);
+
+<STRING>[^\\\n"]*  yymore();
+<STRING><<EOF>>    yyerror("EOF in string.");       BEGIN(INITIAL);
+<STRING>\n         yyerror("Unterminated string."); BEGIN(INITIAL);
+<STRING>\\\n      {
+                     bcopy(yytext,yytext+2,yyleng-2);
+                     yytext += 2; yyleng -= 2;
+                     yymore();
+                  }
+<STRING>\"        {
+                     yyleng -= 1; yytext[yyleng] = '\0';
+                     printf("string = \"%s\"",yytext); BEGIN(INITIAL);
+                  }
+%%
--- a/examples/manual/yymoretest.dat
+++ b/examples/manual/yymoretest.dat
@ -0,0 +1,7 @@
+"This is a test \
+of multi-line string \
+scanning in flex. \
+This may be breaking some law \
+of usage though..."
+
+
--- a/examples/testxxLexer.l
+++ b/examples/testxxLexer.l
@ -0,0 +1,58 @@
+	// An example of using the flex C++ scanner class.
+
+%option C++ noyywrap
+
+%{
+int mylineno = 0;
+%}
+
+string	\"[^\n"]+\"
+
+ws	[ \t]+
+
+alpha	[A-Za-z]
+dig	[0-9]
+name	({alpha}|{dig}|\$)({alpha}|{dig}|\_|\.|\-|\/|\$)*
+num1	[-+]?{dig}+\.?([eE][-+]?{dig}+)?
+num2	[-+]?{dig}*\.{dig}+([eE][-+]?{dig}+)?
+number	{num1}|{num2}
+
+%%
+
+{ws}	/* skip blanks and tabs */
+
+"/*"		{
+		int c;
+
+		while((c = yyinput()) != 0)
+			{
+			if(c == '\n')
+				++mylineno;
+
+			else if(c == '*')
+				{
+				if((c = yyinput()) == '/')
+					break;
+				else
+					unput(c);
+				}
+			}
+		}
+
+{number}	cout << "number " << YYText() << '\n';
+
+\n		mylineno++;
+
+{name}		cout << "name " << YYText() << '\n';
+
+{string}	cout << "string " << YYText() << '\n';
+
+%%
+
+// Create a scanner object, call yylex() on it until it returns 0,
+// then destroy the scanner again.
+int main( int /* argc */, char** /* argv */ )
+	{
+	FlexLexer* lexer = new yyFlexLexer;
+	while(lexer->yylex() != 0)
+		;
+	// the scanner was allocated with new, so it has to be deleted
+	delete lexer;
+	return 0;
+	}
--- a/to.do/README
+++ b/to.do/README
@ -0,0 +1,46 @@
+The contents of this directory are:
+
+	Wilhelms.todo
+		This guy Gerhard Wilhelms did an exhaustive line-by-line
+		study of flex, finding a large number of glitches; it's
+		not clear how significant they are (some have subsequently
+		been fixed).
+
+	Wish-List
+		A long list of cryptic one-line descriptions of various
+		features people have asked for, or I've thought would be
+		handy.  If you have questions about particular ones, let
+		me know.
+
+	reentrant
+		A set of mods contributed by John Bossom for adding
+		an option to flex to make reentrant scanners.
+
+	rflex-2.5.4-diffs
+		A set of mods contributed by Chris Appleton for
+		the same purpose.
+
+	streams
+		email from David Madden about coping with non-blocking
+		I/O.
+
+	unicode
+		patches to support Unicode scanners, contributed
+		by James A. Lauth.
+
+
+Of these, the ones that people frequently ask about are support for reentrant
+scanners and support for Unicode.  I haven't played with the reentrant stuff
+so don't know how solid / well designed it is.  I've sent out the Unicode
+stuff to numerous people and haven't received any complaints, so I imagine
+it works well.
+
+Another thing people ask for fairly often is removing the limit on size
+of the scanners.  There's some mail in the faqs/ directory about ways to
+do this by cranking some #define's in flexdef.h, though of course the best
+solution would be dynamic memory/resizing.
+
+By the way, I have translated the core flex algorithms into a C++ regular
+expression class (but one that doesn't support the uglier stuff like
+yymore()/yyless(), trailing context, etc.).  If you ever wind up wanting
+it, just let me know.
--- a/to.do/Wilhelms.todo
+++ b/to.do/Wilhelms.todo
@ -0,0 +1,711 @@
+PARSE.Y 344: ('rule'-rule " | re '$' ": There are some errors concerning
+    trailing context. First of all the rule " re '$' " implies that this is
+    no variable_trail_rule because the tail of it ( '$' ) has a fixed length
+    of 1. The only possible reason for making this rule variable is when
+    'previous_continued_action' is true. In this case 'variable_trail_rule'
+    must be set and the beginning of the trailing part must be marked.
+    However the variables 'varlength' and 'headcnt' do not have the same meaning
+    as in the rule " re2 re ". Here ( in the rule " re '$' " ) 'varlength'
+    is true if the head ( 're' ) of the rule has variable length, and
+    'headcnt' is still 0 because it isn't set during reduction of 're'.
+    Therefore the test for a variable trailing rule
+      " if ( ! varlength || headcnt != 0 ) "
+    is wrong and should be removed.
+    Also it is not necessary to set 'varlength' or 'headcnt' if you set
+    " trailcnt = 1; ". If this rule is made variable then 'variable_trail_rule'
+    is set and neither 'headcnt' nor 'trailcnt' are used in 'finish_rule()'.
+    And if this rule is normal then the head may be variable or not, but in
+    'finish_rule()' code is generated to reduce 'yy_cp' by 1.
+    Finally I found no reason to create an epsilon-state and insert it in
+    front of mkstate( '\n' ) instead of adding it behind. This epsilon-state
+    should be marked as STATE_TRAILING_CONTEXT. Otherwise you get no warning
+    of dangerous trailing context if you have a rule " x\n*$ " which was made
+    variable with '|'.)
+
+		|  re '$'
+			{
+			/* if ( trlcontxt )
+			    {
+			    synerr( "trailing context used twice" );
+			    $$ = mkstate( SYM_EPSILON );
+			    }
+
+			else */ if ( previous_continued_action )
+			    {
+			    /* see the comment in the rule for "re2 re"
+			     * above
+			     */
+			/*  if ( ! varlength || headcnt != 0 )  */
+				{
+				fprintf( stderr,
+    "%s: warning - trailing context rule at line %d made variable because\n",
+					 program_name, linenum );
+				fprintf( stderr,
+					 "      of preceding '|' action\n" );
+				}
+
+			/* mark as variable */
+			/*  varlength = true;
+			    headcnt = 0;  */
+
+			    add_accept( $1, num_rules | YY_TRAILING_HEAD_MASK )
+;
+			    variable_trail_rule = true;
+			    }
+
+		/*	trlcontxt = true;
+
+			if ( ! varlength )
+			    headcnt = rulelen;
+
+			++rulelen;  */
+			trailcnt = 1;
+
+			current_state_type = STATE_TRAILING_CONTEXT;
+			eps = mkstate( SYM_EPSILON );
+			current_state_type = STATE_NORMAL;
+
+			$$ = link_machines( $1,
+				 link_machines( mkstate( '\n' ), eps ) );
+			}
+
+DFA.C 618: (ntod(): The arrays 'targstate[]' and 'targfreq[]' can be
+    maintained in a better way. Up to now it is possible that states are added
+    to 'targstate[]' more than once, because the state 'newds' from the call
+    to snstods() creates a new entry in 'targstate[]'. But 'newds' may already
+    exist in 'targstate[]' !
+    Another point is that 'targfreq[]' is not updated if "caseins && ! useecs"
+    is true.
+    My algorithm should solve these problems. However it could be simplified
+    by replacing 'newds' by 'targ' and removing the statement "targ = newds;".
+    Remark to the second point: I decremented the targfreq-counter if 'sym'
+    was an uppercase letter and incremented it if 'sym' was a lowercase
+    letter. The index 'i' of 'targfreq[i]' points to the correct position in
+    'targstate[]' even if a new state was added.)
+
+	for ( sym = 1; sym <= numecs; ++sym )
+	    {
+	    if ( symlist[sym] )
+		{
+		symlist[sym] = 0;
+
+		if ( duplist[sym] == NIL )
+		    { /* symbol has unique out-transitions */
+		    numstates = symfollowset( dset, dsize, sym, nset );
+		    nset = epsclosure( nset, &numstates, accset,
+				       &nacc, &hashval );
+
+		    if ( snstods( nset, numstates, accset,
+				  nacc, hashval, &newds ) )
+			{
+			totnst = totnst + numstates;
+			++todo_next;
+			numas += nacc;
+
+			if ( variable_trailing_context_rules && nacc > 0 )
+			    check_trailing_context( nset, numstates,
+				accset, nacc );
+			}
+
+		    targ = newds;
+		    }
+
+		else
+		    {
+		    /* sym's equivalence class has the same transitions
+		     * as duplist(sym)'s equivalence class
+		     */
+		    targ = state[duplist[sym]];
+		    }
+
+		state[sym] = targ;
+
+		if ( trace )
+			fprintf( stderr, "\t%d\t%d\n", sym, targ );
+
+		/* update frequency count for destination state */
+
+		for ( i = 1; i <= targptr; ++i )
+		    if ( targstate[i] == targ )
+			    break;
+
+		if ( i <= targptr )
+		    {
+		    ++targfreq[i];
+		    ++numdup;
+		    }
+		else
+		    {
+		    targfreq[++targptr] = 1;
+		    targstate[targptr] = targ;
+		    ++numuniq;
+		    }
+
+		if ( caseins && ! useecs )
+		    {
+		    if ( sym >= 'A' && sym <= 'Z' )
+			{
+			--targfreq[i];
+			--totaltrans;
+			}
+		    else if ( sym >= 'a' && sym <= 'z' )
+			{
+			++targfreq[i];
+			++totaltrans;
+			}
+		    }
+
+		++totaltrans;
+		duplist[sym] = NIL;
+		}
+	    }
+
+
+GEN.C 438: (gen_next_compressed_state(): I have rewritten the function
+    'yy_try_NUL_trans()' so it really just tries to find out whether a
+    transition on the NUL character goes to the jamstate or not. ( That means
+    I removed each creation of backtracking information and the saving of the
+    new state on the 'yy_state_buf[]'. )
+    Therefore I removed the call for 'gen_backtracking()' here, because the
+    function 'gen_next_compressed_state()' is also used in 'gen_NUL_trans()'.)
+
+/*  gen_backtracking(); */
+
+GEN.C 587ff: (gen_next_state(): Since the backtracking information is not
+    created in 'gen_next_compressed_state()' any more, it is done here
+    before the next state is computed ( for "compressed" tables ). This
+    removes the bug that the backtracking information is created twice if
+    'nultrans' is not NULL and 'gen_next_compressed_state()' is called.
+    Finally I had to insert the creation of a "{" and a "}", because there
+    is a local variable created in 'gen_next_compressed_state()'. ( These are
+    needed only when backtracking information is really created.) )
+
+    if ( ! fulltbl && ! fullspd )
+	gen_backtracking();
+
+    if ( worry_about_NULs && nultrans )
+	{
+	indent_puts( "if ( *yy_cp )" );
+	indent_up();
+	indent_puts( "{" );
+	}
+    else if ( ! fulltbl && ! fullspd && ! reject && num_backtracking > 0 )
+	indent_puts( "{" );
+
+    if ( fulltbl )
+	indent_put2s( "yy_current_state = yy_nxt[yy_current_state][%s];",
+		char_map );
+
+    else if ( fullspd )
+	indent_put2s( "yy_current_state += yy_current_state[%s].yy_nxt;",
+		    char_map );
+
+    else
+	gen_next_compressed_state( char_map );
+
+    if ( worry_about_NULs && nultrans )
+	{
+	indent_puts( "}" );
+	indent_down();
+	indent_puts( "else" );
+	indent_up();
+
+	indent_puts( "yy_current_state = yy_NUL_trans[yy_current_state];" );
+	indent_down();
+	}
+    else if ( ! fulltbl && ! fullspd && ! reject && num_backtracking > 0 )
+	indent_puts( "}" );
+
+    if ( fullspd || fulltbl )
+	gen_backtracking();
+
+    if ( reject )
+	indent_puts( "*yy_state_ptr++ = yy_current_state;" );
+    }
+
+GEN.C 553: (gen_next_match(): There is a problem if 'interactive' is true. In
+    this case the scanner jams if the next state is the jamstate ( i.e.
+    yy_base[yy_current_state] == jambase ). However the scanner reaches also
+    the jamstate if the transition character is the NUL-character or if the
+    end of the buffer is reached. Then the EOB-action decides whether
+    this was really a NUL character or the end-of-buffer. ( If it was a NUL,
+    scanning will be resumed. If it was the end-of-buffer, the buffer will be
+    filled first, before scanning will be resumed. )
+    These actions are not done if you use an 'interactive' scanner, because
+    the EOB-action is not executed. Therefore you have to continue scanning,
+    if you have just matched a NUL character ( i.e. *yy_cp == '\0' and
+    yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) and if you are not
+    already in the jamstate ( i.e. yy_current_state != jamstate ).
+    Note that the '<' in " yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars] "
+    implies that the EOB action is *not* executed if the last match before the
+    end-of-buffer was maximal.
+    The following change in the algorithm results in a minor performance
+    penalty because the additional conditions are tested only if you have
+    reached the end of the match or if you are using NUL characters in your
+    patterns.)
+
+	if ( interactive )
+	    {
+	    printf( "while ( yy_base[yy_current_state] != %d\n", jambase );
+	    set_indent( 4 );
+	    indent_puts( "|| ( *yy_cp == '\\0'" );
+	    indent_up();
+	    indent_puts(
+		" && yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars]" );
+	    do_indent();
+	    printf( " && yy_current_state != %d ) );\n", jamstate );
+	    set_indent( 2 );
+	    }
+	else
+	    printf( "while ( yy_current_state != %d );\n", jamstate );
+
+GEN.C 341: (gen_find_action(): Question: The variables 'yy_full_match',
+    'yy_full_state' and 'yy_full_lp' are used only in the REJECT macro. Why
+    do you not also test here on 'real_reject' before you create code to set
+    these variables ( like you did in line 327ff for the action of the case
+    " ( yy_act & YY_TRAILING_MASK ) " ) ?)
+
+    New code beginning at line 338 to show the context:
+	    indent_puts( "else" );
+	    indent_up();
+	    indent_puts( "{" );
+
+	    if ( real_reject )
+		{
+		/* remember matched text in case we back up due to REJECT */
+		indent_puts( "yy_full_match = yy_cp;" );
+		indent_puts( "yy_full_state = yy_state_ptr;" );
+		indent_puts( "yy_full_lp = yy_lp;" );
+		}
+
+	    indent_puts( "break;" );
+	    indent_puts( "}" );
+	    indent_down();
+
+	    indent_puts( "++yy_lp;" );
+	    indent_puts( "goto find_rule;" );
+	    }
+
+
+FLEX.SKEL 364,379: (YY_END_OF_BUFFER action: If it was really a NUL character
+    which started this action, then 'yy_bp' points still at the beginning of
+    the current run and 'yy_c_buf_p' points behind the NUL character.
+    Contrast this with the situation after the call of 'yy_get_next_buffer()'!
+    Therefore I removed the statement " yy_bp = yytext + YY_MORE_ADJ; "
+    ( line 379 ) and replaced the statement
+    " yy_c_buf_p = yytext + yy_amount_of_matched_text; " ( line 364 ) by the
+    easier one " yy_c_buf_p = --yy_cp; ". Here 'yy_cp' is also adjusted.
+    This guarantees that both 'yy_c_buf_p' and 'yy_cp' point at the NUL
+    character. Therefore 'yy_cp' will have the correct value when it is needed
+    after the call to 'yy_try_NUL_trans()' ( when we know whether we make a
+    transition or not ).
+
+    line 364:
+		    yy_c_buf_p = --yy_cp;
+
+    line 379:
+		/*  yy_bp = yytext + YY_MORE_ADJ; */
+
+GEN.C 632: (gen_NUL_trans(): I have rewritten 'yy_try_NUL_trans()'. The new
+    version just finds out whether a transition on the NUL character goes to
+    the jamstate or not. See also my remarks to 'gen_next_compressed_state()'.
+    Note that the test " yy_is_jam = (yy_current_state == jamstate); " is
+    also used, if 'interactive' is true. Otherwise 'yy_try_NUL_trans()' would
+    return 0, if the NUL character was the last character of a pattern
+    ( e.g. "x\0" ), and we therefore would not reach the last state.
+    Remark: Change also the comment in FLEX.SKEL for this function.)
+
+    FLEX.SKEL, line 583:
+%% code to find the next state goes here
+
+    GEN.C, line 632ff:
+/*  int need_backtracking = (num_backtracking > 0 && ! reject);
+
+    if ( need_backtracking )
+	/ * we'll need yy_cp lying around for the gen_backtracking() * /
+	indent_puts( "register YY_CHAR *yy_cp = yy_c_buf_p;" ); */
+
+    GEN.C, line 674ff:
+/*	if ( reject )
+	    indent_puts( "*yy_state_ptr++ = yy_current_state;" ); */
+
+	do_indent();
+
+/*	if ( interactive )
+	    printf( "yy_is_jam = (yy_base[yy_current_state] == %d);\n",
+		    jambase );
+	else */
+	    printf( "yy_is_jam = (yy_current_state == %d);\n", jamstate );
+	}
+
+    /* if we've entered an accepting state, backtrack; note that
+     * compressed tables have *already* done such backtracking, so
+     * we needn't bother with it again
+     */
+/*  if ( need_backtracking && (fullspd || fulltbl) )
+	{
+	putchar( '\n' );
+	indent_puts( "if ( ! yy_is_jam )" );
+	indent_up();
+	indent_puts( "{" );
+	gen_backtracking();
+	indent_puts( "}" );
+	indent_down();
+	} */
+    }
+
+GEN.C 1293: (make_tables(): The changed functionality of 'yy_try_NUL_trans()'
+    implies changes in the EOB action. If the next state 'yy_next_state' is 0
+    ( i.e. the jamstate ), you can immediately jump to 'yy_find_action'.
+    Remember that 'yy_cp' was already adjusted to point at the NUL !
+    Also you must not use the backtracking information because the current
+    state 'yy_current_state' may be an accepting state.
+    If 'yy_next_state' is not the jamstate, we make a transition on the NUL.
+    This requires the following actions:
+    - Create backtracking information for compressed tables *before* we make
+      the transition on NUL.
+    - Now increment 'yy_cp' and set 'yy_current_state' to 'yy_next_state'.
+      ( Note that 'yy_cp' points at the NUL up to now. )
+    - Save the new state on the stack 'yy_state_buf[]' if 'reject' is true.
+    - Create backtracking information *after* the transition, if 'fulltbl'
+      or 'fullspd' is true.
+    - Finally decide, if 'interactive' is true, whether scanning should be
+      resumed at 'yy_match' or whether we have reached a final state and
+      should jump to 'yy_find_action'. (Condition like in 'gen_next_match()'.)
+      If 'interactive' is false, just resume scanning.)
+
+    Corresponding code in FLEX.SKEL beginning at line 381:
+		    if ( yy_next_state )
+			{
+			/* consume the NUL */
+%% code to do backtracking for compressed tables and set up yy_cp goes here
+			}
+		    else
+			goto yy_find_action;
+
+    Code in GEN.C beginning at line 1293:
+    /* first, deal with backtracking and setting up yy_cp if the scanner
+     * finds that it should JAM on the NUL
+     */
+    skelout();
+    set_indent( 6 );
+
+    if ( ! fulltbl && ! fullspd )
+	gen_backtracking();
+
+    indent_puts( "++yy_cp;" );
+    indent_puts( "yy_current_state = yy_next_state;" );
+
+    if ( reject )
+	indent_puts( "*yy_state_ptr++ = yy_current_state;" );
+
+    if ( fulltbl || fullspd )
+	gen_backtracking();
+
+    if ( interactive )
+	{
+	do_indent();
+	printf( "if ( yy_base[yy_current_state] != %d\n", jambase );
+	indent_up();
+	indent_puts( "|| ( *yy_cp == '\\0'" );
+	indent_puts( "&& yy_cp < &yy_current_buffer->yy_ch_buf[yy_n_chars]" );
+	do_indent();
+	printf( "&& yy_current_state != %d ) )\n", jamstate );
+	indent_puts( "goto yy_match;" );
+	indent_down();
+	indent_puts( "else" );
+	indent_up();
+	indent_puts( "goto yy_find_action;" );
+	indent_down();
+	}
+    else
+	indent_puts( "goto yy_match;" );
+
+/*  if ( fullspd || fulltbl )
+	indent_puts( "yy_cp = yy_c_buf_p;" );
+
+    else
+	{ / * compressed table * /
+	if ( ! reject && ! interactive )
+	    {
+	    / * do the guaranteed-needed backtrack to figure out the match * /
+	    indent_puts( "yy_cp = yy_last_accepting_cpos;" );
+	    indent_puts( "yy_current_state = yy_last_accepting_state;" );
+	    }
+	} */
+
+FLEX.SKEL 513: (yy_get_next_buffer(): Here is an error if 'yymore()' is active
+    in the last match (i.e. yy_doing_yy_more == 1 and yy_more_len > 0). Then
+    'number_to_move' will be (1 + yy_more_len), i.e. the previous character
+    plus the additional characters for using 'yymore()'.)
+
+	if ( number_to_move == 1 + YY_MORE_ADJ )
+	    {
+	    ret_val = EOB_ACT_END_OF_FILE;
+	    yy_current_buffer->yy_eof_status = EOF_DONE;
+	    }
+
+	else
+	    {
+	    ret_val = EOB_ACT_LAST_MATCH;
+	    yy_current_buffer->yy_eof_status = EOF_PENDING;
+	    }
+	}
+
+GEN.C 1317: (make_tables(): In the generation of 'yy_get_previous_state()' the
+    variable 'yy_bp' must be set to 'yytext + YY_MORE_ADJ' if 'bol_needed' is
+    true. Otherwise 'yy_bp' may end up pointing at the beginning of the
+    yymore-prefix instead of the current run.)
+
+    if ( bol_needed )
+	indent_puts( "register YY_CHAR *yy_bp = yytext + YY_MORE_ADJ;\n" );
+
+FLEX.SKEL 589ff: (yyunput(): The function 'yyunput()' should be rewritten.
+    First of all the example for 'unput()' in file flexdoc doesn't work:
+	      {
+	      int i;
+	      unput( ')' );
+	      for ( i = yyleng - 1; i >= 0; --i )
+		  unput( yytext[i] );
+	      unput( '(' );
+	      }
+    The current version of 'yyunput()' modifies 'yyleng'. Therefore 'yyleng' is
+    decremented by " unput( ')' ) " and the pattern to be pushed back has lost
+    its last character. To avoid this just copy the 'yytext'-string and
+    'yyleng' before you call 'unput()'.
+    Another point is that 'yytext' and 'yyleng' could be maintained in a
+    better way. ( Up to now 'yyleng' can become negative ! )
+    I think it's better to say that the pushed back pattern should fulfill
+    the beginning-of-line-condition if and only if the old pattern does
+    ( excluding a possibly existing 'yymore'-prefix ! ).
+    Up to now you have problems if there is a 'yymore'-prefix, because
+    'yytext' will be corrupted by YY_DO_BEFORE_ACTION. ( This macro sets
+    'yytext' to 'yy_bp - yy_more_len', but our 'yy_bp' points already at the
+    beginning of the 'yymore'-prefix. )
+
+    My version of 'yyunput()' reduces the 'yytext'-string by 1 for every
+    pushed back character and decrements 'yyleng' until 'yytext' is the empty
+    string. The beginning-of-line-condition is preserved when 'bol_needed' is
+    true. ( Then the character before the current run is copied in front of
+    the pushed back character. ) If there is a 'yymore'-prefix, 'yy_more_len'
+    will be decremented if 'yy_cp' reaches the beginning of the current run.
+
+    Remark: The parameter 'yytext' in " yyunput( c, yytext ) " is not really
+    necessary since 'yytext' is a global variable. You could also set
+    " register YY_CHAR *yy_bp = yytext; " at the beginning of 'yyunput()'.)
+
+    Replace lines 622 - 623 in FLEX.SKEL:
+
+    if ( yy_cp > yy_bp && yy_cp[-1] == '\n' )
+	yy_cp[-2] = '\n';
+
+    by
+
+%% code to adjust yy_bp and yy_more_len goes here
+
+    Add in GEN.C a function 'gen_yyunput()':
+/* generate the yyunput() code that adjusts yy_bp and yy_more_len:
+ * the yy_bp offset for a yymore() prefix, the copy that keeps the
+ * character before the current run (beginning-of-line scanners only),
+ * and the final one-character step back
+ */
+
+void gen_yyunput()
+
+    {
+    if ( yymore_used )
+	indent_puts( "yy_bp += YY_MORE_ADJ;\n" );
+
+    if ( bol_needed )
+	indent_puts( "yy_cp[-2] = yy_bp[-1];\n" );
+
+    if ( ! yymore_used )
+	{
+	/* no yymore() prefix to account for: always step yy_bp back */
+	indent_puts( "--yy_bp;" );
+	return;
+	}
+
+    /* with yymore(): shrink the prefix once yy_cp has reached yy_bp,
+     * otherwise step yy_bp back
+     */
+    indent_puts( "if ( (yy_cp == yy_bp) && YY_MORE_ADJ )" );
+    indent_up();
+    indent_puts( "--yy_more_len;" );
+    indent_down();
+
+    indent_puts( "else" );
+    indent_up();
+    indent_puts( "--yy_bp;" );
+    indent_down();
+    }
+
+    Finally add in the function 'make_tables()' behind the call of
+    'gen_NUL_trans()' in line 1328:
+
+    skelout();
+    gen_yyunput();
+
+FLEX.SKEL 642,658: (input(): There is an error in 'input()' if the end of
+    'yy_current_buffer' is reached and 'yymore' is active. Then
+    'yy_get_next_buffer()' is called, which assumes that 'yytext'
+    points at the beginning of the 'yymore'-prefix. This function can't
+    recognize the end of the input stream correctly and therefore returns
+    EOB_ACT_LAST_MATCH instead of EOB_ACT_END_OF_FILE. Also if the end of
+    the input file isn't reached yet (EOB_ACT_CONTINUE_SCAN) at least one
+    character will be lost.
+    To avoid this error just turn off 'yy_doing_yy_more'. Then you need
+    not adjust with YY_MORE_ADJ in lines 667 and 682. However you have to
+    use a function 'gen_input()', because 'yy_doing_yy_more' does not exist
+    if 'yymore_used' is false.
+
+    ( Another solution is to adjust 'yytext':
+    " yytext = yy_c_buf_p - YY_MORE_ADJ; ", line 658. )
+
+    I think the trick with "yy_did_buffer_switch_on_eof" should be done here
+    the same way as in the YY_END_OF_BUFFER action.
+    Finally I removed the variable 'yy_cp' and used 'yy_c_buf_p' instead.)
+/* input()/yyinput() - return the next character from the scan buffer
+ *
+ * The function is named yyinput() when compiled as C++ and input()
+ * otherwise.  It returns EOF when the buffer is exhausted,
+ * yy_get_next_buffer() reports EOB_ACT_END_OF_FILE and yywrap() returns
+ * nonzero; otherwise it returns the character at yy_c_buf_p and advances
+ * past it.
+ */
+#ifdef __cplusplus
+static int yyinput()
+#else
+static int input()
+#endif
+
+    {
+    int c;
+
+    *yy_c_buf_p = yy_hold_char; /* restore held char; yy_cp not needed */
+
+    if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
+	{
+	/* yy_c_buf_p now points to the character we want to return.
+	 * If this occurs *before* the EOB characters, then it's a
+	 * valid NUL; if not, then we've hit the end of the buffer.
+	 */
+	if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
+	    /* this was really a NUL */
+	    *yy_c_buf_p = '\0';
+
+	else
+	    { /* need more input */
+%% code to turn off yy_doing_yy_more and yy_more_len goes here
+	    yytext = yy_c_buf_p; /* read back below to reposition yy_c_buf_p */
+	    ++yy_c_buf_p;
+
+	    switch ( yy_get_next_buffer() )
+		{
+		case EOB_ACT_END_OF_FILE:
+		    {
+		    yy_did_buffer_switch_on_eof = 0; /* tested after yywrap() */
+
+		    if ( yywrap() )
+			{
+			yy_c_buf_p = yytext; /* + YY_MORE_ADJ not needed */
+			return ( EOF );
+			}
+		    else
+			{ /* yywrap() returned 0: retry via the call below */
+			if ( ! yy_did_buffer_switch_on_eof )
+			    YY_NEW_FILE;
+			}
+
+#ifdef __cplusplus
+		    return ( yyinput() );
+#else
+		    return ( input() );
+#endif
+		    }
+		    break; /* not reached: every path above returns */
+
+		case EOB_ACT_CONTINUE_SCAN:
+		    yy_c_buf_p = yytext; /* + YY_MORE_ADJ not needed */
+		    break;
+
+		case EOB_ACT_LAST_MATCH: /* last case, so no break follows */
+#ifdef __cplusplus
+		    YY_FATAL_ERROR( "unexpected last match in yyinput()" );
+#else
+		    YY_FATAL_ERROR( "unexpected last match in input()" );
+#endif
+		}
+	    }
+	}
+
+    c = *yy_c_buf_p;
+    yy_hold_char = *++yy_c_buf_p; /* advance and hold the next character */
+
+    return ( c );
+    }
+
+    Add in GEN.C a function 'gen_input()':
+/* generate the code in input() that turns off yy_doing_yy_more and
+ * yy_more_len; nothing is emitted when yymore() is not used
+ */
+
+void gen_input()
+
+    {
+    if ( ! yymore_used )
+	return;
+
+    indent_puts( "yy_doing_yy_more = yy_more_len = 0;" );
+    }
+
+    Finally add in the function 'make_tables()' behind the call of
+    'gen_yyunput()':
+
+    set_indent( 3 );
+    skelout();
+    gen_input();
+
+PARSE.Y 54: ( 'goal'-rule: If there is no rule in the input file, the end of
+    the prolog is not marked yet, because 'flexscan()' is still in the start
+    condition <SECT2PROLOG> and the rule <SECT2PROLOG><<EOF>> is not done up
+    to now. Therefore mark the end of prolog here, before you add the default
+    rule. I test here on " num_rules == 1 ", because the 'initforrule'-rule
+    increments 'num_rules' before this action is executed.)
+
+			if ( num_rules == 1 )
+			    fprintf( temp_action_file, "%%%% end of prolog\n" )
+;
+
+SCAN.L 255: ( '<SECT2PROLOG><<EOF>>'-rule: If there are no rules at all in
+    the input file, then this rule will be executed at the end of
+    'make_tables()'. At this point 'temp_action_file' was closed for writing
+    and has been reopened for reading. The macro MARK_END_OF_PROLOG will
+    therefore lead to a write-error.
+    To avoid this error add the condition " if ( num_rules == 0 ) ". If this
+    rule is executed at the end of 'make_tables()' there will be at least the
+    default rule, i.e. 'num_rules' will be greater than 0.
+    Remark: This correction together with the one before will allow an input
+    file which just consists of "%%". ( Copy 'stdin' to 'stdout'. ))
+
+<SECT2PROLOG><<EOF>>		{
+			if ( num_rules == 0 )
+			    MARK_END_OF_PROLOG;
+			yyterminate();
+			}
+
+MISC.C 376: ( flexfatal(): The call of 'flexend( 1 )' will lead to an
+    infinite loop if 'flexfatal()' is called from 'flexend()'. I therefore
+    introduced the flag 'doing_flexend' to prevent 'flexend()' from being called
+    more than once.)
+
+    Replace the function call 'flexend( 1 );' in MISC.C, line 376, by
+    if ( ! doing_flexend )
+	flexend( 1 );
+
+    Set 'doing_flexend' at the beginning of 'flexend()' in MAIN.C, line 195:
+    doing_flexend = true;
+
+    Add in FLEXDEF.H, line 381, the declaration of 'doing_flexend':
+extern int yymore_used, reject, real_reject, continued_action, doing_flexend;
+
+    Add in FLEXDEF.H, line 376, a comment for this variable:
+ * doing_flexend - true if flexend() has been started
+
+    Initialize 'doing_flexend' in 'flexinit()' in MAIN.C, line 401:
+    yymore_used = continued_action = reject = doing_flexend = false;
+
+FLEX.SKEL 94: ( 'YY_INPUT()'-macro: I have problems with 'fileno()' and
+    'read()'.
+    I used the C Compiler of the BORLAND C++ Compiler and compiled the created
+    scanner with the option 'ANSI keywords'.
+    In this compiler the prototype of the function 'read(...)' is declared in
+    the header file 'io.h' and not in 'stdio.h'. Therefore I get a warning.
+    Real trouble caused 'fileno' which is defined as macro in 'stdio.h':
+	#define fileno(f)       ((f)->fd)
+    However this macro does not belong to the 'ANSI keywords' because it is
+    define'd under the condition " #if !__STDC__ ". Therefore I get a warning
+    and a linker error that the function 'fileno()' does not exist.
+    (I can avoid this problem by adding the above define-macro in the *.l file
+    or by replacing the option 'ANSI keywords' by 'Borland C++ keywords'.))
--- a/to.do/Wish-List
+++ b/to.do/Wish-List
@ -0,0 +1,123 @@
+start conditions given own name space by making them structure fields
+	#define BEGIN(x) yy_start_state = yy_states->x
+reentrant/
+streams/
+yylineno maintained per input buffer
+use yyconst instead of const, to fix __STDC__ == 0 problem
+scan input for unput()
+-CF/-Cf support interactive scanners
+reject_really_used -> maintain_backup_tables
+full library encapsulation: flex'ing on the fly
+fix MAX_MNS/MARKER_DIFFERENCE to not be a hard limit
+Two flags to warn when something is seen that lex or posix might interpret
+  differently; this should be quite doable as -l already exists. Proposed
+  names: -Wl, -Wp.
+reentrant C scanners
+yy_fseek() for positioning in input file
+set-able "at beginning of line" , no more unput() trashes yytext?
+yy_unput_string(); unput() shifts yytext to preserve it, grows buffer as needed
+yy_malloc_type as void* so can be easily switched to char* for poor
+	hopeless bastards running SunSoft stuff?
+public "TODO" file, requesting help?
+test -P to make sure it's not broken now due to e.g. yy_scan_string
+%option
+hook for treating input interactively even if not isatty()
+scan.l:22:error message :-( (see flex.todo)
+document yy_fill_buffer
+lint, gcc-lint
+-lfl removed from flex.1
+merge 2.4.6, e.g., NEWS
+'|' action copies action instead of omitting break
+if yy_current_buffer defined on entry to yylex(), don't promote nil yyin
+	to stdin, etc.
+multibyte character flex
+
+
+ANSI only
+multiple inclusion of <stdlib.h>?
+[=...=] POSIX stuff
+flex.todo
+yylineno, yycol by checking for whether rules can match embedded newlines,
+	only trailing newlines, always trailing newlines, or no newlines
+compute transition path to each DFA state, to aid in backtracking
+	for each state, store pointer to predecessor, character for xtion
+merge flex.1, flexdoc.1?
+bison++ interface
+YYLEXER_NAME
+out-line FlexLexer destructors
+GNU readline contrib?
+isatty() decl?
+#ifdef chud for unput() etc. not being used?
+	"../scan.l", line 207: warning:  ::yy_did_buffer_switch_on_eof defined but not used
+	cc   -c  -g scan.c
+	"scan.cc", line 1752: warning: statement not reached
+alloca.c removed from Makefile
+// comments
+
+output partitioning for e.g., scanning tables, actions, etc.
+
+ 497  09/11 14:17-PDT  3450 To:t_bonner@oscar  Re: Modifying yytext in an actio
+MISC stuff non-writeable
+texinfo version of manual
+
+	ALSO: document how to do so (including no need to redefine unput()),
+		whether feature added or not
+
+example of "error" backtracking rules as opposed to "catch-all"
+get rid of get_previous_state via accepting #'s tied to previous state #'s
+-p tells something about backtracking
+easy way to scan strings instead of files
+input() across buffer boundaries, buffer overflow; unput() fix
+start state stack
+NLSTATE - sets "in newline" state; also mechanism to clear "in newline" state
+checks for bogus backtrack rules  ... - rule shadowing
+document incompatibility with lex when unput()'ing a newline
+	after a newline has been read
+document that comments are not allowed on definition lines
+	foo  bar  /* the "foo" definition ... */
+perhaps indented code in section 2 leads to warnings?
+#line directives for code at beginning of scanner routine
+nuke %used etc.
+hooks for direct access to the buffer, e.g. for flushing it
+options in .l file as well as on command line; particularly the rename-prefix
+	option
+clarify "eat up * not followed by /" in <comment> example; move it to
+    performance, offer simpler version for start states
+hook for finding out how much text can be safely pushed back
+the .backtrack code knows how to identify characters that cause transitions 
+	(you wanted this for some clearer error messages for the
+	 "default rule can be matched")
+yy_switch_to_buffer sets yy_init to 0?
+handy library routines, such as yy_C_comment(), yy_C_string(),
+obey #line directives in input; first, get rid of # comments ...
+flex.h header for declarations of e.g., yymore(), yytext?
+	but what about %array making the yytext definition out of date?
+merge w/ okeeffe code
+rearrange the Performance Considerations section so that the easy
+    fixes come first
+copyright notice in manuals?
+input() updates yytext and yyleng; perhaps unput too???;
+	right now it trashes them (doesn't restore '\0')
+document that yyleng can now be modified
+	except if yymore() used?
+anchoring allowed inside ()'s - (^abc|def$)
+unput() propagates non-newline state too?
+complain about invalid anchoring - foo(^abc), (^abc)+
+library in its own directory
+yylineno
+example in flexdoc on YY_INPUT reading from input()
+redesign for retargetability (i.e., use w/ other languages ...)
+clean up escape expansion
+bison @N
+example for doc. on scanning strings w/ escapes in them:
+POSIX/
+get rid of duplicated code between "re2 re" rule and "re '$'" rule
+preformatted man pages for VMS sites, possibly using col -b to get rid
+	of backspaces ...
+slurp entire input file into mega-buffer; allows pointers to in-place
+	identifiers
+lex compatibility flag
+update flags in docs
+-n removed from POSIX?
+"MAKE = ..." shouldn't be commented out, or else bigtest can fail
+BSD man macros
--- a/to.do/flex.rmail
+++ b/to.do/flex.rmail
--- a/to.do/unicode/FlexLexer.h
+++ b/to.do/unicode/FlexLexer.h
@ -0,0 +1,195 @@
+// $Header$
+
+// FlexLexer.h -- define interfaces for lexical analyzer classes generated
+//		  by flex
+
+// Copyright (c) 1993 The Regents of the University of California.
+// All rights reserved.
+//
+// This code is derived from software contributed to Berkeley by
+// Kent Williams and Tom Epperly.
+//
+// Redistribution and use in source and binary forms are permitted provided
+// that: (1) source distributions retain this entire copyright notice and
+// comment, and (2) distributions including binaries display the following
+// acknowledgement:  ``This product includes software developed by the
+// University of California, Berkeley and its contributors'' in the
+// documentation or other materials provided with the distribution and in
+// all advertising materials mentioning features or use of this software.
+// Neither the name of the University nor the names of its contributors may
+// be used to endorse or promote products derived from this software without
+// specific prior written permission.
+// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+// This file defines FlexLexer, an abstract class which specifies the
+// external interface provided to flex C++ lexer objects, and yyFlexLexer,
+// which defines a particular lexer class.
+//
+// If you want to create multiple lexer classes, you use the -P flag
+// to rename each yyFlexLexer to some other xxFlexLexer.  You then
+// include <FlexLexer.h> in your other sources once per lexer class:
+//
+//	#undef yyFlexLexer
+//	#define yyFlexLexer xxFlexLexer
+//	#include <FlexLexer.h>
+//
+//	#undef yyFlexLexer
+//	#define yyFlexLexer zzFlexLexer
+//	#include <FlexLexer.h>
+//	...
+//
+// Since this header is generic for all sizes of flex scanners, you must
+// define the type YY_CHAR before including it:
+//
+//	typedef xxx YY_CHAR;
+//	#include <FlexLexer.h>
+//	...
+//
+// where xxx = char for 7-bit scanners, unsigned char for 8-bit, and
+// wchar_t for 16-bit.
+
+#ifndef __FLEX_LEXER_H
+// Never included before - need to define base class.
+#define __FLEX_LEXER_H
+#include <iostream.h>
+
+extern "C++" {
+
+struct yy_buffer_state;
+typedef int yy_state_type;
+
+// FlexLexer - abstract interface common to all generated C++ scanners.
+// Buffer management, stream switching and the no-argument yylex() are
+// pure virtual and must be supplied by a derived class.  YY_CHAR has to
+// be typedef'd before this header is included.
+//
+// No constructor is declared here, so the protected data members are
+// not initialized by this class itself.
+class FlexLexer {
+public:
+	virtual ~FlexLexer()	{ }
+
+	// Read-only accessors for the protected yytext / yyleng members.
+	const YY_CHAR* YYText()	{ return yytext; }
+	int YYLeng()		{ return yyleng; }
+
+	// Buffer-management interface; implemented by the derived scanner.
+	virtual void
+		yy_switch_to_buffer( struct yy_buffer_state* new_buffer ) = 0;
+	virtual struct yy_buffer_state*
+		yy_create_buffer( istream* s, int size ) = 0;
+	virtual void yy_delete_buffer( struct yy_buffer_state* b ) = 0;
+	virtual void yyrestart( istream* s ) = 0;
+
+	// The scanning routine proper; implemented by the derived scanner.
+	virtual int yylex() = 0;
+
+	// Call yylex with new input/output sources.
+	int yylex( istream* new_in, ostream* new_out = 0 )
+		{
+		switch_streams( new_in, new_out );
+		return yylex();
+		}
+
+	// Switch to new input/output streams.  A nil stream pointer
+	// indicates "keep the current one".
+	virtual void switch_streams( istream* new_in = 0,
+					ostream* new_out = 0 ) = 0;
+
+	// Returns yylineno (see the note on that member below).
+	int lineno() const		{ return yylineno; }
+
+	// Query / set the yy_flex_debug flag.
+	int debug() const		{ return yy_flex_debug; }
+	void set_debug( int flag )	{ yy_flex_debug = flag; }
+
+protected:
+	YY_CHAR* yytext;
+	int yyleng;
+	int yylineno;		// only maintained if you use %option yylineno
+	int yy_flex_debug;	// only has effect with -d or "%option debug"
+};
+
+}
+#endif
+
+#if defined(yyFlexLexer) || ! defined(yyFlexLexerOnce)
+// Either this is the first time through (yyFlexLexerOnce not defined),
+// or this is a repeated include to define a different flavor of
+// yyFlexLexer, as discussed in the flex man page.
+#define yyFlexLexerOnce
+
+// yyFlexLexer - scanner class derived from FlexLexer.  It declares
+// overrides for every pure virtual of the base class; the member
+// function bodies are not part of this header.  The name may be
+// #define'd to something else before inclusion to obtain several
+// differently-named scanner classes (see the comment at the top of
+// this file).
+class yyFlexLexer : public FlexLexer {
+public:
+	// arg_yyin and arg_yyout default to the cin and cout, but we
+	// only make that assignment when initializing in yylex().
+	yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 );
+
+	virtual ~yyFlexLexer();
+
+	// Overrides of the FlexLexer buffer-management interface.
+	void yy_switch_to_buffer( struct yy_buffer_state* new_buffer );
+	struct yy_buffer_state* yy_create_buffer( istream* s, int size );
+	void yy_delete_buffer( struct yy_buffer_state* b );
+	void yyrestart( istream* s );
+
+	virtual int yylex();
+	virtual void switch_streams( istream* new_in, ostream* new_out );
+
+protected:
+	// Virtual I/O and error hooks; a subclass may override them.
+	// Buffers are expressed in YY_CHAR units, error text in plain char.
+	virtual int LexerInput( YY_CHAR* buf, int max_size );
+	virtual void LexerOutput( const YY_CHAR* buf, int size );
+	virtual void LexerError( const char* msg );
+
+	void yyunput( int c, YY_CHAR* buf_ptr );
+	int yyinput();
+
+	void yy_load_buffer_state();
+	void yy_init_buffer( struct yy_buffer_state* b, istream* s );
+	void yy_flush_buffer( struct yy_buffer_state* b );
+
+	// NOTE(review): presumably the storage behind yy_push_state() /
+	// yy_pop_state() / yy_top_state() below - confirm against the
+	// skeleton, the bodies are not visible in this header.
+	int yy_start_stack_ptr;
+	int yy_start_stack_depth;
+	int* yy_start_stack;
+
+	void yy_push_state( int new_state );
+	void yy_pop_state();
+	int yy_top_state();
+
+	yy_state_type yy_get_previous_state();
+	yy_state_type yy_try_NUL_trans( yy_state_type current_state );
+	int yy_get_next_buffer();
+
+	istream* yyin;	// input source for default LexerInput
+	ostream* yyout;	// output sink for default LexerOutput
+
+	struct yy_buffer_state* yy_current_buffer;
+
+	// yy_hold_char holds the character lost when yytext is formed.
+	YY_CHAR yy_hold_char;
+
+	// Number of characters read into yy_ch_buf.
+	int yy_n_chars;
+
+	// Points to current character in buffer.
+	YY_CHAR* yy_c_buf_p;
+
+	int yy_init;		// whether we need to initialize
+	int yy_start;		// start state number
+
+	// Flag which is used to allow yywrap()'s to do buffer switches
+	// instead of setting up a fresh yyin.  A bit of a hack ...
+	int yy_did_buffer_switch_on_eof;
+
+	// The following are not always needed, but may be depending
+	// on use of certain flex features (like REJECT or yymore()).
+
+	yy_state_type yy_last_accepting_state;
+	YY_CHAR* yy_last_accepting_cpos;
+
+	yy_state_type* yy_state_buf;
+	yy_state_type* yy_state_ptr;
+
+	YY_CHAR* yy_full_match;
+	int* yy_full_state;
+	int yy_full_lp;
+
+	int yy_lp;
+	int yy_looking_for_trail_begin;
+
+	int yy_more_flag;
+	int yy_more_len;
+	int yy_more_offset;
+	int yy_prev_more_offset;
+};
+
+#endif
--- a/to.do/unicode/ccl.c
+++ b/to.do/unicode/ccl.c
@ -0,0 +1,149 @@
+/* ccl - routines for character classes */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Vern Paxson.
+ * 
+ * The United States Government has rights in this work pursuant
+ * to contract no. DE-AC03-76SF00098 between the United States
+ * Department of Energy and the University of California.
+ *
+ * Redistribution and use in source and binary forms are permitted provided
+ * that: (1) source distributions retain this entire copyright notice and
+ * comment, and (2) distributions including binaries display the following
+ * acknowledgement:  ``This product includes software developed by the
+ * University of California, Berkeley and its contributors'' in the
+ * documentation or other materials provided with the distribution and in
+ * all advertising materials mentioning features or use of this software.
+ * Neither the name of the University nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+/* $Header$ */
+
+#include "flexdef.h"
+
+/* ccladd - append one character to a character class
+ *
+ * cclp is the number of the ccl to extend, ch the character to add.
+ * ch is validated with check_char() first.  If the class already holds
+ * ch nothing is changed.  Otherwise ch is stored directly after the
+ * class's current members in ccltbl, which is grown by
+ * MAX_CCL_TBL_SIZE_INCREMENT entries when the new slot lies beyond its
+ * current size.
+ */
+
+void ccladd( cclp, ch )
+int cclp;
+int ch;
+	{
+	int base, count, slot, k;
+
+	check_char( ch );
+
+	base = cclmap[cclp];
+	count = ccllen[cclp];
+
+	/* Nothing to do if the class already contains this character. */
+	k = 0;
+	while ( k < count )
+		{
+		if ( ccltbl[base + k] == ch )
+			return;
+		++k;
+		}
+
+	/* The new member goes right behind the existing ones. */
+	slot = base + count;
+
+	if ( slot >= current_max_ccl_tbl_size )
+		{
+		/* Table exhausted - enlarge it and note the reallocation. */
+		++num_reallocs;
+		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
+
+		ccltbl = reallocate_wchar_array( ccltbl,
+						current_max_ccl_tbl_size );
+		}
+
+	ccltbl[slot] = ch;
+	ccllen[cclp] = count + 1;
+	}
+
+
+/* cclinit - create a new, empty ccl and return its number
+ *
+ * Bumps lastccl, growing the cclmap, ccllen and cclng arrays by
+ * MAX_CCLS_INCREMENT entries when they are full.  The new class starts
+ * out with no members and un-negated.
+ */
+
+int cclinit()
+	{
+	int prev;
+
+	++lastccl;
+
+	if ( lastccl >= current_maxccls )
+		{
+		++num_reallocs;
+		current_maxccls += MAX_CCLS_INCREMENT;
+
+		cclmap = reallocate_integer_array( cclmap, current_maxccls );
+		ccllen = reallocate_integer_array( ccllen, current_maxccls );
+		cclng = reallocate_integer_array( cclng, current_maxccls );
+		}
+
+	if ( lastccl != 1 )
+		{
+		/* cclmap holds the index of a ccl's first character, so
+		 * the first free slot is the previous ccl's start plus
+		 * its length.
+		 */
+		prev = lastccl - 1;
+		cclmap[lastccl] = cclmap[prev] + ccllen[prev];
+		}
+	else
+		/* The very first ccl starts at the beginning of ccltbl. */
+		cclmap[lastccl] = 0;
+
+	cclng[lastccl] = 0;	/* not negated */
+	ccllen[lastccl] = 0;	/* no members yet */
+
+	return lastccl;
+	}
+
+
+/* cclnegate - negate the given ccl
+ *
+ * Sets the negation flag cclng[cclp] to 1.  The flag is only ever set
+ * here, never toggled, so calling this twice does not undo the negation.
+ */
+
+void cclnegate( cclp )
+int cclp;
+	{
+	cclng[cclp] = 1;
+	}
+
+
+/* list_character_set - print a set of characters in CCL notation
+ *
+ * cset is indexed by character code (0 .. csize - 1); a non-zero entry
+ * means the character belongs to the set.  The set is written to file
+ * as '[' ... ']', each maximal run of consecutive members appearing as
+ * " X " or " X-Y " using readable_form() for the end points.
+ */
+
+void list_character_set( file, cset )
+FILE *file;
+int cset[];
+	{
+	register int pos = 0;
+
+	putc( '[', file );
+
+	while ( pos < csize )
+		{
+		register int first, last;
+
+		if ( ! cset[pos] )
+			{
+			++pos;
+			continue;
+			}
+
+		/* pos starts a run of members; print its first character. */
+		first = pos;
+
+		putc( ' ', file );
+		fputs( readable_form( first ), file );
+
+		/* Advance to the first non-member (or the end). */
+		do
+			++pos;
+		while ( pos < csize && cset[pos] );
+
+		last = pos - 1;
+
+		if ( last > first )
+			/* More than one character: show the upper bound. */
+			fprintf( file, "-%s", readable_form( last ) );
+
+		putc( ' ', file );
+
+		/* pos is a known non-member (or csize); step past it. */
+		++pos;
+		}
+
+	putc( ']', file );
+	}
--- a/to.do/unicode/changes.txt
+++ b/to.do/unicode/changes.txt
@ -0,0 +1,102 @@
+Summary of changes for flex Unicode support
+
+- ccl.c
+  - ccladd()
+    - changed call to reallocate_Character_array to reallocate_wchar_array
+
+- ecs.c
+  - mkeccl()
+    - changed type of ccls from Char to wchar_t
+
+- flex.1
+  - added description of -U option
+  - added extra qualifier to -Ca option regarding usage with -U
+  - modified -C, -Cf, and -CF options regarding usage with -U
+
+- flex.skl
+  - changed all references of char (except error messages) to YY_CHAR
+  - added new insertion point for defining YY_CHAR and YY_SC_TO_UI()
+  - yy_scan_bytes()
+    - renamed to yy_scan_chars to avoid confusion with 2-byte chars
+    - renamed param bytes to chars
+  - ECHO
+    - redefined C version as fwrite(yytext, sizeof(YY_CHAR), yyleng, yyout)
+  - YY_INPUT
+    - removed char* cast on param buf of C++ version
+  - yyFlexLexer::LexerInput()
+    - changed get() call to read((unsigned char *) buf, sizeof(YY_CHAR))
+    - changed read() call to read((unsigned char *) buf, max_size *
+      sizeof(YY_CHAR))
+    - changed gcount() call to gcount() / sizeof(YY_CHAR)
+  - yyFlexLexer::LexerOutput()
+    - changed write() call to write((unsigned char *) buf, size *
+      sizeof(YY_CHAR))
+  - yy_get_next_buffer()
+    - yy_flex_realloc() call
+      - changed param b->yy_buf_size + 2 to
+        (b->yy_buf_size + 2) * sizeof(YY_CHAR)
+  - input() and yyFlexLexer::yyinput()
+    - changed line c = *(unsigned char *) yy_c_buf_p; to
+      c = YY_SC_TO_UI(*yy_c_buf_p);
+
+- flexdef.h
+  - defined CSIZE as 65536
+  - changed myesc() proto to return int
+  - changed type of ccltbl from Char * to wchar_t *
+  - added allocate_wchar_array() and reallocate_wchar_array() macros
+  - changed mkeccl() proto's first param to wchar_t[]
+  - changed cshell() proto's first param to wchar_t[]
+
+
+- FlexLexer.h
+  - changed all references of char (except error messages) to YY_CHAR
+  - added description about typedef'ing YY_CHAR before inclusion
+
+- gen.c
+  - changed appropriate references of char in output strings to YY_CHAR
+  - added C_uchar_decl and C_ushort_decl for 16-bit yy_ec type bump-down
+  - genecs()
+    - added code for 16-bit yy_ec type bump; -Ca bumps type to long
+  - make_tables()
+    - YY_INPUT
+      - read redefined as read(..., max_size * sizeof(YY_CHAR))
+      - added code to use getwc() and WEOF for 16-bit interactive
+      - fread redefined as fread(buf, sizeof(YY_CHAR), max_size, yyin)
+
+- main.c
+  - changed type of ccltbl from Char * to wchar_t *
+  - check_options()
+    - changed default csize from CSIZE to 256 due to redef of CSIZE
+    - added code to check for options incompatible with -U
+    - added code to output typedef of YY_CHAR to skeleton, plus extra call to
+      skelout() to get down to original insertion point
+  - flexend()
+    - added code to print "U" when printing stats
+  - flexinit()
+    - added code to set csize for option -U
+    - changed assignment of csize in option -8 from CSIZE to 256 due to redef
+      of CSIZE
+  - readin()
+    - changed appropriate references of char in output strings to YY_CHAR
+    - removed output of YY_CHAR typedef; now located in check_options()
+  - usage()
+    - added fprintf for -U usage
+
+- misc.c
+  - check_char()
+    - added code to distinguish chars needing -8 and -U flags
+  - cshell()
+    - changed type of v from Char to wchar_t
+    - changed type of k from Char to wchar_t
+  - myesc()
+    - now returns an int to handle 16-bit escape sequences
+    - changed esc_char from Char to unsigned int as per htoi() and otoi()
+
+- scan.l
+  - changed ESCSEQ to accept 6 digit octal escapes and 4 digit hex escapes
+  - removed myesc() and ndlookup() protos
+  - added option "16bit"
+
+- tblcmp.c
+  - mktemplate()
+    - changed type of transset from Char to wchar_t
--- a/to.do/unicode/ecs.c
+++ b/to.do/unicode/ecs.c
@ -0,0 +1,225 @@
+/* ecs - equivalence class routines */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Vern Paxson.
+ * 
+ * The United States Government has rights in this work pursuant
+ * to contract no. DE-AC03-76SF00098 between the United States
+ * Department of Energy and the University of California.
+ *
+ * Redistribution and use in source and binary forms are permitted provided
+ * that: (1) source distributions retain this entire copyright notice and
+ * comment, and (2) distributions including binaries display the following
+ * acknowledgement:  ``This product includes software developed by the
+ * University of California, Berkeley and its contributors'' in the
+ * documentation or other materials provided with the distribution and in
+ * all advertising materials mentioning features or use of this software.
+ * Neither the name of the University nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+/* $Header$ */
+
+#include "flexdef.h"
+
+/* ccl2ecl - convert character classes to sets of equivalence classes
+ *
+ * Every ccl (1 .. lastccl) is rewritten in place inside ccltbl: each
+ * member character is looked up in ecgroup and, when the entry is
+ * positive, that equivalence class number is stored instead.  Entries
+ * that are not positive are dropped, so a ccl can only shrink; ccllen
+ * is updated to the new length.
+ */
+
+void ccl2ecl()
+	{
+	int ccl, start, kept, pos, member, ec;
+
+	for ( ccl = 1; ccl <= lastccl; ++ccl )
+		{
+		start = cclmap[ccl];
+		kept = 0;
+
+		for ( pos = 0; pos < ccllen[ccl]; ++pos )
+			{
+			member = ccltbl[start + pos];
+			ec = ecgroup[member];
+
+			if ( ec <= 0 )
+				continue;
+
+			/* kept never exceeds pos, so this write cannot
+			 * clobber a member that is still to be read.
+			 */
+			ccltbl[start + kept] = ec;
+			++kept;
+			}
+
+		ccllen[ccl] = kept;
+		}
+	}
+
+
+/* cre8ecs - number the equivalence classes
+ *
+ * fwd and bck are the forward and backward links of the class member
+ * lists, covering objects 1 .. num.  An object whose bck link is NIL
+ * heads a list; it receives the next class number (stored positive in
+ * bck), and every object reachable from it through fwd receives the
+ * same number negated.  Afterwards ABS( bck[x] ) is x's class number
+ * and a positive bck[x] marks the class representative.
+ *
+ * Returns the number of classes found.
+ */
+
+int cre8ecs( fwd, bck, num )
+int fwd[], bck[], num;
+	{
+	int head, member;
+	int numcl = 0;
+
+	for ( head = 1; head <= num; ++head )
+		{
+		if ( bck[head] != NIL )
+			continue;
+
+		++numcl;
+		bck[head] = numcl;
+
+		member = fwd[head];
+		while ( member != NIL )
+			{
+			bck[member] = -numcl;
+			member = fwd[member];
+			}
+		}
+
+	return numcl;
+	}
+
+
+/* mkeccl - update equivalence classes based on character class xtions
+ *
+ * synopsis
+ *    wchar_t ccls[];
+ *    int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
+ *    void mkeccl( wchar_t ccls[], int lenccl, int fwd[llsiz],
+ *			int bck[llsiz], int llsiz, int NUL_mapping );
+ *
+ * ccls contains the elements of the character class, lenccl is the
+ * number of elements in the ccl, fwd is the forward link-list of equivalent
+ * characters, bck is the backward link-list, and llsiz size of the link-list.
+ *
+ * NUL_mapping is the value which NUL (0) should be mapped to.
+ *
+ * For each not-yet-processed ccl member the routine walks that member's
+ * current equivalence class and splits it in two: the members that also
+ * occur in the ccl (linked behind the ccl member) and those that do not
+ * (kept in the old class).  cclflags[] remembers, by position in ccls,
+ * which ccl members were already absorbed that way; the flags are
+ * cleared again as the outer scan steps over them, so the static array
+ * is all zero between calls.
+ *
+ * NOTE(review): the early "ccl_char > i" exit in the inner scan looks
+ * like it relies on ccls being sorted in increasing order - confirm
+ * against the callers.
+ */
+
+void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
+wchar_t ccls[];
+int lenccl, fwd[], bck[], llsiz, NUL_mapping;
+	{
+	int cclp, oldec, newec;
+	int cclm, i, j;
+	static unsigned char cclflags[CSIZE];	/* initialized to all '\0' */
+
+	/* Note that it doesn't matter whether or not the character class is
+	 * negated.  The same results will be obtained in either case.
+	 */
+
+	cclp = 0;
+
+	while ( cclp < lenccl )
+		{
+		cclm = ccls[cclp];
+
+		if ( NUL_mapping && cclm == 0 )
+			cclm = NUL_mapping;
+
+		oldec = bck[cclm];
+		newec = cclm;
+
+		j = cclp + 1;
+
+		for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
+			{ /* look for the symbol in the character class */
+			for ( ; j < lenccl; ++j )
+				{
+				register int ccl_char;
+
+				if ( NUL_mapping && ccls[j] == 0 )
+					ccl_char = NUL_mapping;
+				else
+					ccl_char = ccls[j];
+
+				if ( ccl_char > i )
+					break;
+
+				if ( ccl_char == i && ! cclflags[j] )
+					{
+					/* We found an old companion of cclm
+					 * in the ccl.  Link it into the new
+					 * equivalence class and flag it as
+					 * having been processed.
+					 */
+
+					bck[i] = newec;
+					fwd[newec] = i;
+					newec = i;
+					/* Set flag so we don't reprocess. */
+					cclflags[j] = 1;
+
+					/* Get next equivalence class member. */
+					/* continue 2 */
+					goto next_pt;
+					}
+				}
+
+			/* Symbol isn't in character class.  Put it in the old
+			 * equivalence class.
+			 */
+
+			bck[i] = oldec;
+
+			if ( oldec != NIL )
+				fwd[oldec] = i;
+
+			oldec = i;
+
+			next_pt: ;
+			}
+
+		if ( bck[cclm] != NIL || oldec != bck[cclm] )
+			{
+			bck[cclm] = NIL;
+			fwd[oldec] = NIL;
+			}
+
+		fwd[newec] = NIL;
+
+		/* Find next ccl member to process.  The bound is tested
+		 * before the flag is read: cclflags has only CSIZE entries,
+		 * so with lenccl == CSIZE looking at cclflags[cclp] first
+		 * would read one element past the end of the array.
+		 */
+
+		for ( ++cclp; cclp < lenccl && cclflags[cclp]; ++cclp )
+			{
+			/* Reset "doesn't need processing" flag. */
+			cclflags[cclp] = 0;
+			}
+		}
+	}
+
+
+/* mkechar - create equivalence class for single character
+ *
+ * Unlinks tch from the doubly-linked member list it currently sits in
+ * (its neighbours, if any, are joined to each other) and leaves it as
+ * a list of its own with both links NIL.
+ */
+
+void mkechar( tch, fwd, bck )
+int tch, fwd[], bck[];
+	{
+	int next = fwd[tch];
+	int prev = bck[tch];
+
+	/* Close the gap tch leaves behind. */
+	if ( next != NIL )
+		bck[next] = prev;
+
+	if ( prev != NIL )
+		fwd[prev] = next;
+
+	/* tch is now alone in its class. */
+	bck[tch] = NIL;
+	fwd[tch] = NIL;
+	}
--- a/to.do/unicode/flex.1
+++ b/to.do/unicode/flex.1
--- a/to.do/unicode/flex.skl
+++ b/to.do/unicode/flex.skl
--- a/to.do/unicode/flexdef.h
+++ b/to.do/unicode/flexdef.h
--- a/to.do/unicode/gen.c
+++ b/to.do/unicode/gen.c
--- a/to.do/unicode/main.c
+++ b/to.do/unicode/main.c
--- a/to.do/unicode/misc.c
+++ b/to.do/unicode/misc.c
@ -0,0 +1,894 @@
+/* misc - miscellaneous flex routines */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Vern Paxson.
+ * 
+ * The United States Government has rights in this work pursuant
+ * to contract no. DE-AC03-76SF00098 between the United States
+ * Department of Energy and the University of California.
+ *
+ * Redistribution and use in source and binary forms are permitted provided
+ * that: (1) source distributions retain this entire copyright notice and
+ * comment, and (2) distributions including binaries display the following
+ * acknowledgement:  ``This product includes software developed by the
+ * University of California, Berkeley and its contributors'' in the
+ * documentation or other materials provided with the distribution and in
+ * all advertising materials mentioning features or use of this software.
+ * Neither the name of the University nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+/* $Header$ */
+
+#include "flexdef.h"
+
+
+/* action_define - append "#define defname value" to the action text
+ *
+ * Names longer than MAXLINE / 2 are rejected with a pinpointed error
+ * message and nothing is added; that limit also keeps the formatted
+ * line inside the MAXLINE-sized staging buffer.
+ */
+void action_define( defname, value )
+char *defname;
+int value;
+	{
+	char buf[MAXLINE];
+
+	if ( (int) strlen( defname ) <= MAXLINE / 2 )
+		{
+		sprintf( buf, "#define %s %d\n", defname, value );
+		add_action( buf );
+		}
+	else
+		format_pinpoint_message( _( "name \"%s\" ridiculously long" ),
+			defname );
+	}
+
+
+/* add_action - append a string to the accumulated action text
+ *
+ * new_text is copied to action_array at action_index, and action_index
+ * is advanced by its length (the terminating NUL is written but not
+ * counted).  The array is enlarged first, repeatedly if necessary,
+ * until the text fits with 10 characters of slop to spare.
+ */
+void add_action( new_text )
+char *new_text;
+	{
+	int text_len = strlen( new_text );
+
+	for ( ; ; )
+		{
+		int doubled;
+
+		if ( text_len + action_index < action_size - 10 /* slop */ )
+			break;
+
+		doubled = action_size * 2;
+
+		if ( doubled > 0 )
+			action_size = doubled;
+		else
+			/* Doubling wrapped around; increase just a little,
+			 * to try to avoid overflow on 16-bit machines.
+			 */
+			action_size = action_size + action_size / 8;
+
+		action_array =
+			reallocate_character_array( action_array, action_size );
+		}
+
+	strcpy( action_array + action_index, new_text );
+
+	action_index += text_len;
+	}
+
+
+/* allocate_array - allocate memory for an array
+ *
+ * size is the number of elements and element_size the size of one
+ * element in bytes; size * element_size bytes are requested from
+ * flex_alloc().  If the allocation fails flexfatal() is called.
+ * The memory is returned as obtained from flex_alloc(); this routine
+ * does not initialize it.
+ */
+
+void *allocate_array( size, element_size )
+int size;
+size_t element_size;
+	{
+	size_t total = element_size * size;
+	void *block = flex_alloc( total );
+
+	if ( block == NULL )
+		flexfatal(
+			_( "memory allocation failed in allocate_array()" ) );
+
+	return block;
+	}
+
+
+/* all_lower - true if a string is all lower-case
+ *
+ * Returns 1 when every character of str is an ASCII lower-case letter
+ * (which includes the empty string), 0 as soon as one is not.
+ */
+
+int all_lower( str )
+register char *str;
+	{
+	for ( ; *str; ++str )
+		if ( ! (isascii( (Char) *str ) && islower( *str )) )
+			return 0;
+
+	return 1;
+	}
+
+
+/* all_upper - true if a string is all upper-case
+ *
+ * Returns 1 when every character of str is an ASCII upper-case letter
+ * (which includes the empty string), 0 as soon as one is not.
+ */
+
+int all_upper( str )
+register char *str;
+	{
+	for ( ; *str; ++str )
+		if ( ! (isascii( (Char) *str ) && isupper( *str )) )
+			return 0;
+
+	return 1;
+	}
+
+
+/* bubble - bubble sort an integer array in increasing order
+ *
+ * synopsis
+ *   int v[n + 1], n;
+ *   void bubble( v, n );
+ *
+ * description
+ *   sorts the n elements v[1] .. v[n] in place into increasing order.
+ *   Indexing is 1-based: v[0] is neither read nor written.
+ *
+ * passed
+ *   v - the array to be sorted
+ *   n - the number of elements of 'v' to be sorted
+ */
+
+void bubble( v, n )
+int v[], n;
+	{
+	register int last, pos, tmp;
+
+	/* After each pass the largest remaining value sits at v[last]. */
+	last = n;
+
+	while ( last > 1 )
+		{
+		for ( pos = 1; pos < last; ++pos )
+			{
+			if ( v[pos] <= v[pos + 1] )
+				continue;
+
+			/* out of order - swap the neighbours */
+			tmp = v[pos + 1];
+			v[pos + 1] = v[pos];
+			v[pos] = tmp;
+			}
+
+		--last;
+		}
+	}
+
+
+/* check_char - checks a character to make sure it's within the range
+ *		we're expecting.  If not, reports the problem through
+ *		lerrsf().
+ *
+ * Two limits apply: CSIZE, the largest character set flex can handle at
+ * all, and csize, the character set size currently in effect.  For a
+ * character at or beyond csize the message names the flag needed to
+ * use it: -8 for characters below 256, -U for anything larger.
+ */
+
+void check_char( c )
+int c;
+	{
+	if ( c >= CSIZE )
+		lerrsf( _( "bad character '%s' detected in check_char()" ),
+			readable_form( c ) );
+
+	if ( c < csize )
+		/* Fits the current character set - nothing to report. */
+		return;
+
+	if ( c < 256 )
+		lerrsf(
+		_( "scanner requires -8 flag to use the character %s" ),
+			readable_form( c ) );
+	else
+		lerrsf(
+		_( "scanner requires -U flag to use the character %s" ),
+			readable_form( c ) );
+	}
+
+
+
+/* clower - map an upper-case letter to lower-case
+ *
+ * ASCII upper-case letters are converted with tolower(); every other
+ * value is returned unchanged (converted to Char).
+ */
+
+Char clower( c )
+register int c;
+	{
+	if ( isascii( c ) && isupper( c ) )
+		return (Char) tolower( c );
+
+	return (Char) c;
+	}
+
+
+/* copy_string - returns a dynamically allocated copy of a string
+ *
+ * The copy, including its terminating NUL, lives in memory obtained
+ * from flex_alloc().  flexfatal() is called if that allocation fails.
+ */
+
+char *copy_string( str )
+register const char *str;
+	{
+	char *copy;
+	unsigned int size;
+
+	/* room for the characters plus the terminating NUL */
+	size = (strlen( str ) + 1) * sizeof( char );
+	copy = (char *) flex_alloc( size );
+
+	if ( copy == NULL )
+		flexfatal( _( "dynamic memory failure in copy_string()" ) );
+
+	strcpy( copy, str );
+
+	return copy;
+	}
+
+
+/* copy_unsigned_string -
+ *    returns a dynamically allocated copy of a (potentially) unsigned string
+ *
+ * The copy, including the terminating 0, is placed in an array obtained
+ * from allocate_Character_array().
+ */
+
+Char *copy_unsigned_string( str )
+register Char *str;
+	{
+	register Char *end;
+	register Char *dst;
+	Char *copy;
+
+	/* locate the terminating 0 */
+	end = str;
+	while ( *end )
+		++end;
+
+	copy = allocate_Character_array( end - str + 1 );
+
+	/* copy everything up to and including the terminator */
+	dst = copy;
+	do
+		*dst++ = *str;
+	while ( *str++ != 0 );
+
+	return copy;
+	}
+
+
+/* cshell - shell sort a character array in increasing order
+ *
+ * synopsis
+ *
+ *   wchar_t v[n];
+ *   int n, special_case_0;
+ *   cshell( v, n, special_case_0 );
+ *
+ * description
+ *   Does a shell sort of the first n elements of array v (v[0] through
+ *   v[n - 1]).  If special_case_0 is true, then any element equal to 0
+ *   is instead assumed to have infinite weight, i.e. it sorts after
+ *   every other value.
+ *
+ * passed
+ *   v - array to be sorted
+ *   n - number of elements of v to be sorted
+ */
+
+void cshell( v, n, special_case_0 )
+wchar_t v[];
+int n, special_case_0;
+	{
+	int gap, i, lo, hi;
+	int in_order;
+	wchar_t tmp;
+
+	for ( gap = n / 2; gap > 0; gap = gap / 2 )
+		for ( i = gap; i < n; ++i )
+			for ( lo = i - gap; lo >= 0; lo = lo - gap )
+				{
+				hi = lo + gap;
+
+				if ( special_case_0 )
+					/* A 0 in the upper slot is already
+					 * "largest"; a 0 in the lower slot
+					 * always has to move up.
+					 */
+					in_order = v[hi] == 0 ||
+						(v[lo] != 0 && v[lo] <= v[hi]);
+				else
+					in_order = v[lo] <= v[hi];
+
+				if ( in_order )
+					break;
+
+				tmp = v[hi];
+				v[hi] = v[lo];
+				v[lo] = tmp;
+				}
+	}
+
+
+/* dataend - finish up a block of data declarations
+ *
+ * Flushes a partially written data line, if there is one, emits the
+ * closing brace of the initializer and resets the line/position
+ * counters for the next table.
+ */
+
+void dataend()
+	{
+	if ( datapos > 0 )
+		dataflush();
+
+	/* add terminator for initialization; { for vi */
+	outn( "    } ;\n" );
+
+	datapos = 0;
+	dataline = 0;
+	}
+
+
+/* dataflush - flush generated data statements
+ *
+ * Ends the current line of table data.  After every NUMDATALINES lines
+ * an extra blank line is emitted so that the table is grouped into
+ * large blocks that enable the user to find elements easily.
+ */
+
+void dataflush()
+	{
+	outc( '\n' );
+
+	++dataline;
+
+	if ( dataline >= NUMDATALINES )
+		{
+		/* Block complete - start counting a new one. */
+		dataline = 0;
+		outc( '\n' );
+		}
+
+	/* Nothing has been written on the new line yet. */
+	datapos = 0;
+	}
+
+
+/* flexerror - report an error message and terminate
+ *
+ * Writes "<program_name>: <msg>" and a newline to stderr, then calls
+ * flexend( 1 ).
+ */
+
+void flexerror( msg )
+const char msg[];
+	{
+	fprintf( stderr, "%s: %s\n", program_name, msg );
+	flexend( 1 );
+	}
+
+
+/* flexfatal - report a fatal error message and terminate
+ *
+ * Unlike flexerror() this does not go through flexend(); it prints the
+ * message to stderr and calls exit( 1 ) directly.
+ */
+
+void flexfatal( msg )
+const char msg[];
+	{
+	fprintf( stderr, _( "%s: fatal internal error, %s\n" ),
+		program_name, msg );
+	exit( 1 );
+	}
+
+
+/* htoi - convert a hexadecimal digit string to an integer value
+ *
+ * str is parsed with sscanf's "%x" conversion.  If str does not start
+ * with a hexadecimal number (for instance because it is empty, as for
+ * the escape sequence "\x" followed by a non-hex character), 0 is
+ * returned.
+ */
+
+int htoi( str )
+Char str[];
+	{
+	/* sscanf leaves its target untouched when nothing matches, so
+	 * start from a defined value rather than returning garbage.
+	 */
+	unsigned int result = 0;
+
+	(void) sscanf( (char *) str, "%x", &result );
+
+	return result;
+	}
+
+
+/* lerrif - report an error message formatted with one integer argument
+ *
+ * msg is used as a printf-style format with arg as its only argument;
+ * the formatted text is handed to flexerror().
+ *
+ * NOTE(review): the text is built with an unbounded sprintf() into a
+ * MAXLINE-byte buffer, so msg plus the digits of arg must stay below
+ * MAXLINE characters.
+ */
+
+void lerrif( msg, arg )
+const char msg[];
+int arg;
+	{
+	char errmsg[MAXLINE];
+	(void) sprintf( errmsg, msg, arg );
+	flexerror( errmsg );
+	}
+
+
+/* lerrsf - report an error message formatted with one string argument */
+
+void lerrsf( msg, arg )
+const char msg[], arg[];
+	{
+	char errmsg[MAXLINE];
+
+	(void) sprintf( errmsg, msg, arg );
+	flexerror( errmsg );
+	}
+
+
+/* line_directive_out - spit out a "#line" statement
+ *
+ * output_file is where the directive goes; if it is nil the directive
+ * is appended to the accumulated actions instead.  do_infile selects
+ * what the directive refers to: the flex input file and its current
+ * line (linenum) when true, the generated output file and its current
+ * line (out_linenum) when false.
+ *
+ * Nothing is emitted when #line generation is switched off
+ * (gen_line_dirs) or when the relevant file name is not known.
+ */
+
+void line_directive_out( output_file, do_infile )
+FILE *output_file;
+int do_infile;
+	{
+	/* filename alone may fill almost all of its MAXLINE bytes, and
+	 * the directive wraps it in further text, so directive has to be
+	 * larger than filename or the sprintf() calls below can overrun it.
+	 */
+	char directive[MAXLINE * 2], filename[MAXLINE];
+	char *s1, *s2, *s3;
+	static char line_fmt[] = "#line %d \"%s\"\n";
+
+	if ( ! gen_line_dirs )
+		return;
+
+	if ( (do_infile && ! infilename) || (! do_infile && ! outfilename) )
+		/* don't know the filename to use, skip */
+		return;
+
+	/* Copy the name, doubling backslashes.  s3 stops the copy two
+	 * bytes short of the end because one iteration can store two
+	 * characters and the terminating NUL still has to fit.
+	 */
+	s1 = do_infile ? infilename : outfilename;
+	s2 = filename;
+	s3 = &filename[sizeof( filename ) - 2];
+
+	while ( s2 < s3 && *s1 )
+		{
+		if ( *s1 == '\\' )
+			/* Escape the '\' */
+			*s2++ = '\\';
+
+		*s2++ = *s1++;
+		}
+
+	*s2 = '\0';
+
+	if ( do_infile )
+		sprintf( directive, line_fmt, linenum, filename );
+	else
+		{
+		if ( output_file == stdout )
+			/* Account for the line directive itself. */
+			++out_linenum;
+
+		sprintf( directive, line_fmt, out_linenum, filename );
+		}
+
+	/* If output_file is nil then we should put the directive in
+	 * the accumulated actions.
+	 */
+	if ( output_file )
+		{
+		fputs( directive, output_file );
+		}
+	else
+		add_action( directive );
+	}
+
+
+/* mark_defs1 - mark the current position in the action array as
+ *               representing where the user's section 1 definitions end
+ *		 and the prolog begins
+ *
+ * The definitions are NUL-terminated in place; the prolog and (for
+ * now) the actions both start at the following position, which is
+ * initialized to an empty string.
+ */
+void mark_defs1()
+	{
+	defs1_offset = 0;
+
+	/* terminate the section 1 definitions */
+	action_array[action_index] = '\0';
+	++action_index;
+
+	prolog_offset = action_index;
+	action_offset = prolog_offset;
+
+	/* the prolog starts out empty */
+	action_array[action_index] = '\0';
+	}
+
+
+/* mark_prolog - mark the current position in the action array as
+ *               representing the end of the action prolog
+ *
+ * The prolog is NUL-terminated in place and the actions start at the
+ * following position, which is initialized to an empty string.
+ */
+void mark_prolog()
+	{
+	/* terminate the prolog */
+	action_array[action_index] = '\0';
+	++action_index;
+
+	action_offset = action_index;
+
+	/* the actions start out empty */
+	action_array[action_index] = '\0';
+	}
+
+
+/* mk2data - generate a data statement for a two-dimensional array
+ *
+ * Generates a data statement initializing the current 2-D array to "value".
+ * Items are separated by commas; once NUMDATAITEMS items are on a line
+ * the line is closed with a comma and a new, indented one is started.
+ */
+void mk2data( value )
+int value;
+	{
+	if ( datapos >= NUMDATAITEMS )
+		{
+		/* Line is full: finish it and move on to the next. */
+		outc( ',' );
+		dataflush();
+		}
+
+	if ( datapos != 0 )
+		/* Separate from the previous item on this line. */
+		outc( ',' );
+	else
+		/* First item on the line: indent. */
+		out( "    " );
+
+	++datapos;
+
+	out_dec( "%5d", value );
+	}
+
+
+/* mkdata - generate a data statement
+ *
+ * Generates a data statement initializing the current array element to
+ * "value".  Items are separated by commas; once NUMDATAITEMS items are
+ * on a line the line is closed with a comma and a new, indented one is
+ * started.
+ */
+void mkdata( value )
+int value;
+	{
+	if ( datapos >= NUMDATAITEMS )
+		{
+		/* Line is full: finish it and move on to the next. */
+		outc( ',' );
+		dataflush();
+		}
+
+	if ( datapos != 0 )
+		/* Separate from the previous item on this line. */
+		outc( ',' );
+	else
+		/* First item on the line: indent. */
+		out( "    " );
+
+	++datapos;
+
+	out_dec( "%5d", value );
+	}
+
+
+/* myctoi - return the integer represented by a string of digits
+ *
+ * The string is parsed with sscanf's "%d" conversion; when it does not
+ * start with a decimal number the result is 0.
+ */
+
+int myctoi( array )
+char array[];
+	{
+	int parsed;
+
+	/* stays 0 if sscanf finds nothing to convert */
+	parsed = 0;
+
+	(void) sscanf( array, "%d", &parsed );
+
+	return parsed;
+	}
+
+
+/* myesc - return character corresponding to escape sequence
+ *
+ * array holds the text of the escape sequence; array[0] is not
+ * examined (presumably it is the backslash) and array[1] selects the
+ * kind of escape:
+ *
+ *   b f n r t a v	the corresponding C control character
+ *   0 - 7		octal escape, value of the digits from array[1] on
+ *   x			hex escape, value of the hex digits from array[2] on
+ *   anything else	the character array[1] itself
+ *
+ * For numeric escapes the character following the digits is overwritten
+ * with NUL while the digits are converted and restored afterwards, so
+ * array must be writable.
+ */
+
+int myesc( array )
+Char array[];
+	{
+	Char c;
+	unsigned int esc_char;
+
+	switch ( array[1] )
+		{
+		case 'b': return '\b';
+		case 'f': return '\f';
+		case 'n': return '\n';
+		case 'r': return '\r';
+		case 't': return '\t';
+
+#if __STDC__
+		case 'a': return '\a';
+		case 'v': return '\v';
+#else
+		/* no '\a' / '\v' without __STDC__: use the octal codes */
+		case 'a': return '\007';
+		case 'v': return '\013';
+#endif
+
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+			{ /* \<octal> */
+			int sptr = 1;
+
+			/* Find the end of the digit string.  Note that
+			 * the test is isdigit(), so an '8' or '9' is
+			 * included in the span handed to otoi().
+			 */
+			while ( isascii( array[sptr] ) &&
+				isdigit( array[sptr] ) )
+				/* Don't increment inside loop control
+				 * because if isdigit() is a macro it might
+				 * expand into multiple increments ...
+				 */
+				++sptr;
+
+			/* Temporarily terminate the digits for otoi(). */
+			c = array[sptr];
+			array[sptr] = '\0';
+
+			esc_char = otoi( array + 1 );
+
+			/* Put back the character that was overwritten. */
+			array[sptr] = c;
+
+			return esc_char;
+			}
+
+		case 'x':
+			{ /* \x<hex> */
+			int sptr = 2;
+
+			/* Find the end of the hex digit string. */
+			while ( isascii( array[sptr] ) &&
+				isxdigit( (char) array[sptr] ) )
+				/* Don't increment inside loop control
+				 * because if isdigit() is a macro it might
+				 * expand into multiple increments ...
+				 */
+				++sptr;
+
+			/* Temporarily terminate the digits for htoi(). */
+			c = array[sptr];
+			array[sptr] = '\0';
+
+			esc_char = htoi( array + 2 );
+
+			/* Put back the character that was overwritten. */
+			array[sptr] = c;
+
+			return esc_char;
+			}
+
+		default:
+			/* Any other escaped character stands for itself. */
+			return array[1];
+		}
+	}
+
+
+/* otoi - convert an octal digit string to an integer value */
+
+int otoi( str )
+Char str[];
+	{
+	/* Start from 0 so that a string with no leading octal digits (for
+	 * which sscanf() stores nothing) yields 0 instead of an
+	 * indeterminate value.
+	 */
+	unsigned int result = 0;
+
+	(void) sscanf( (char *) str, "%o", &result );
+	return result;
+	}
+
+
+/* out - various flavors of outputting a (possibly formatted) string for the
+ *	 generated scanner, keeping track of the line count.
+ */
+
+void out( str )
+const char str[];
+	{
+	/* Write the text unchanged, then count the newlines it contained. */
+	fprintf( stdout, "%s", str );
+	out_line_count( str );
+	}
+
+void out_dec( fmt, n )
+const char fmt[];
+int n;
+	{
+	/* Print n through fmt; only the format text is scanned for
+	 * newlines.
+	 */
+	fprintf( stdout, fmt, n );
+	out_line_count( fmt );
+	}
+
+void out_dec2( fmt, n1, n2 )
+const char fmt[];
+int n1, n2;
+	{
+	/* Two-integer variant of out_dec(); newlines are counted in the
+	 * format text only.
+	 */
+	fprintf( stdout, fmt, n1, n2 );
+	out_line_count( fmt );
+	}
+
+void out_hex( fmt, x )
+const char fmt[];
+unsigned int x;
+	{
+	/* Print the unsigned value x through fmt and count the newlines
+	 * in the format text.
+	 */
+	fprintf( stdout, fmt, x );
+	out_line_count( fmt );
+	}
+
+void out_line_count( str )
+const char str[];
+	{
+	register const char *p;
+
+	/* Bump out_linenum once for every newline found in str. */
+	for ( p = str; *p != '\0'; ++p )
+		{
+		if ( *p == '\n' )
+			++out_linenum;
+		}
+	}
+
+void out_str( fmt, str )
+const char fmt[], str[];
+	{
+	/* Both the format and the substituted string may carry newlines,
+	 * so both are counted.
+	 */
+	fprintf( stdout, fmt, str );
+	out_line_count( fmt );
+	out_line_count( str );
+	}
+
+void out_str3( fmt, s1, s2, s3 )
+const char fmt[], s1[], s2[], s3[];
+	{
+	/* Three-string variant of out_str(): newlines are counted in the
+	 * format and in each of the three strings.
+	 */
+	fprintf( stdout, fmt, s1, s2, s3 );
+	out_line_count( fmt );
+	out_line_count( s1 );
+	out_line_count( s2 );
+	out_line_count( s3 );
+	}
+
+void out_str_dec( fmt, str, n )
+const char fmt[], str[];
+int n;
+	{
+	/* Print a string and an integer through fmt; newlines are counted
+	 * in the format and in the string.
+	 */
+	fprintf( stdout, fmt, str, n );
+	out_line_count( fmt );
+	out_line_count( str );
+	}
+
+void outc( c )
+int c;
+	{
+	/* Emit a single character, keeping the output line count current. */
+	putchar( c );
+
+	if ( c != '\n' )
+		return;
+
+	++out_linenum;
+	}
+
+void outn( str )
+const char str[];
+	{
+	/* Write str followed by a newline. */
+	fputs( str, stdout );
+	putc( '\n', stdout );
+
+	/* Count the newlines inside str plus the one just appended. */
+	out_line_count( str );
+	++out_linenum;
+	}
+
+
+/* readable_form - return the human-readable form of a character
+ *
+ * The returned string is in static storage.
+ */
+
+char *readable_form( c )
+register int c;
+	{
+	/* Sized for the worst case of the "\\%.3o" conversion below: one
+	 * backslash, at most three octal digits per byte of an unsigned int,
+	 * and the terminating NUL.  (A fixed 10-byte buffer overflowed for
+	 * values needing more than eight octal digits.)
+	 */
+	static char rform[sizeof( unsigned int ) * 3 + 3];
+
+	if ( (c >= 0 && c < 32) || c >= 127 )
+		{
+		/* Values in 0..31 or >= 127: use a symbolic escape where
+		 * one is listed, otherwise a backslash-octal escape.
+		 */
+		switch ( c )
+			{
+			case '\b': return "\\b";
+			case '\f': return "\\f";
+			case '\n': return "\\n";
+			case '\r': return "\\r";
+			case '\t': return "\\t";
+
+#if __STDC__
+			case '\a': return "\\a";
+			case '\v': return "\\v";
+#endif
+
+			default:
+				(void) sprintf( rform, "\\%.3o",
+						(unsigned int) c );
+				return rform;
+			}
+		}
+
+	else if ( c == ' ' )
+		/* A bare blank would be invisible, so show it quoted. */
+		return "' '";
+
+	else
+		{
+		/* Any other character is returned as a one-character
+		 * string.
+		 */
+		rform[0] = c;
+		rform[1] = '\0';
+
+		return rform;
+		}
+	}
+
+
+/* reallocate_array - increase the size of a dynamic array */
+
+void *reallocate_array( array, size, element_size )
+void *array;
+int size;
+size_t element_size;
+	{
+	register void *new_array;
+	size_t num_bytes;
+
+	/* Reject a negative element count, and a count whose byte size
+	 * would wrap around size_t, instead of silently requesting a
+	 * too-small block.  (size_t) -1 is the largest size_t value.
+	 */
+	if ( size < 0 ||
+	     (element_size != 0 &&
+	      (size_t) size > ((size_t) -1) / element_size) )
+		flexfatal( _( "attempt to increase array size failed" ) );
+
+	num_bytes = element_size * (size_t) size;
+
+	new_array = flex_realloc( array, num_bytes );
+	if ( ! new_array )
+		flexfatal( _( "attempt to increase array size failed" ) );
+
+	return new_array;
+	}
+
+
+/* skelout - write out one section of the skeleton file
+ *
+ * Description
+ *    Copies skelfile or skel array to stdout until a line beginning with
+ *    "%%" or EOF is found.
+ */
+void skelout()
+	{
+	char line_storage[MAXLINE];
+	char *line = line_storage;
+	int copying = 1;
+
+	for ( ; ; )
+		{
+		/* Fetch the next line: from skelfile when one is in use,
+		 * otherwise the next entry of the skel[] array.  Stop at
+		 * end of file / the terminating null entry.
+		 */
+		if ( skelfile )
+			{
+			if ( fgets( line, MAXLINE, skelfile ) == NULL )
+				break;
+			}
+		else
+			{
+			line = (char *) skel[skel_ind++];
+
+			if ( line == 0 )
+				break;
+			}
+
+		if ( line[0] != '%' )
+			{ /* ordinary text line */
+			if ( ! copying )
+				continue;
+
+			/* Skeleton file reads include the final newline,
+			 * skel[] array entries do not.
+			 */
+			if ( skelfile )
+				out( line );
+			else
+				outn( line );
+
+			continue;
+			}
+
+		/* Control line: the second character selects the action. */
+		switch ( line[1] )
+			{
+			case '%':
+				/* End of this skeleton section. */
+				return;
+
+			case '+':
+				/* Following text is copied only for C++. */
+				copying = C_plus_plus;
+				break;
+
+			case '-':
+				/* Following text is copied only for non-C++. */
+				copying = ! C_plus_plus;
+				break;
+
+			case '*':
+				/* Following text is always copied. */
+				copying = 1;
+				break;
+
+			default:
+				flexfatal(
+				_( "bad line in skeleton file" ) );
+			}
+		}
+	}
+
+
+/* transition_struct_out - output a yy_trans_info structure
+ *
+ * outputs the yy_trans_info structure with the two elements, element_v and
+ * element_n.  Formats the output with spaces and carriage returns.
+ */
+
+void transition_struct_out( element_v, element_n )
+int element_v, element_n;
+	{
+	out_dec2( " {%4d,%4d },", element_v, element_n );
+
+	datapos += TRANS_STRUCT_PRINT_LENGTH;
+
+	/* Still room for another structure on this line: done. */
+	if ( datapos < 79 - TRANS_STRUCT_PRINT_LENGTH )
+		return;
+
+	/* Otherwise end the line, and put an extra blank line after every
+	 * tenth one.
+	 */
+	outc( '\n' );
+
+	++dataline;
+	if ( dataline % 10 == 0 )
+		outc( '\n' );
+
+	datapos = 0;
+	}
+
+
+/* The following is only needed when building flex's parser using certain
+ * broken versions of bison.
+ */
+void *yy_flex_xmalloc( size )
+int size;
+	{
+	void *result;
+
+	/* Allocate through flex_alloc(); a null result is reported via
+	 * flexfatal().
+	 */
+	result = flex_alloc( (size_t) size );
+
+	if ( result )
+		return result;
+
+	flexfatal(
+		_( "memory allocation failed in yy_flex_xmalloc()" ) );
+
+	return result;
+	}
+
+
+/* zero_out - set a region of memory to 0
+ *
+ * Sets region_ptr[0] through region_ptr[size_in_bytes - 1] to zero.
+ */
+
+void zero_out( region_ptr, size_in_bytes )
+char *region_ptr;
+size_t size_in_bytes;
+	{
+	register size_t i;
+
+	/* Clear the region one byte at a time, lowest address first. */
+	for ( i = 0; i < size_in_bytes; ++i )
+		region_ptr[i] = 0;
+	}
--- a/to.do/unicode/scan.l
+++ b/to.do/unicode/scan.l
@ -0,0 +1,710 @@
+/* scan.l - scanner for flex input */
+
+%{
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Vern Paxson.
+ * 
+ * The United States Government has rights in this work pursuant
+ * to contract no. DE-AC03-76SF00098 between the United States
+ * Department of Energy and the University of California.
+ *
+ * Redistribution and use in source and binary forms are permitted provided
+ * that: (1) source distributions retain this entire copyright notice and
+ * comment, and (2) distributions including binaries display the following
+ * acknowledgement:  ``This product includes software developed by the
+ * University of California, Berkeley and its contributors'' in the
+ * documentation or other materials provided with the distribution and in
+ * all advertising materials mentioning features or use of this software.
+ * Neither the name of the University nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+/* $Header$ */
+
+#include "flexdef.h"
+#include "parse.h"
+
+/* Pass the text of the current token to add_action(). */
+#define ACTION_ECHO add_action( yytext )
+
+/* Call action_define( def, 1 ) only when should_define is true; nothing
+ * is done otherwise.
+ */
+#define ACTION_IFDEF(def, should_define) \
+	{ \
+	if ( should_define ) \
+		action_define( def, 1 ); \
+	}
+
+/* Shorthand for calling mark_prolog(). */
+#define MARK_END_OF_PROLOG mark_prolog();
+
+/* The generated scanning routine is named flexscan() and returns int. */
+#define YY_DECL \
+	int flexscan()
+
+/* Return a CHAR token whose value is the first character of the current
+ * token, taken as an unsigned char.
+ */
+#define RETURNCHAR \
+	yylval = (unsigned char) yytext[0]; \
+	return CHAR;
+
+/* Copy the current token into nmstr and return a NAME token.
+ * NOTE(review): the copy is unbounded; presumably nmstr is large enough
+ * for any token - confirm against its declaration.
+ */
+#define RETURNNAME \
+	strcpy( nmstr, yytext ); \
+	return NAME;
+
+/* unput() the characters of str from the last one down to index start,
+ * so they will be rescanned in their original order.  Uses the variable
+ * i of the enclosing scope as its loop counter.
+ */
+#define PUT_BACK_STRING(str, start) \
+	for ( i = strlen( str ) - 1; i >= start; --i ) \
+		unput((str)[i])
+
+/* Set the reject flag when all_upper( str ) is true. */
+#define CHECK_REJECT(str) \
+	if ( all_upper( str ) ) \
+		reject = true;
+
+/* Set the yymore_used flag when all_lower( str ) is true. */
+#define CHECK_YYMORE(str) \
+	if ( all_lower( str ) ) \
+		yymore_used = true;
+%}
+
+/* Options used to build this scanner itself. */
+%option caseless nodefault outfile="scan.c" stack noyy_top_state
+%option nostdinit
+
+/* Exclusive start conditions used by the rules below. */
+%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
+%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
+%x OPTION LINEDIR
+
+/* Whitespace: one or more blanks, zero or more blanks, and any single
+ * character that is neither a blank nor a newline.
+ */
+WS		[[:blank:]]+
+OPTWS		[[:blank:]]*
+NOT_WS		[^[:blank:]\n]
+
+/* A newline, optionally preceded by a carriage return. */
+NL		\r?\n
+
+/* NAME is a letter or underscore followed by letters, digits,
+ * underscores or dashes.  NOT_NAME is a run of characters none of which
+ * is a letter, underscore, '*' or newline.
+ */
+NAME		([[:alpha:]_][[:alnum:]_-]*)
+NOT_NAME	[^[:alpha:]_*\n]+
+
+SCNAME		{NAME}
+
+/* A backslash followed by any non-newline character, by 1 to 6 octal
+ * digits, or by 'x' and 1 to 4 hexadecimal digits.
+ */
+ESCSEQ		(\\([^\n]|[0-7]{1,6}|x[[:xdigit:]]{1,4}))
+
+/* Pieces of a character class: its first character, a later character
+ * (which may not be an unescaped ']'), and a [:name:] expression.
+ */
+FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
+CCL_CHAR	([^\\\n\]]|{ESCSEQ})
+CCL_EXPR	("[:"[[:alpha:]]+":]")
+
+/* Letters accepted after '%' in the directives that the INITIAL rules
+ * below ignore.
+ */
+LEXOPT		[aceknopr]
+
+%%
+	/* Declarations placed ahead of the first rule.  The static ones
+	 * keep their values from one call of the scanning routine to the
+	 * next; the others are re-initialized on every call.
+	 */
+	static int bracelevel, didadef, indented_code;
+	static int doing_rule_action = false;
+	static int option_sense;
+
+	int doing_codeblock = false;
+	int i;
+	Char nmdef[MAXLINE];
+
+
+	/* INITIAL: section 1 of the input - code blocks, comments, #line
+	 * directives, '%' directives and name definitions, up to the first
+	 * "%%" line.
+	 */
+<INITIAL>{
+	^{WS}		indented_code = true; BEGIN(CODEBLOCK);
+	^"/*"		ACTION_ECHO; yy_push_state( COMMENT );
+	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
+	^"%s"{NAME}?	return SCDECL;
+	^"%x"{NAME}?	return XSCDECL;
+	^"%{".*{NL}	{
+			/* Start of a %{ ... %} block. */
+			++linenum;
+			line_directive_out( (FILE *) 0, 1 );
+			indented_code = false;
+			BEGIN(CODEBLOCK);
+			}
+
+	{WS}		/* discard */
+
+	^"%%".*		{
+			/* End of section 1: move on to the section 2
+			 * prolog.
+			 */
+			sectnum = 2;
+			bracelevel = 0;
+			mark_defs1();
+			line_directive_out( (FILE *) 0, 1 );
+			BEGIN(SECT2PROLOG);
+			return SECTEND;
+			}
+
+	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
+	^"%array".*{NL}		yytext_is_array = true; ++linenum;
+
+	^"%option"	BEGIN(OPTION); return OPTION_OP;
+
+	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
+	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */
+
+	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );
+
+	^{NAME}		{
+			/* A name at the start of a line begins a
+			 * definition; its value is picked up in PICKUPDEF.
+			 */
+			strcpy( nmstr, yytext );
+			didadef = false;
+			BEGIN(PICKUPDEF);
+			}
+
+	{SCNAME}	RETURNNAME;
+	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
+	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
+}
+
+
+	/* COMMENT: inside a C comment.  All text is echoed; the closing
+	 * delimiter pops back to the state that pushed COMMENT.
+	 */
+<COMMENT>{
+	"*/"		ACTION_ECHO; yy_pop_state();
+	"*"		ACTION_ECHO;
+	[^*\n]+		ACTION_ECHO;
+	[^*\n]*{NL}	++linenum; ACTION_ECHO;
+}
+
+	/* LINEDIR: remainder of a "#line" directive.  A number sets
+	 * linenum, a quoted string replaces infilename, and the newline
+	 * pops back to the previous state.
+	 */
+<LINEDIR>{
+	\n		yy_pop_state();
+	[[:digit:]]+	linenum = myctoi( yytext );
+
+	\"[^"\n]*\"	{
+			/* Copy the text after the opening quote, then
+			 * overwrite the closing quote with a NUL.
+			 */
+			flex_free( (void *) infilename );
+			infilename = copy_string( yytext + 1 );
+			infilename[strlen( infilename ) - 1] = '\0';
+			}
+	.		/* ignore spurious characters */
+}
+
+	/* CODEBLOCK: code in section 1, either an indented line or the
+	 * body of a %{ ... %} block.  Text is echoed; an indented block ends
+	 * at its newline, a %{ block at the "%}" line.
+	 */
+<CODEBLOCK>{
+	^"%}".*{NL}	++linenum; BEGIN(INITIAL);
+
+	{NAME}|{NOT_NAME}|.	ACTION_ECHO;
+
+	{NL}		{
+			++linenum;
+			ACTION_ECHO;
+			if ( indented_code )
+				BEGIN(INITIAL);
+			}
+}
+
+
+	/* PICKUPDEF: the rest of a name-definition line; nmstr already
+	 * holds the name.
+	 */
+<PICKUPDEF>{
+	{WS}		/* separates name and definition */
+
+	{NOT_WS}.*	{
+			/* NOTE(review): unbounded copy into nmdef[MAXLINE];
+			 * presumably input lines are shorter than MAXLINE -
+			 * confirm.
+			 */
+			strcpy( (char *) nmdef, yytext );
+
+			/* Skip trailing whitespace. */
+			for ( i = strlen( (char *) nmdef ) - 1;
+			      i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
+			      --i )
+				;
+
+			nmdef[i + 1] = '\0';
+
+			ndinstal( nmstr, nmdef );
+			didadef = true;
+			}
+
+	{NL}		{
+			/* A name with nothing after it is an error. */
+			if ( ! didadef )
+				synerr( _( "incomplete name definition" ) );
+			BEGIN(INITIAL);
+			++linenum;
+			}
+}
+
+
+	/* OPTION: the body of a %option line.  Whitespace resets
+	 * option_sense to true and each "no" prefix inverts it, so the
+	 * option name that follows sees the resulting sense.
+	 */
+<OPTION>{
+	{NL}		++linenum; BEGIN(INITIAL);
+	{WS}		option_sense = true;
+
+	"="		return '=';
+
+	no		option_sense = ! option_sense;
+
+	7bit		csize = option_sense ? 128 : 256;
+	8bit		csize = option_sense ? 256 : 128;
+	16bit		csize = option_sense ? 65536 : 256;
+
+	align		long_align = option_sense;
+	always-interactive	{
+			action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
+			}
+	array		yytext_is_array = option_sense;
+	backup		backing_up_report = option_sense;
+	batch		interactive = ! option_sense;
+	"c++"		C_plus_plus = option_sense;
+	caseful|case-sensitive		caseins = ! option_sense;
+	caseless|case-insensitive	caseins = option_sense;
+	debug		ddebug = option_sense;
+	default		spprdflt = ! option_sense;
+	ecs		useecs = option_sense;
+	fast		{
+			/* Note: option_sense is not consulted here. */
+			useecs = usemecs = false;
+			use_read = fullspd = true;
+			}
+	full		{
+			/* Note: option_sense is not consulted here. */
+			useecs = usemecs = false;
+			use_read = fulltbl = true;
+			}
+	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
+	interactive	interactive = option_sense;
+	lex-compat	lex_compat = option_sense;
+	main		{
+			action_define( "YY_MAIN", option_sense );
+			do_yywrap = ! option_sense;
+			}
+	meta-ecs	usemecs = option_sense;
+	never-interactive	{
+			action_define( "YY_NEVER_INTERACTIVE", option_sense );
+			}
+	perf-report	performance_report += option_sense ? 1 : -1;
+	pointer		yytext_is_array = ! option_sense;
+	read		use_read = option_sense;
+	reject		reject_really_used = option_sense;
+	stack		action_define( "YY_STACK_USED", option_sense );
+	stdinit		do_stdinit = option_sense;
+	stdout		use_stdout = option_sense;
+	unput		ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
+	verbose		printstats = option_sense;
+	warn		nowarn = ! option_sense;
+	yylineno	do_yylineno = option_sense;
+	yymore		yymore_really_used = option_sense;
+	yywrap		do_yywrap = option_sense;
+
+	yy_push_state	ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
+	yy_pop_state	ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
+	yy_top_state	ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);
+
+	yy_scan_buffer	ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
+	yy_scan_bytes	ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
+	yy_scan_string	ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);
+
+	outfile		return OPT_OUTFILE;
+	prefix		return OPT_PREFIX;
+	yyclass		return OPT_YYCLASS;
+
+	\"[^"\n]*\"	{
+			/* Quoted value: copy it without the opening quote
+			 * and overwrite the closing quote with a NUL.
+			 */
+			strcpy( nmstr, yytext + 1 );
+			nmstr[strlen( nmstr ) - 1] = '\0';
+			return NAME;
+			}
+
+	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
+			/* Anything else is an error; RECOVER then skips
+			 * the rest of the line.
+			 */
+			format_synerr( _( "unrecognized %%option: %s" ),
+				yytext );
+			BEGIN(RECOVER);
+			}
+}
+
+<RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);
+
+
+	/* SECT2PROLOG: text between the first "%%" and the first rule.
+	 * bracelevel tracks %{ ... %} nesting; the first non-indented line
+	 * seen outside such a block is pushed back and rescanned in SECT2.
+	 */
+<SECT2PROLOG>{
+	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
+	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */
+
+	^{WS}.*	ACTION_ECHO;	/* indented code in prolog */
+
+	^{NOT_WS}.*	{	/* non-indented code */
+			if ( bracelevel <= 0 )
+				{ /* not in %{ ... %} */
+				yyless( 0 );	/* put it all back */
+				yy_set_bol( 1 );
+				mark_prolog();
+				BEGIN(SECT2);
+				}
+			else
+				ACTION_ECHO;
+			}
+
+	.*		ACTION_ECHO;
+	{NL}	++linenum; ACTION_ECHO;
+
+	<<EOF>>		{
+			mark_prolog();
+			sectnum = 0;
+			yyterminate(); /* to stop the parser */
+			}
+}
+
+	/* SECT2: the rules section.  Pattern text is turned into tokens for
+	 * the parser; whitespace or a newline after a pattern switches to
+	 * one of the action-collecting states.
+	 */
+<SECT2>{
+	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */
+
+	^{OPTWS}"%{"	{
+			/* A %{ block at the start of a line. */
+			indented_code = false;
+			doing_codeblock = true;
+			bracelevel = 1;
+			BEGIN(PERCENT_BRACE_ACTION);
+			}
+
+	^{OPTWS}"<"	BEGIN(SC); return '<';
+	^{OPTWS}"^"	return '^';
+	\"		BEGIN(QUOTE); return '"';
+	"{"/[[:digit:]]	BEGIN(NUM); return '{';
+	"$"/([[:blank:]]|{NL})	return '$';
+
+	{WS}"%{"		{
+			/* A %{ action following a pattern. */
+			bracelevel = 1;
+			BEGIN(PERCENT_BRACE_ACTION);
+
+			if ( in_rule )
+				{
+				doing_rule_action = true;
+				in_rule = false;
+				return '\n';
+				}
+			}
+	{WS}"|".*{NL}	continued_action = true; ++linenum; return '\n';
+
+	^{WS}"/*"	{
+			yyless( yyleng - 2 );	/* put back '/', '*' */
+			bracelevel = 0;
+			continued_action = false;
+			BEGIN(ACTION);
+			}
+
+	^{WS}		/* allow indented rules */
+
+	{WS}		{
+			/* This rule is separate from the one below because
+			 * otherwise we get variable trailing context, so
+			 * we can't build the scanner using -{f,F}.
+			 */
+			bracelevel = 0;
+			continued_action = false;
+			BEGIN(ACTION);
+
+			if ( in_rule )
+				{
+				doing_rule_action = true;
+				in_rule = false;
+				return '\n';
+				}
+			}
+
+	{OPTWS}{NL}	{
+			bracelevel = 0;
+			continued_action = false;
+			BEGIN(ACTION);
+			unput( '\n' );	/* so <ACTION> sees it */
+
+			if ( in_rule )
+				{
+				doing_rule_action = true;
+				in_rule = false;
+				return '\n';
+				}
+			}
+
+	^{OPTWS}"<<EOF>>"	|
+	"<<EOF>>"	return EOF_OP;
+
+	^"%%".*		{
+			/* Second "%%": the rules section is over. */
+			sectnum = 3;
+			BEGIN(SECT3);
+			yyterminate(); /* to stop the parser */
+			}
+
+	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
+			int cclval;
+
+			strcpy( nmstr, yytext );
+
+			/* Check to see if we've already encountered this
+			 * ccl.
+			 */
+			if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
+				{
+				/* The pattern stops short of the closing
+				 * bracket, so consume it here.
+				 */
+				if ( input() != ']' )
+					synerr( _( "bad character class" ) );
+
+				yylval = cclval;
+				++cclreuse;
+				return PREVCCL;
+				}
+			else
+				{
+				/* We fudge a bit.  We know that this ccl will
+				 * soon be numbered as lastccl + 1 by cclinit.
+				 */
+				cclinstal( (Char *) nmstr, lastccl + 1 );
+
+				/* Push back everything but the leading bracket
+				 * so the ccl can be rescanned.
+				 */
+				yyless( 1 );
+
+				BEGIN(FIRSTCCL);
+				return '[';
+				}
+			}
+
+	"{"{NAME}"}"	{
+			register Char *nmdefptr;
+
+			/* Extract the name from between the braces. */
+			strcpy( nmstr, yytext + 1 );
+			nmstr[yyleng - 2] = '\0';  /* chop trailing brace */
+
+			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
+				format_synerr(
+					_( "undefined definition {%s}" ),
+						nmstr );
+
+			else
+				{ /* push back name surrounded by ()'s */
+				int len = strlen( (char *) nmdefptr );
+
+				if ( lex_compat || nmdefptr[0] == '^' ||
+				     (len > 0 && nmdefptr[len - 1] == '$') )
+					{ /* don't use ()'s after all */
+					PUT_BACK_STRING((char *) nmdefptr, 0);
+
+					if ( nmdefptr[0] == '^' )
+						BEGIN(CARETISBOL);
+					}
+
+				else
+					{
+					/* Pushed back in reverse, so the
+					 * rescanned text reads "(" definition
+					 * ")".
+					 */
+					unput(')');
+					PUT_BACK_STRING((char *) nmdefptr, 0);
+					unput('(');
+					}
+				}
+			}
+
+	[/|*+?.(){}]	return (unsigned char) yytext[0];
+	.		RETURNCHAR;
+}
+
+
+	/* SC: inside a <start condition> list in front of a pattern.  When
+	 * the closing '>' is followed by '^', CARETISBOL returns that caret
+	 * as the '^' token.
+	 */
+<SC>{
+	[,*]		return (unsigned char) yytext[0];
+	">"		BEGIN(SECT2); return '>';
+	">"/^		BEGIN(CARETISBOL); return '>';
+	{SCNAME}	RETURNNAME;
+	.		{
+			format_synerr( _( "bad <start condition>: %s" ),
+				yytext );
+			}
+}
+
+<CARETISBOL>"^"		BEGIN(SECT2); return '^';
+
+
+	/* QUOTE: inside a double-quoted string in a pattern.  Characters
+	 * are returned one at a time as CHAR tokens.
+	 */
+<QUOTE>{
+	[^"\n]		RETURNCHAR;
+	\"		BEGIN(SECT2); return '"';
+
+	{NL}		{
+			/* Report the error, then return the closing quote
+			 * that was missing.
+			 */
+			synerr( _( "missing quote" ) );
+			BEGIN(SECT2);
+			++linenum;
+			return '"';
+			}
+}
+
+
+	/* FIRSTCCL: just after the '[' that opens a character class.  A
+	 * caret here is returned as the '^' token; when it is followed by
+	 * '-' or ']' the state stays FIRSTCCL, otherwise the scan continues
+	 * in CCL.
+	 */
+<FIRSTCCL>{
+	"^"/[^-\]\n]	BEGIN(CCL); return '^';
+	"^"/("-"|"]")	return '^';
+	.		BEGIN(CCL); RETURNCHAR;
+}
+
+	/* CCL: inside a character class, after its first character. */
+<CCL>{
+	-/[^\]\n]	return '-';
+	[^\]\n]		RETURNCHAR;
+	"]"		BEGIN(SECT2); return ']';
+	.|{NL}		{
+			/* Report the error and close the class by returning
+			 * ']'.
+			 */
+			synerr( _( "bad character class" ) );
+			BEGIN(SECT2);
+			return ']';
+			}
+}
+
+	/* [:name:] expressions, accepted both at the start of and within a
+	 * character class.  Each one continues the scan in CCL.
+	 */
+<FIRSTCCL,CCL>{
+	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
+	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
+	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
+	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
+	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
+	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
+	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
+	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
+	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
+	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
+	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
+	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;
+	{CCL_EXPR}	{
+			/* An unknown name: report it, then return CCE_ALNUM
+			 * in its place.
+			 */
+			format_synerr(
+				_( "bad character class expression: %s" ),
+					yytext );
+			BEGIN(CCL); return CCE_ALNUM;
+			}
+}
+
+	/* NUM: inside the braces of a repeat count, entered on a '{' that
+	 * is followed by a digit.
+	 */
+<NUM>{
+	[[:digit:]]+	{
+			yylval = myctoi( yytext );
+			return NUMBER;
+			}
+
+	","		return ',';
+	"}"		BEGIN(SECT2); return '}';
+
+	.		{
+			/* Report the error and return the closing brace. */
+			synerr( _( "bad character inside {}'s" ) );
+			BEGIN(SECT2);
+			return '}';
+			}
+
+	{NL}		{
+			/* Report the error and return the closing brace. */
+			synerr( _( "missing }" ) );
+			BEGIN(SECT2);
+			++linenum;
+			return '}';
+			}
+}
+
+
+	/* PERCENT_BRACE_ACTION: inside a %{ ... %} block in section 2.  The
+	 * nested <...> prefixes below name further start conditions for
+	 * the rules they introduce.
+	 */
+<PERCENT_BRACE_ACTION>{
+	{OPTWS}"%}".*		bracelevel = 0;
+
+	<ACTION>"/*"		ACTION_ECHO; yy_push_state( COMMENT );
+
+	<CODEBLOCK,ACTION>{
+		"reject"	{
+			ACTION_ECHO;
+			CHECK_REJECT(yytext);
+			}
+		"yymore"	{
+			ACTION_ECHO;
+			CHECK_YYMORE(yytext);
+			}
+	}
+
+	{NAME}|{NOT_NAME}|.	ACTION_ECHO;
+	{NL}		{
+			++linenum;
+			ACTION_ECHO;
+			/* The block is finished once "%}" has reset
+			 * bracelevel, or at the end of the line for an
+			 * indented code block.
+			 */
+			if ( bracelevel == 0 ||
+			     (doing_codeblock && indented_code) )
+				{
+				if ( doing_rule_action )
+					add_action( "\tYY_BREAK\n" );
+
+				doing_rule_action = doing_codeblock = false;
+				BEGIN(SECT2);
+				}
+			}
+}
+
+
+	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
+	/* ACTION: the text of a rule's action.  Everything is echoed while
+	 * bracelevel tracks brace nesting; a newline seen at nesting level
+	 * zero ends the action.
+	 */
+<ACTION>{
+	"{"		ACTION_ECHO; ++bracelevel;
+	"}"		ACTION_ECHO; --bracelevel;
+	[^[:alpha:]_{}"'/\n]+	ACTION_ECHO;
+	{NAME}		ACTION_ECHO;
+	"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
+	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
+	{NL}		{
+			++linenum;
+			ACTION_ECHO;
+			if ( bracelevel == 0 )
+				{
+				/* End of the action: close it with YY_BREAK
+				 * if it belongs to a rule.
+				 */
+				if ( doing_rule_action )
+					add_action( "\tYY_BREAK\n" );
+
+				doing_rule_action = false;
+				BEGIN(SECT2);
+				}
+			}
+	.		ACTION_ECHO;
+}
+
+	/* ACTION_STRING: inside a string literal within an action.  Text is
+	 * echoed unchanged, so braces inside the string do not affect
+	 * bracelevel; an unescaped quote returns to ACTION.
+	 */
+<ACTION_STRING>{
+	[^"\\\n]+	ACTION_ECHO;
+	\\.		ACTION_ECHO;
+	{NL}		++linenum; ACTION_ECHO;
+	\"		ACTION_ECHO; BEGIN(ACTION);
+	.		ACTION_ECHO;
+}
+
+	/* End of input while still inside a comment, an action or a string
+	 * in an action is reported as a syntax error.
+	 */
+<COMMENT,ACTION,ACTION_STRING><<EOF>>	{
+			synerr( _( "EOF encountered inside an action" ) );
+			yyterminate();
+			}
+
+
+	/* Escape sequences in patterns, quoted strings and character
+	 * classes are converted by myesc() and returned as CHAR tokens.
+	 */
+<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
+			yylval = myesc( (Char *) yytext );
+
+			/* An escape counts as the first character of a
+			 * class, so what follows is scanned in CCL.
+			 */
+			if ( YY_START == FIRSTCCL )
+				BEGIN(CCL);
+
+			return CHAR;
+			}
+
+
+	/* SECT3: everything after the second "%%" is copied through with
+	 * ECHO.  The final rule applies in every start condition and
+	 * reports any character that no earlier rule matched.
+	 */
+<SECT3>{
+	.*(\n?)		ECHO;
+	<<EOF>>		sectnum = 0; yyterminate();
+}
+
+<*>.|\n			format_synerr( _( "bad character: %s" ), yytext );
+
+%%
+
+
+int yywrap()
+	{
+	/* No input files left: return 1. */
+	if ( --num_input_files <= 0 )
+		return 1;
+
+	/* Otherwise switch to the next file and return 0. */
+	set_input_file( *++input_files );
+	return 0;
+	}
+
+
+/* set_input_file - open the given file (if NULL, stdin) for scanning */
+
+void set_input_file( file )
+char *file;
+	{
+	/* A null name and the name "-" both select standard input. */
+	int use_stdin = ! file || strcmp( file, "-" ) == 0;
+
+	if ( use_stdin )
+		{
+		yyin = stdin;
+		infilename = copy_string( "<stdin>" );
+		}
+
+	else
+		{
+		infilename = copy_string( file );
+		yyin = fopen( infilename, "r" );
+
+		if ( yyin == NULL )
+			lerrsf( _( "can't open %s" ), file );
+		}
+
+	/* Line numbering restarts with each new input. */
+	linenum = 1;
+	}
+
+
+/* Wrapper routines for accessing the scanner's malloc routines. */
+
+void *flex_alloc( size )
+size_t size;
+	{
+	/* Thin wrapper: the result of malloc() is returned unchecked. */
+	void *mem = (void *) malloc( size );
+
+	return mem;
+	}
+
+void *flex_realloc( ptr, size )
+void *ptr;
+size_t size;
+	{
+	/* Thin wrapper: the result of realloc() is returned unchecked. */
+	void *mem = (void *) realloc( ptr, size );
+
+	return mem;
+	}
+
+void flex_free( ptr )
+void *ptr;
+	{
+	/* A null pointer is silently ignored. */
+	if ( ! ptr )
+		return;
+
+	free( ptr );
+	}
--- a/to.do/unicode/tblcmp.c
+++ b/to.do/unicode/tblcmp.c
@ -0,0 +1,887 @@
+/* tblcmp - table compression routines */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Vern Paxson.
+ * 
+ * The United States Government has rights in this work pursuant
+ * to contract no. DE-AC03-76SF00098 between the United States
+ * Department of Energy and the University of California.
+ *
+ * Redistribution and use in source and binary forms are permitted provided
+ * that: (1) source distributions retain this entire copyright notice and
+ * comment, and (2) distributions including binaries display the following
+ * acknowledgement:  ``This product includes software developed by the
+ * University of California, Berkeley and its contributors'' in the
+ * documentation or other materials provided with the distribution and in
+ * all advertising materials mentioning features or use of this software.
+ * Neither the name of the University nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+/* $Header$ */
+
+#include "flexdef.h"
+
+
+/* declarations for functions that have forward references */
+
+void mkentry PROTO((register int*, int, int, int, int));
+void mkprot PROTO((int[], int, int));
+void mktemplate PROTO((int[], int, int));
+void mv2front PROTO((int));
+int tbldiff PROTO((int[], int, int[]));
+
+
+/* bldtbl - build table entries for dfa state
+ *
+ * synopsis
+ *   int state[numecs], statenum, totaltrans, comstate, comfreq;
+ *   bldtbl( state, statenum, totaltrans, comstate, comfreq );
+ *
+ * State is the statenum'th dfa state.  It is indexed by equivalence class and
+ * gives the number of the state to enter for a given equivalence class.
+ * totaltrans is the total number of transitions out of the state.  Comstate
+ * is that state which is the destination of the most transitions out of State.
+ * Comfreq is how many transitions there are out of State to Comstate.
+ *
+ * A note on terminology:
+ *    "protos" are transition tables which have a high probability of
+ * either being redundant (a state processed later will have an identical
+ * transition table) or nearly redundant (a state processed later will have
+ * many of the same out-transitions).  A "most recently used" queue of
+ * protos is kept around with the hope that most states will find a proto
+ * which is similar enough to be usable, and therefore compacting the
+ * output tables.
+ *    "templates" are a special type of proto.  If a transition table is
+ * homogeneous or nearly homogeneous (all transitions go to the same
+ * destination) then the odds are good that future states will also go
+ * to the same destination state on basically the same character set.
+ * These homogeneous states are so common when dealing with large rule
+ * sets that they merit special attention.  If the transition table were
+ * simply made into a proto, then (typically) each subsequent, similar
+ * state will differ from the proto for two out-transitions.  One of these
+ * out-transitions will be that character on which the proto does not go
+ * to the common destination, and one will be that character on which the
+ * state does not go to the common destination.  Templates, on the other
+ * hand, go to the common state on EVERY transition character, and therefore
+ * cost only one difference.
+ */
+
+void bldtbl( state, statenum, totaltrans, comstate, comfreq )
+int state[], statenum, totaltrans, comstate, comfreq;
+	{
+	int extptr, extrct[2][CSIZE + 1];
+	int mindiff, minprot, i, d;
+
+	/* If extptr is 0 then the first array of extrct holds the result
+	 * of the "best difference" to date, which is those transitions
+	 * which occur in "state" but not in the proto which, to date,
+	 * has the fewest differences between itself and "state".  If
+	 * extptr is 1 then the second array of extrct holds the best
+	 * difference.  The two arrays are toggled between so that the
+	 * best difference to date can be kept around and also a difference
+	 * just created by checking against a candidate "best" proto.
+	 */
+
+	extptr = 0;
+
+	/* If the state has too few out-transitions, don't bother trying to
+	 * compact its tables.
+	 */
+
+	if ( (totaltrans * 100) < (numecs * PROTO_SIZE_PERCENTAGE) )
+		mkentry( state, numecs, statenum, JAMSTATE, totaltrans );
+
+	else
+		{
+		/* "checkcom" is true if we should only check "state" against
+		 * protos which have the same "comstate" value.
+		 */
+		int checkcom =
+			comfreq * 100 > totaltrans * CHECK_COM_PERCENTAGE;
+
+		/* Defaults used when no proto gets compared below: a
+		 * difference equal to the full number of transitions.
+		 */
+		minprot = firstprot;
+		mindiff = totaltrans;
+
+		if ( checkcom )
+			{
+			/* Find first proto which has the same "comstate". */
+			for ( i = firstprot; i != NIL; i = protnext[i] )
+				if ( protcomst[i] == comstate )
+					{
+					minprot = i;
+					mindiff = tbldiff( state, minprot,
+							extrct[extptr] );
+					break;
+					}
+			}
+
+		else
+			{
+			/* Since we've decided that the most common destination
+			 * out of "state" does not occur with a high enough
+			 * frequency, we set the "comstate" to zero, assuring
+			 * that if this state is entered into the proto list,
+			 * it will not be considered a template.
+			 */
+			comstate = 0;
+
+			if ( firstprot != NIL )
+				{
+				minprot = firstprot;
+				mindiff = tbldiff( state, minprot,
+						extrct[extptr] );
+				}
+			}
+
+		/* We now have the first interesting proto in "minprot".  If
+		 * it matches within the tolerances set for the first proto,
+		 * we don't want to bother scanning the rest of the proto list
+		 * to see if we have any other reasonable matches.
+		 */
+
+		if ( mindiff * 100 > totaltrans * FIRST_MATCH_DIFF_PERCENTAGE )
+			{
+			/* Not a good enough match.  Scan the rest of the
+			 * protos.
+			 */
+			for ( i = minprot; i != NIL; i = protnext[i] )
+				{
+				/* Compute the difference into the spare
+				 * array; the arrays are swapped only when
+				 * this candidate beats the best so far.
+				 */
+				d = tbldiff( state, i, extrct[1 - extptr] );
+				if ( d < mindiff )
+					{
+					extptr = 1 - extptr;
+					mindiff = d;
+					minprot = i;
+					}
+				}
+			}
+
+		/* Check if the proto we've decided on as our best bet is close
+		 * enough to the state we want to match to be usable.
+		 */
+
+		if ( mindiff * 100 > totaltrans * ACCEPTABLE_DIFF_PERCENTAGE )
+			{
+			/* No good.  If the state is homogeneous enough,
+			 * we make a template out of it.  Otherwise, we
+			 * make a proto.
+			 */
+
+			if ( comfreq * 100 >=
+			     totaltrans * TEMPLATE_SAME_PERCENTAGE )
+				mktemplate( state, statenum, comstate );
+
+			else
+				{
+				mkprot( state, statenum, comstate );
+				mkentry( state, numecs, statenum,
+					JAMSTATE, totaltrans );
+				}
+			}
+
+		else
+			{ /* use the proto */
+			/* Only the transitions that differ from the proto
+			 * (held in extrct[extptr]) are entered.
+			 */
+			mkentry( extrct[extptr], numecs, statenum,
+				prottbl[minprot], mindiff );
+
+			/* If this state was sufficiently different from the
+			 * proto we built it from, make it, too, a proto.
+			 */
+
+			if ( mindiff * 100 >=
+			     totaltrans * NEW_PROTO_DIFF_PERCENTAGE )
+				mkprot( state, statenum, comstate );
+
+			/* Since mkprot added a new proto to the proto queue,
+			 * it's possible that "minprot" is no longer on the
+			 * proto queue (if it happened to have been the last
+			 * entry, it would have been bumped off).  If it's
+			 * not there, then the new proto took its physical
+			 * place (though logically the new proto is at the
+			 * beginning of the queue), so in that case the
+			 * following call will do nothing.
+			 */
+
+			mv2front( minprot );
+			}
+		}
+	}
+
+
+/* cmptmps - compress template table entries
+ *
+ * Template tables are compressed by using the 'template equivalence
+ * classes', which are collections of transition character equivalence
+ * classes which always appear together in templates - really meta-equivalence
+ * classes.
+ */
+
+void cmptmps()
+	{
+	int row[CSIZE + 1];
+	register int tnum, ec;
+	int slot, dest, live;
+
+	peakpairs = numtemps * numecs + tblend;
+
+	/* With meta-equivalence classes enabled, build them from the data
+	 * gathered on template transitions; otherwise the ordinary
+	 * equivalence classes are used as they are.
+	 */
+	nummecs = usemecs ? cre8ecs( tecfwd, tecbck, numecs ) : numecs;
+
+	while ( lastdfa + numtemps + 1 >= current_max_dfas )
+		increase_max_dfas();
+
+	/* Process the templates one at a time. */
+
+	for ( tnum = 1; tnum <= numtemps; ++tnum )
+		{
+		/* Count of non-jam transitions leaving this template. */
+		live = 0;
+
+		for ( ec = 1; ec <= numecs; ++ec )
+			{
+			dest = tnxt[numecs * tnum + ec];
+
+			/* When meta-equivalence classes are in use, the
+			 * absolute value of tecbck[] (as set up by
+			 * cre8ecs()) is the meta-equivalence class of an
+			 * equivalence class; entries that are not positive
+			 * are skipped.  Without them the class is its own
+			 * slot.
+			 */
+			slot = usemecs ? tecbck[ec] : ec;
+
+			if ( slot <= 0 )
+				continue;
+
+			row[slot] = dest;
+
+			if ( dest > 0 )
+				++live;
+			}
+
+		/* The skeleton assumes (in a rather subtle way) that when
+		 * meta-equivalence classes are used, the def[] entry for
+		 * every template is the jam template, i.e., templates never
+		 * default to other non-jam table entries (e.g., another
+		 * template).
+		 */
+
+		/* Leave room for the jam-state after the last real state. */
+		mkentry( row, nummecs, lastdfa + tnum + 1, JAMSTATE, live );
+		}
+	}
+
+
+
+/* expand_nxt_chk - expand the next check arrays */
+
+void expand_nxt_chk()
+	{
+	/* Index of the first element that the enlarged arrays add. */
+	register int first_new = current_max_xpairs;
+
+	++num_reallocs;
+
+	current_max_xpairs = first_new + MAX_XPAIRS_INCREMENT;
+
+	nxt = reallocate_integer_array( nxt, current_max_xpairs );
+	chk = reallocate_integer_array( chk, current_max_xpairs );
+
+	/* Only the newly added part of chk is cleared. */
+	zero_out( (char *) &chk[first_new],
+		(size_t) (MAX_XPAIRS_INCREMENT * sizeof( int )) );
+	}
+
+
+/* find_table_space - finds a space in the table for a state to be placed
+ *
+ * synopsis
+ *     int *state, numtrans, block_start;
+ *     int find_table_space();
+ *
+ *     block_start = find_table_space( state, numtrans );
+ *
+ * State is the state to be added to the full speed transition table.  It is
+ * read at indices 1 through numecs; entries that are zero need no slot in
+ * the table.
+ * Numtrans is the number of out-transitions for the state.
+ *
+ * find_table_space() returns the position of the start of the first block (in
+ * chk) able to accommodate the state.  For the returned position i, chk[i - 1]
+ * and chk[i] are both zero, and chk[i + ec] is zero for every ec whose
+ * state[ec] is non-zero.
+ *
+ * In determining if a state will or will not fit, find_table_space() must take
+ * into account the fact that an end-of-buffer state will be added at [0],
+ * and an action number will be added in [-1].
+ *
+ * The nxt and chk arrays are grown with expand_nxt_chk() whenever the
+ * candidate position plus numecs would reach current_max_xpairs.  When
+ * numtrans does not exceed MAX_XTIONS_FULL_INTERIOR_FIT, firstfree is
+ * updated as a side effect.
+ */
+
+int find_table_space( state, numtrans )
+int *state, numtrans;
+	{
+	/* Firstfree is the position of the first possible occurrence of two
+	 * consecutive unused records in the chk and nxt arrays.
+	 */
+	register int i;
+	register int *state_ptr, *chk_ptr;
+	register int *ptr_to_last_entry_in_state;
+
+	/* If there are too many out-transitions, put the state at the end of
+	 * nxt and chk.
+	 */
+	if ( numtrans > MAX_XTIONS_FULL_INTERIOR_FIT )
+		{
+		/* If table is empty, return the first available spot in
+		 * chk/nxt, which should be 1.
+		 */
+		if ( tblend < 2 )
+			return 1;
+
+		/* Start searching for table space near the end of
+		 * chk/nxt arrays.
+		 */
+		i = tblend - numecs;
+		}
+
+	else
+		/* Start searching for table space from the beginning
+		 * (skipping only the elements which will definitely not
+		 * hold the new state).
+		 */
+		i = firstfree;
+
+	while ( 1 )	/* loops until a space is found */
+		{
+		while ( i + numecs >= current_max_xpairs )
+			expand_nxt_chk();
+
+		/* Loops until space for end-of-buffer and action number
+		 * are found, i.e., until chk[i - 1] and chk[i] are both
+		 * zero.
+		 */
+		while ( 1 )
+			{
+			/* Check for action number space. */
+			if ( chk[i - 1] == 0 )
+				{
+				/* Check for end-of-buffer space. */
+				if ( chk[i] == 0 )
+					break;
+
+				else
+					/* Since chk[i] != 0, there is no use
+					 * checking whether chk[(++i) - 1] == 0,
+					 * because that is the very same entry,
+					 * so we skip a space.
+					 */
+					i += 2;
+				}
+
+			else
+				++i;
+
+			while ( i + numecs >= current_max_xpairs )
+				expand_nxt_chk();
+			}
+
+		/* If we started search from the beginning, store the new
+		 * firstfree for the next call of find_table_space().  Note
+		 * that this happens before the candidate position has been
+		 * checked against the state's transitions below.
+		 */
+		if ( numtrans <= MAX_XTIONS_FULL_INTERIOR_FIT )
+			firstfree = i + 1;
+
+		/* Check to see if all elements in chk (and therefore nxt)
+		 * that are needed for the new state have not yet been taken.
+		 * The scan stops early at the first slot that is both needed
+		 * by the state and already in use.
+		 */
+
+		state_ptr = &state[1];
+		ptr_to_last_entry_in_state = &chk[i + numecs + 1];
+
+		for ( chk_ptr = &chk[i + 1];
+		      chk_ptr != ptr_to_last_entry_in_state; ++chk_ptr )
+			if ( *(state_ptr++) != 0 && *chk_ptr != 0 )
+				break;
+
+		if ( chk_ptr == ptr_to_last_entry_in_state )
+			return i;
+
+		else
+		++i;
+		}
+	}
+
+
+/* inittbl - initialize transition tables
+ *
+ * Clears every "chk" entry, resets "tblend" and "numtemps" to zero, and
+ * sets "firstfree" to be one beyond the end of the (now empty) table.
+ * When meta-equivalence classes are in use, equivalence classes 1 through
+ * numecs are additionally chained into one doubly-linked list.
+ */
+void inittbl()
+	{
+	int ec;
+
+	zero_out( (char *) chk, (size_t) (current_max_xpairs * sizeof( int )) );
+
+	numtemps = 0;
+	tblend = 0;
+	firstfree = tblend + 1;
+
+	if ( ! usemecs )
+		return;
+
+	/* Set up doubly-linked meta-equivalence classes; these
+	 * are sets of equivalence classes which all have identical
+	 * transitions out of TEMPLATES.  Initially every class is
+	 * linked to its numeric neighbours.
+	 */
+
+	tecbck[1] = NIL;
+
+	for ( ec = 1; ec < numecs; ++ec )
+		{
+		tecfwd[ec] = ec + 1;
+		tecbck[ec + 1] = ec;
+		}
+
+	tecfwd[numecs] = NIL;
+	}
+
+
+/* mkdeftbl - make the default, "jam" table entries
+ *
+ * The jam state is numbered lastdfa + 1.  It is given a block of
+ * numecs + 1 entries placed just past the current end of the table: the
+ * first holds the end-of-buffer transition and the remaining numecs
+ * entries all have a next-state of 0.  Updates jamstate, jambase, tblend
+ * and numtemps.
+ */
+
+void mkdeftbl()
+	{
+	int ec, slot;
+
+	jamstate = lastdfa + 1;
+
+	/* Room for transition on end-of-buffer character. */
+	slot = ++tblend;
+
+	while ( slot + numecs >= current_max_xpairs )
+		expand_nxt_chk();
+
+	/* Add in default end-of-buffer transition. */
+	chk[slot] = jamstate;
+	nxt[slot] = end_of_buffer_state;
+
+	for ( ec = 1; ec <= numecs; ++ec )
+		{
+		chk[slot + ec] = jamstate;
+		nxt[slot + ec] = 0;
+		}
+
+	jambase = slot;
+
+	def[jamstate] = 0;
+	base[jamstate] = jambase;
+
+	tblend = slot + numecs;
+	++numtemps;
+	}
+
+
+/* mkentry - create base/def and nxt/chk entries for transition array
+ *
+ * synopsis
+ *   int state[numchars + 1], numchars, statenum, deflink, totaltrans;
+ *   mkentry( state, numchars, statenum, deflink, totaltrans );
+ *
+ * "state" is a transition array "numchars" characters in size, "statenum"
+ * is the offset to be used into the base/def tables, and "deflink" is the
+ * entry to put in the "def" table entry.  If "deflink" is equal to
+ * "JAMSTATE", then no attempt will be made to fit zero entries of "state"
+ * (i.e., jam entries) into the table.  It is assumed that by linking to
+ * "JAMSTATE" they will be taken care of.  In any case, entries in "state"
+ * marking transitions to "SAME_TRANS" are treated as though they will be
+ * taken care of by wherever "deflink" points.  "totaltrans" is the total
+ * number of transitions out of the state.  If it is below a certain threshold,
+ * the tables are searched for an interior spot that will accommodate the
+ * state array.
+ *
+ * A state with no out-transitions gets only base/def entries, and a state
+ * with exactly one out-transition is handed to stack1() instead of being
+ * placed here.
+ */
+
+void mkentry( state, numchars, statenum, deflink, totaltrans )
+register int *state;
+int numchars, statenum, deflink, totaltrans;
+	{
+	register int minec, maxec, i, baseaddr;
+	int tblbase, tbllast;
+
+	if ( totaltrans == 0 )
+		{ /* there are no out-transitions */
+		if ( deflink == JAMSTATE )
+			base[statenum] = JAMSTATE;
+		else
+			base[statenum] = 0;
+
+		def[statenum] = deflink;
+		return;
+		}
+
+	for ( minec = 1; minec <= numchars; ++minec )
+		{
+		if ( state[minec] != SAME_TRANS )
+			if ( state[minec] != 0 || deflink != JAMSTATE )
+				break;
+		}
+
+	if ( totaltrans == 1 )
+		{
+		/* There's only one out-transition.  Save it for later to fill
+		 * in holes in the tables.
+		 */
+		stack1( statenum, minec, state[minec], deflink );
+		return;
+		}
+
+	for ( maxec = numchars; maxec > 0; --maxec )
+		{
+		if ( state[maxec] != SAME_TRANS )
+			if ( state[maxec] != 0 || deflink != JAMSTATE )
+				break;
+		}
+
+	/* Whether we try to fit the state table in the middle of the table
+	 * entries we have already generated, or if we just take the state
+	 * table at the end of the nxt/chk tables, we must make sure that we
+	 * have a valid base address (i.e., non-negative).  Note that
+	 * negative base addresses are dangerous at run-time (because indexing
+	 * the nxt array with one and a low-valued character will access
+	 * memory before the start of the array).
+	 */
+
+	/* Try an interior fit only if at most INTERIOR_FIT_PERCENTAGE percent
+	 * of the numchars entries are transitions; otherwise the state goes
+	 * past the current end of the tables.
+	 */
+	if ( totaltrans * 100 <= numchars * INTERIOR_FIT_PERCENTAGE )
+		{
+		/* Attempt to squeeze it into the middle of the tables. */
+		baseaddr = firstfree;
+
+		while ( baseaddr < minec )
+			{
+			/* Using baseaddr would result in a negative base
+			 * address below; find the next free slot.
+			 */
+			for ( ++baseaddr; chk[baseaddr] != 0; ++baseaddr )
+				;
+			}
+
+		while ( baseaddr + maxec - minec + 1 >= current_max_xpairs )
+			expand_nxt_chk();
+
+		for ( i = minec; i <= maxec; ++i )
+			if ( state[i] != SAME_TRANS &&
+			     (state[i] != 0 || deflink != JAMSTATE) &&
+			     chk[baseaddr + i - minec] != 0 )
+				{ /* baseaddr unsuitable - find another */
+				for ( ++baseaddr;
+				      baseaddr < current_max_xpairs &&
+				      chk[baseaddr] != 0; ++baseaddr )
+					;
+
+				while ( baseaddr + maxec - minec + 1 >=
+					current_max_xpairs )
+					expand_nxt_chk();
+
+				/* Reset the loop counter so we'll start all
+				 * over again next time it's incremented.
+				 */
+
+				i = minec - 1;
+				}
+		}
+
+	else
+		{
+		/* Ensure that the base address we eventually generate is
+		 * non-negative.
+		 */
+		baseaddr = MAX( tblend + 1, minec );
+		}
+
+	tblbase = baseaddr - minec;
+	tbllast = tblbase + maxec;
+
+	while ( tbllast + 1 >= current_max_xpairs )
+		expand_nxt_chk();
+
+	base[statenum] = tblbase;
+	def[statenum] = deflink;
+
+	for ( i = minec; i <= maxec; ++i )
+		if ( state[i] != SAME_TRANS )
+			if ( state[i] != 0 || deflink != JAMSTATE )
+				{
+				nxt[tblbase + i] = state[i];
+				chk[tblbase + i] = statenum;
+				}
+
+	if ( baseaddr == firstfree )
+		/* Find next free slot in tables. */
+		for ( ++firstfree; chk[firstfree] != 0; ++firstfree )
+			;
+
+	tblend = MAX( tblend, tbllast );
+	}
+
+
+/* mk1tbl - create table entries for a state (or state fragment) which
+ *            has only one out-transition
+ *
+ * "state" is the state number, "sym" the symbol of its single transition,
+ * "onenxt" the state reached on that symbol, and "onedef" the value stored
+ * in the state's "def" entry.  The transition goes into the first unused
+ * chk/nxt slot at or after the larger of firstfree and sym, so the base
+ * that is stored (slot minus sym) is never negative.
+ */
+
+void mk1tbl( state, sym, onenxt, onedef )
+int state, sym, onenxt, onedef;
+	{
+	if ( sym > firstfree )
+		firstfree = sym;
+
+	/* Advance to an unused slot, growing the tables if we run off
+	 * their end.
+	 */
+	while ( chk[firstfree] != 0 )
+		{
+		++firstfree;
+
+		if ( firstfree >= current_max_xpairs )
+			expand_nxt_chk();
+		}
+
+	chk[firstfree] = state;
+	nxt[firstfree] = onenxt;
+	base[state] = firstfree - sym;
+	def[state] = onedef;
+
+	if ( firstfree <= tblend )
+		return;
+
+	/* The slot lies past the old end of the table: extend the table
+	 * and step firstfree beyond the slot just used.
+	 */
+	tblend = firstfree;
+	++firstfree;
+
+	if ( firstfree >= current_max_xpairs )
+		expand_nxt_chk();
+	}
+
+
+/* mkprot - create new proto entry
+ *
+ * "state" is the transition array to remember (read at indices 1 through
+ * numecs), "statenum" the number recorded for the proto and "comstate" its
+ * common state.  The new proto is linked in at the front of the proto
+ * queue.  Once the queue or the save area is full, the slot of the last
+ * queue element is reused for it.
+ */
+
+void mkprot( state, statenum, comstate )
+int state[], statenum, comstate;
+	{
+	int ec, slot;
+	int *saved;
+
+	++numprots;
+
+	if ( numprots < MSP && numecs * numprots < PROT_SAVE_SIZE )
+		slot = numprots;
+
+	else
+		{
+		/* Gotta make room for the new proto by dropping last entry in
+		 * the queue.
+		 */
+		slot = lastprot;
+		lastprot = protprev[slot];
+		protnext[lastprot] = NIL;
+		}
+
+	/* Link the slot in as the new head of the queue. */
+	protnext[slot] = firstprot;
+
+	if ( firstprot != NIL )
+		protprev[firstprot] = slot;
+
+	firstprot = slot;
+
+	protcomst[slot] = comstate;
+	prottbl[slot] = statenum;
+
+	/* Copy state into save area so it can be compared with rapidly. */
+	saved = &protsave[numecs * (slot - 1)];
+
+	for ( ec = 1; ec <= numecs; ++ec )
+		saved[ec] = state[ec];
+	}
+
+
+/* mktemplate - create a template entry based on a state, and connect the state
+ *              to it
+ *
+ * "state" is the transition array of state number "statenum" (read at
+ * indices 1 through numecs) and "comstate" is the state that every
+ * non-zero transition of the new template is made to point at.  The state
+ * itself is then entered into the tables as a set of differences from the
+ * template, with the negated template number as its default link.
+ */
+
+void mktemplate( state, statenum, comstate )
+int state[], statenum, comstate;
+	{
+	int ec, ndiff, tmpl_off;
+	int diffs[CSIZE + 1];
+	wchar_t trans_ecs[CSIZE + 1];
+	int ntrans_ecs = 0;
+
+	++numtemps;
+
+	/* Calculate where we will temporarily store the transition table
+	 * of the template in the tnxt[] array.  The final transition table
+	 * gets created by cmptmps().
+	 */
+
+	tmpl_off = numtemps * numecs;
+
+	if ( tmpl_off + numecs >= current_max_template_xpairs )
+		{
+		++num_reallocs;
+
+		current_max_template_xpairs += MAX_TEMPLATE_XPAIRS_INCREMENT;
+
+		tnxt = reallocate_integer_array( tnxt,
+			current_max_template_xpairs );
+		}
+
+	/* Build the template row, collecting the equivalence classes that
+	 * have a transition along the way.
+	 */
+	for ( ec = 1; ec <= numecs; ++ec )
+		{
+		if ( state[ec] != 0 )
+			{
+			trans_ecs[ntrans_ecs++] = ec;
+			tnxt[tmpl_off + ec] = comstate;
+			}
+
+		else
+			tnxt[tmpl_off + ec] = 0;
+		}
+
+	if ( usemecs )
+		mkeccl( trans_ecs, ntrans_ecs, tecfwd, tecbck, numecs, 0 );
+
+	mkprot( &tnxt[tmpl_off], -numtemps, comstate );
+
+	/* We rely on the fact that mkprot adds things to the beginning
+	 * of the proto queue, so firstprot is the proto just created.
+	 */
+
+	ndiff = tbldiff( state, firstprot, diffs );
+	mkentry( diffs, numecs, statenum, -numtemps, ndiff );
+	}
+
+
+/* mv2front - move proto queue element to front of queue
+ *
+ * "qelm" is the queue element to move.  Nothing happens when it is
+ * already the first element.  Otherwise it is unlinked from its current
+ * position (lastprot is updated if it was the last element) and relinked
+ * ahead of firstprot.
+ */
+
+void mv2front( qelm )
+int qelm;
+	{
+	int before, after;
+
+	if ( qelm == firstprot )
+		return;
+
+	before = protprev[qelm];
+	after = protnext[qelm];
+
+	if ( lastprot == qelm )
+		lastprot = before;
+
+	/* Unlink qelm from between its neighbours. */
+	protnext[before] = after;
+
+	if ( after != NIL )
+		protprev[after] = before;
+
+	/* Relink it as the new head of the queue. */
+	protprev[qelm] = NIL;
+	protnext[qelm] = firstprot;
+	protprev[firstprot] = qelm;
+	firstprot = qelm;
+	}
+
+
+/* place_state - place a state into full speed transition table
+ *
+ * State is the statenum'th state.  It is indexed by equivalence class and
+ * gives the number of the state to enter for a given equivalence class.
+ * Transnum is the number of out-transitions for the state.
+ *
+ * The position is chosen by find_table_space() and recorded in base[];
+ * tblend is raised if the state extends past the current end of the table.
+ */
+
+void place_state( state, statenum, transnum )
+int *state, statenum, transnum;
+	{
+	int ec;
+	int slot = find_table_space( state, transnum );
+	int last_slot = slot + numecs;
+
+	/* "base" is the table of start positions. */
+	base[statenum] = slot;
+
+	/* Mark the action number slot and then the end-of-buffer slot as
+	 * taken; the non-zero values tell find_table_space() that these
+	 * positions in chk/nxt must not be handed out to another state.
+	 */
+	chk[slot - 1] = 1;
+	chk[slot] = 1;
+
+	/* Place the state into chk and nxt, skipping equivalence classes
+	 * that have no transition.
+	 */
+	for ( ec = 1; ec <= numecs; ++ec )
+		{
+		if ( state[ec] == 0 )
+			continue;
+
+		chk[slot + ec] = ec;
+		nxt[slot + ec] = state[ec];
+		}
+
+	if ( tblend < last_slot )
+		tblend = last_slot;
+	}
+
+
+/* stack1 - save states with only one out-transition to be processed later
+ *
+ * "statenum" is the state, "sym" the symbol of its single transition,
+ * "nextstate" the state reached on that symbol and "deflink" its default
+ * link.  While the "one-transition" stack has room, the four values are
+ * pushed onto it for mk1tbl to process later; once the stack is full the
+ * state is handed to mk1tbl immediately instead.
+ */
+
+void stack1( statenum, sym, nextstate, deflink )
+int statenum, sym, nextstate, deflink;
+	{
+	if ( onesp >= ONE_STACK_SIZE - 1 )
+		{
+		/* No room left on the stack - process it right now. */
+		mk1tbl( statenum, sym, nextstate, deflink );
+		return;
+		}
+
+	++onesp;
+
+	onestate[onesp] = statenum;
+	onesym[onesp] = sym;
+	onenext[onesp] = nextstate;
+	onedef[onesp] = deflink;
+	}
+
+
+/* tbldiff - compute differences between two state tables
+ *
+ * "state" is the state array to be compared against the pr'th proto.
+ * "pr" is both the number of that proto and an index into the save area
+ * where the proto's complete state table can be found.  "ext" receives
+ * the result.  All three tables are accessed at indices 1 through numecs.
+ *
+ * Each entry of "state" which differs from the corresponding entry of
+ * "pr" is copied into "ext"; entries which are the same in both are
+ * marked as transitions to "SAME_TRANS" in "ext".  The number of differing
+ * entries is returned as function value.  Note that this number is
+ * "numecs" minus the number of "SAME_TRANS" entries in "ext".
+ */
+
+int tbldiff( state, pr, ext )
+int state[], pr, ext[];
+	{
+	int ec;
+	int ndiff = 0;
+	int *proto = &protsave[numecs * (pr - 1)];
+
+	for ( ec = 1; ec <= numecs; ++ec )
+		{
+		if ( proto[ec] == state[ec] )
+			{
+			ext[ec] = SAME_TRANS;
+			continue;
+			}
+
+		ext[ec] = state[ec];
+		++ndiff;
+		}
+
+	return ndiff;
+	}