mirror of
https://github.com/ThomasDickey/mawk-snapshots.git
synced 2026-01-27 03:14:29 +00:00
1235 lines
24 KiB
C
1235 lines
24 KiB
C
/********************************************
scan.c
copyright 2008-2023,2024, Thomas E. Dickey
copyright 2010, Jonathan Nieder
copyright 1991-1996,2014, Michael D. Brennan

This is a source file for mawk, an implementation of
the AWK programming language.

Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/

/*
 * $MawkId: scan.c,v 1.58 2024/07/26 00:19:17 tom Exp $
 */
|
|
|
|
#include <mawk.h>
|
|
#include <scan.h>
|
|
#include <memory.h>
|
|
#include <field.h>
|
|
#include <init.h>
|
|
#include <fin.h>
|
|
#include <repl.h>
|
|
#include <code.h>
|
|
|
|
#ifdef HAVE_FCNTL_H
|
|
#include <fcntl.h>
|
|
#endif
|
|
|
|
#include <files.h>
|
|
|
|
/* character and string spellings of the grouping punctuation */
#define CHR_LPAREN '('
#define CHR_RPAREN ')'

#define STR_LBRACE "{"
#define STR_RBRACE "}"

/*
 * Record x as the current token, then return it from the enclosing
 * function after passing it through scan_scope().
 */
#define ct_ret(x) do { current_token = (x); return scan_scope(current_token); } while (0)

/*
 * Fetch the next byte of program text.  A zero byte in the buffer hands
 * control to slow_next(), which refills the buffer or reports end of input.
 */
#define next() (*buffp ? *buffp++ : slow_next())

/* step the read pointer back over the byte just fetched */
#define un_next() buffp--

/*
 * One character of look-ahead: return token x when the next byte is c,
 * otherwise push the byte back and return token d.
 */
#define test1_ret(c,x,d) if ( next() == (c) ) ct_ret(x) ;\
    else { un_next() ; ct_ret(d) ; }

/*
 * As test1_ret, but with two candidate look-ahead bytes (c1 -> x1,
 * c2 -> x2) before falling back to token d.
 */
#define test2_ret(c1,x1,c2,x2,d) switch( next() )\
    { case c1: ct_ret(x1) ;\
      case c2: ct_ret(x2) ;\
      default: un_next() ;\
               ct_ret(d) ; }
|
|
/* shared constants; yylex() points DOUBLE tokens equal to 0 or 1 at these */
double double_zero = 0.0;
double double_one = 1.0;
|
|
|
|
/* forward declarations of the file-local helpers */
static void scan_fillbuff(void);
static void scan_open(void);
static int slow_next(void);
static void eat_comment(void);
static double collect_decimal(int c, int *flag);
static int collect_string(void);
static int collect_RE(void);
|
|
|
|
/*-----------------------------
|
|
program file management
|
|
*----------------------------*/
|
|
|
|
char *pfile_name;
|
|
PFILE *pfile_list;
|
|
|
|
static STRING *program_string;
|
|
static UChar *buffer;
|
|
static UChar *buffp;
|
|
/* unsigned so it works with 8 bit chars */
|
|
static int program_fd;
|
|
static int eof_flag;
|
|
|
|
/*
|
|
* Data for scan_scope()
|
|
*/
|
|
#define MAX_REPAIR 10
|
|
static SYMTAB *current_symbol;
|
|
static SYMTAB *current_funct;
|
|
|
|
typedef enum {
|
|
ssDEFAULT = 0
|
|
,ssHEADER
|
|
,ssFUNCTN
|
|
,ssLPAREN
|
|
,ssRPAREN
|
|
,ssLBRACE
|
|
,ssRBRACE
|
|
} SCAN_SCOPE;
|
|
|
|
static SCAN_SCOPE current_scope;
|
|
static int braces_level;
|
|
static int max_repair;
|
|
static struct {
|
|
SYMTAB *stp;
|
|
SYM_TYPE type;
|
|
} repair_syms[MAX_REPAIR];
|
|
|
|
/* use unsigned chars for index into scan_code[] */
|
|
#define NextUChar(c) (UChar)(c = (char) next())
|
|
|
|
/* overused tmp buffer */
|
|
char string_buff[SPRINTF_LIMIT];
|
|
|
|
static void
|
|
string_too_long(void)
|
|
{
|
|
compile_error("string too long \"%.10s ...", string_buff);
|
|
mawk_exit(2);
|
|
}
|
|
|
|
/*
 * Call string_too_long() when ptr has reached or passed the end of
 * string_buff.  Wrapped in do/while(0) so the macro behaves as a single
 * statement and cannot capture an "else" that follows it.
 */
#define CheckStringSize(ptr) \
    do { \
	if ((size_t)((ptr) - string_buff) >= sizeof(string_buff)) \
	    string_too_long(); \
    } while (0)
|
|
|
|
void
|
|
scan_init(const char *cmdline_program)
|
|
{
|
|
if (cmdline_program) {
|
|
program_fd = -1; /* command line program */
|
|
program_string = new_STRING0(strlen(cmdline_program) + 1);
|
|
strcpy(program_string->str, cmdline_program);
|
|
/* simulate file termination */
|
|
program_string->str[program_string->len - 1] = '\n';
|
|
buffp = (UChar *) program_string->str;
|
|
eof_flag = 1;
|
|
} else { /* program from file[s] */
|
|
scan_open();
|
|
buffp = buffer = (UChar *) zmalloc((size_t) (BUFFSZ + 1));
|
|
scan_fillbuff();
|
|
}
|
|
|
|
#ifdef OS2 /* OS/2 "extproc" is similar to #! */
|
|
if (strnicmp(buffp, "extproc ", 8) == 0)
|
|
eat_comment();
|
|
#endif
|
|
eat_nl(); /* scan to first token */
|
|
if (next() == 0) {
|
|
/* no program */
|
|
mawk_exit(0);
|
|
}
|
|
|
|
un_next();
|
|
|
|
}
|
|
|
|
static void
|
|
scan_open(void) /* open pfile_name */
|
|
{
|
|
if (pfile_name[0] == '-' && pfile_name[1] == 0) {
|
|
program_fd = 0;
|
|
} else if ((program_fd = open(pfile_name, O_RDONLY, 0)) == -1) {
|
|
errmsg(errno, "cannot open \"%s\"", pfile_name);
|
|
mawk_exit(2);
|
|
}
|
|
}
|
|
|
|
void
|
|
scan_cleanup(void)
|
|
{
|
|
if (program_fd >= 0)
|
|
zfree(buffer, (size_t) (BUFFSZ + 1));
|
|
if (program_string)
|
|
free_STRING(program_string);
|
|
|
|
if (program_fd > 0)
|
|
close(program_fd);
|
|
|
|
/* redefine SPACE as [ \t\n] */
|
|
|
|
scan_code['\n'] = (char) ((posix_space_flag && rs_shadow.type != SEP_MLR)
|
|
? SC_UNEXPECTED
|
|
: SC_SPACE);
|
|
scan_code['\f'] = SC_UNEXPECTED; /*value doesn't matter */
|
|
scan_code['\013'] = SC_UNEXPECTED; /* \v not space */
|
|
scan_code['\r'] = SC_UNEXPECTED;
|
|
}
|
|
|
|
/*--------------------------------
  global variables shared by yyparse() and yylex()
  and used for error messages too
 *-------------------------------*/

int current_token = -1;		/* most recent token returned by yylex() */
unsigned token_lineno;		/* line number attributed to that token */
unsigned compile_error_count;
int NR_flag;			/* are we tracking NR */
int paren_cnt;			/* count of currently open '(' */
int brace_cnt;			/* count of currently open '{' */
int print_flag;			/* changes meaning of '>' */
int getline_flag;		/* changes meaning of '<' */
|
|
|
|
/*----------------------------------------
  file reading functions
  next() and un_next() are macros defined near the top of this file
 *---------------------*/

/* line number of the program text currently being scanned */
static unsigned lineno = 1;
|
|
|
|
static void
|
|
scan_fillbuff(void)
|
|
{
|
|
size_t r;
|
|
|
|
r = fillbuff(program_fd, (char *) buffer, (size_t) BUFFSZ);
|
|
if (r < BUFFSZ) {
|
|
eof_flag = 1;
|
|
/* make sure eof is terminated */
|
|
buffer[r] = '\n';
|
|
buffer[r + 1] = 0;
|
|
} else {
|
|
buffer[r] = 0;
|
|
}
|
|
}
|
|
|
|
/* read one character -- slowly */
|
|
static int
|
|
slow_next(void)
|
|
{
|
|
|
|
while (*buffp == 0) {
|
|
if (!eof_flag) {
|
|
buffp = buffer;
|
|
scan_fillbuff();
|
|
} else if (pfile_list /* open another program file */ ) {
|
|
PFILE *q;
|
|
|
|
if (program_fd > 0)
|
|
close(program_fd);
|
|
eof_flag = 0;
|
|
pfile_name = pfile_list->fname;
|
|
q = pfile_list;
|
|
pfile_list = pfile_list->link;
|
|
ZFREE(q);
|
|
scan_open();
|
|
token_lineno = lineno = 1;
|
|
} else {
|
|
break; /* real eof */
|
|
}
|
|
}
|
|
|
|
return *buffp++; /* note can un_next() , eof which is zero */
|
|
}
|
|
|
|
static void
|
|
eat_comment(void)
|
|
{
|
|
register int c;
|
|
|
|
while (scan_code[NextUChar(c)] && (c != '\n')) {
|
|
; /* empty */
|
|
}
|
|
un_next();
|
|
}
|
|
|
|
/* this is how we handle extra semi-colons that are
|
|
now allowed to separate pattern-action blocks
|
|
|
|
A proof that they are useless clutter to the language:
|
|
we throw them away
|
|
*/
|
|
|
|
static void
|
|
eat_semi_colon(void)
|
|
/* eat one semi-colon on the current line */
|
|
{
|
|
register int c;
|
|
|
|
while (scan_code[NextUChar(c)] == SC_SPACE) {
|
|
; /* empty */
|
|
}
|
|
if (c != ';')
|
|
un_next();
|
|
}
|
|
|
|
void
|
|
eat_nl(void) /* eat all space including newlines */
|
|
{
|
|
while (1) {
|
|
switch (scan_code[(UChar) next()]) {
|
|
case SC_COMMENT:
|
|
eat_comment();
|
|
break;
|
|
|
|
case SC_NL:
|
|
lineno++;
|
|
/* FALLTHRU */
|
|
|
|
case SC_SPACE:
|
|
break;
|
|
|
|
case SC_ESCAPE:
|
|
/* bug fix - surprised anyone did this,
|
|
a csh user with backslash dyslexia.(Not a joke)
|
|
*/
|
|
{
|
|
int c;
|
|
|
|
while (scan_code[NextUChar(c)] == SC_SPACE) {
|
|
; /* empty */
|
|
}
|
|
if (c == '\n')
|
|
token_lineno = ++lineno;
|
|
else if (c == 0) {
|
|
un_next();
|
|
return;
|
|
} else { /* error */
|
|
un_next();
|
|
/* can't un_next() twice so deal with it */
|
|
yylval.ival = '\\';
|
|
unexpected_char();
|
|
if (++compile_error_count == MAX_COMPILE_ERRORS)
|
|
mawk_exit(2);
|
|
return;
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
un_next();
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Function parameters are local to a function, but because mawk uses a single
|
|
* hash table, it may have conflicts with global symbols (function names).
|
|
* Work around this by saving the conflicting symbol, overriding it an ordinary
|
|
* symbol and restoring at the end of the function.
|
|
*/
|
|
static int
|
|
scan_scope(int state)
|
|
{
|
|
switch (state) {
|
|
case FUNCTION:
|
|
if (braces_level == 0)
|
|
current_scope = ssHEADER;
|
|
break;
|
|
case LPAREN:
|
|
if (current_scope == ssFUNCTN)
|
|
current_scope = ssLPAREN;
|
|
break;
|
|
case FUNCT_ID:
|
|
if (current_scope == ssHEADER) {
|
|
current_scope = ssFUNCTN;
|
|
current_funct = current_symbol;
|
|
} else if (current_scope == ssLPAREN) {
|
|
if (current_symbol == current_funct) {
|
|
compile_error("function parameter cannot be the function");
|
|
} else if (max_repair < MAX_REPAIR) {
|
|
repair_syms[max_repair].stp = current_symbol;
|
|
repair_syms[max_repair].type = current_symbol->type;
|
|
++max_repair;
|
|
state = ID;
|
|
} else {
|
|
compile_error("too many local/global symbol conflicts");
|
|
}
|
|
}
|
|
break;
|
|
case RPAREN:
|
|
if (current_scope == ssLPAREN)
|
|
current_scope = ssRPAREN;
|
|
break;
|
|
case LBRACE:
|
|
++braces_level;
|
|
if (current_scope == ssRPAREN)
|
|
current_scope = ssLBRACE;
|
|
break;
|
|
case RBRACE:
|
|
if (braces_level > 0 && current_scope == ssLBRACE) {
|
|
if (--braces_level == 0) {
|
|
current_scope = ssDEFAULT;
|
|
while (max_repair > 0) {
|
|
--max_repair;
|
|
(repair_syms[max_repair].stp)->type =
|
|
repair_syms[max_repair].type;
|
|
}
|
|
}
|
|
} else {
|
|
current_scope = ssDEFAULT;
|
|
}
|
|
break;
|
|
}
|
|
return state;
|
|
}
|
|
|
|
int
|
|
yylex(void)
|
|
{
|
|
register int c;
|
|
|
|
token_lineno = lineno;
|
|
|
|
#ifdef NO_LEAKS
|
|
memset(&yylval, 0, sizeof(yylval));
|
|
#endif
|
|
|
|
reswitch:
|
|
|
|
switch (scan_code[NextUChar(c)]) {
|
|
case 0:
|
|
ct_ret(EOF);
|
|
|
|
case SC_SPACE:
|
|
goto reswitch;
|
|
|
|
case SC_COMMENT:
|
|
eat_comment();
|
|
goto reswitch;
|
|
|
|
case SC_NL:
|
|
lineno++;
|
|
eat_nl();
|
|
ct_ret(NL);
|
|
|
|
case SC_ESCAPE:
|
|
while (scan_code[NextUChar(c)] == SC_SPACE) {
|
|
; /* empty */
|
|
}
|
|
if (c == '\n') {
|
|
token_lineno = ++lineno;
|
|
goto reswitch;
|
|
}
|
|
|
|
if (c == 0)
|
|
ct_ret(EOF);
|
|
un_next();
|
|
yylval.ival = '\\';
|
|
ct_ret(UNEXPECTED);
|
|
|
|
case SC_SEMI_COLON:
|
|
eat_nl();
|
|
ct_ret(SEMI_COLON);
|
|
|
|
case SC_LBRACE:
|
|
eat_nl();
|
|
brace_cnt++;
|
|
ct_ret(LBRACE);
|
|
|
|
case SC_PLUS:
|
|
switch (next()) {
|
|
case '+':
|
|
yylval.ival = '+';
|
|
string_buff[0] =
|
|
string_buff[1] = '+';
|
|
string_buff[2] = 0;
|
|
ct_ret(INC_or_DEC);
|
|
|
|
case '=':
|
|
ct_ret(ADD_ASG);
|
|
|
|
default:
|
|
un_next();
|
|
ct_ret(PLUS);
|
|
}
|
|
|
|
case SC_MINUS:
|
|
switch (next()) {
|
|
case '-':
|
|
yylval.ival = '-';
|
|
string_buff[0] =
|
|
string_buff[1] = '-';
|
|
string_buff[2] = 0;
|
|
ct_ret(INC_or_DEC);
|
|
|
|
case '=':
|
|
ct_ret(SUB_ASG);
|
|
|
|
default:
|
|
un_next();
|
|
ct_ret(MINUS);
|
|
}
|
|
|
|
case SC_COMMA:
|
|
eat_nl();
|
|
ct_ret(COMMA);
|
|
|
|
case SC_MUL:
|
|
test1_ret('=', MUL_ASG, MUL);
|
|
|
|
case SC_DIV:
|
|
{
|
|
static const int can_precede_div[] =
|
|
{DOUBLE, STRING_, RPAREN, ID, D_ID, RE, RBOX, FIELD,
|
|
GETLINE, INC_or_DEC, -1};
|
|
|
|
const int *p = can_precede_div;
|
|
|
|
do {
|
|
if (*p == current_token) {
|
|
if (*p != INC_or_DEC) {
|
|
test1_ret('=', DIV_ASG, DIV);
|
|
}
|
|
|
|
if (next() == '=') {
|
|
un_next();
|
|
ct_ret(collect_RE());
|
|
}
|
|
}
|
|
}
|
|
while (*++p != -1);
|
|
|
|
ct_ret(collect_RE());
|
|
}
|
|
|
|
case SC_MOD:
|
|
test1_ret('=', MOD_ASG, MOD);
|
|
|
|
case SC_POW:
|
|
test1_ret('=', POW_ASG, POW);
|
|
|
|
case SC_LPAREN:
|
|
paren_cnt++;
|
|
ct_ret(LPAREN);
|
|
|
|
case SC_RPAREN:
|
|
if (--paren_cnt < 0) {
|
|
compile_error("extra ')'");
|
|
paren_cnt = 0;
|
|
goto reswitch;
|
|
}
|
|
|
|
ct_ret(RPAREN);
|
|
|
|
case SC_LBOX:
|
|
ct_ret(LBOX);
|
|
|
|
case SC_RBOX:
|
|
ct_ret(RBOX);
|
|
|
|
case SC_MATCH:
|
|
string_buff[0] = '~';
|
|
string_buff[1] = 0;
|
|
yylval.ival = 1;
|
|
ct_ret(MATCH);
|
|
|
|
case SC_EQUAL:
|
|
test1_ret('=', EQ, ASSIGN);
|
|
|
|
case SC_NOT: /* ! */
|
|
if ((c = next()) == '~') {
|
|
string_buff[0] = '!';
|
|
string_buff[1] = '~';
|
|
string_buff[2] = 0;
|
|
yylval.ival = 0;
|
|
ct_ret(MATCH);
|
|
} else if (c == '=')
|
|
ct_ret(NEQ);
|
|
|
|
un_next();
|
|
ct_ret(NOT);
|
|
|
|
case SC_LT: /* '<' */
|
|
if (next() == '=')
|
|
ct_ret(LTE);
|
|
else
|
|
un_next();
|
|
|
|
if (getline_flag) {
|
|
getline_flag = 0;
|
|
ct_ret(IO_IN);
|
|
} else
|
|
ct_ret(LT);
|
|
|
|
case SC_GT: /* '>' */
|
|
if (print_flag && paren_cnt == 0) {
|
|
print_flag = 0;
|
|
/* there are 3 types of IO_OUT
|
|
-- build the error string in string_buff */
|
|
string_buff[0] = '>';
|
|
if (next() == '>') {
|
|
yylval.ival = F_APPEND;
|
|
string_buff[1] = '>';
|
|
string_buff[2] = 0;
|
|
} else {
|
|
un_next();
|
|
yylval.ival = F_TRUNC;
|
|
string_buff[1] = 0;
|
|
}
|
|
ct_ret(IO_OUT);
|
|
}
|
|
|
|
test1_ret('=', GTE, GT);
|
|
|
|
case SC_OR:
|
|
if (next() == '|') {
|
|
eat_nl();
|
|
ct_ret(OR);
|
|
} else {
|
|
un_next();
|
|
|
|
if (print_flag && paren_cnt == 0) {
|
|
print_flag = 0;
|
|
yylval.ival = PIPE_OUT;
|
|
string_buff[0] = '|';
|
|
string_buff[1] = 0;
|
|
ct_ret(IO_OUT);
|
|
} else
|
|
ct_ret(PIPE);
|
|
}
|
|
|
|
case SC_AND:
|
|
if (next() == '&') {
|
|
eat_nl();
|
|
ct_ret(AND);
|
|
} else {
|
|
un_next();
|
|
yylval.ival = '&';
|
|
ct_ret(UNEXPECTED);
|
|
}
|
|
|
|
case SC_QMARK:
|
|
ct_ret(QMARK);
|
|
|
|
case SC_COLON:
|
|
ct_ret(COLON);
|
|
|
|
case SC_RBRACE:
|
|
if (--brace_cnt < 0) {
|
|
compile_error("extra '" STR_RBRACE "'");
|
|
eat_semi_colon();
|
|
brace_cnt = 0;
|
|
goto reswitch;
|
|
}
|
|
|
|
if ((c = current_token) == NL || c == SEMI_COLON
|
|
|| c == SC_FAKE_SEMI_COLON || c == RBRACE) {
|
|
/* if the brace_cnt is zero , we've completed
|
|
a pattern action block. If the user insists
|
|
on adding a semi-colon on the same line
|
|
we will eat it. Note what we do below:
|
|
physical law -- conservation of semi-colons */
|
|
|
|
if (brace_cnt == 0)
|
|
eat_semi_colon();
|
|
eat_nl();
|
|
ct_ret(RBRACE);
|
|
}
|
|
|
|
/* supply missing semi-colon to statement that
|
|
precedes a right-brace */
|
|
brace_cnt++;
|
|
un_next();
|
|
current_token = SC_FAKE_SEMI_COLON;
|
|
return scan_scope(SEMI_COLON);
|
|
|
|
case SC_DIGIT:
|
|
case SC_DOT:
|
|
{
|
|
double d;
|
|
int flag;
|
|
|
|
if ((d = collect_decimal(c, &flag)) == 0.0) {
|
|
if (flag)
|
|
ct_ret(flag);
|
|
else
|
|
yylval.ptr = (PTR) & double_zero;
|
|
} else if (d == 1.0) {
|
|
yylval.ptr = (PTR) & double_one;
|
|
} else {
|
|
yylval.ptr = (PTR) ZMALLOC(double);
|
|
*(double *) yylval.ptr = d;
|
|
}
|
|
ct_ret(DOUBLE);
|
|
}
|
|
|
|
case SC_DOLLAR: /* '$' */
|
|
{
|
|
double d;
|
|
int flag;
|
|
|
|
while (scan_code[NextUChar(c)] == SC_SPACE) {
|
|
; /* empty */
|
|
}
|
|
if (scan_code[c] != SC_DIGIT &&
|
|
scan_code[c] != SC_DOT) {
|
|
un_next();
|
|
ct_ret(DOLLAR);
|
|
}
|
|
|
|
/* compute field address at compile time */
|
|
if ((d = collect_decimal(c, &flag)) <= 0.0) {
|
|
if (flag)
|
|
ct_ret(flag); /* an error */
|
|
else
|
|
yylval.cp = &field[0];
|
|
} else {
|
|
Int ival = d_to_I(d);
|
|
double dval = (double) ival;
|
|
if (dval != d) {
|
|
compile_error("$%g is invalid field index", d);
|
|
}
|
|
yylval.cp = field_ptr((int) ival);
|
|
}
|
|
|
|
ct_ret(FIELD);
|
|
}
|
|
|
|
case SC_DQUOTE:
|
|
ct_ret(collect_string());
|
|
|
|
case SC_IDCHAR: /* collect an identifier */
|
|
{
|
|
char *p = string_buff + 1;
|
|
SYMTAB *stp;
|
|
|
|
string_buff[0] = (char) c;
|
|
|
|
while (1) {
|
|
CheckStringSize(p);
|
|
c = scan_code[NextUChar(*p++)];
|
|
if (c != SC_IDCHAR && c != SC_DIGIT)
|
|
break;
|
|
}
|
|
|
|
un_next();
|
|
*--p = 0;
|
|
|
|
current_symbol = stp = find(string_buff);
|
|
switch (stp->type) {
|
|
case ST_NONE:
|
|
/* check for function call before defined */
|
|
if (next() == CHR_LPAREN) {
|
|
stp->type = ST_FUNCT;
|
|
stp->stval.fbp = ZMALLOC(FBLOCK);
|
|
stp->stval.fbp->name = stp->name;
|
|
stp->stval.fbp->code = (INST *) 0;
|
|
stp->stval.fbp->size = 0;
|
|
yylval.fbp = stp->stval.fbp;
|
|
current_token = FUNCT_ID;
|
|
} else {
|
|
yylval.stp = stp;
|
|
current_token =
|
|
current_token == DOLLAR ? D_ID : ID;
|
|
}
|
|
un_next();
|
|
break;
|
|
|
|
case ST_NR:
|
|
NR_flag = 1;
|
|
stp->type = ST_VAR;
|
|
/* FALLTHRU */
|
|
|
|
case ST_VAR:
|
|
case ST_ARRAY:
|
|
case ST_LOCAL_NONE:
|
|
case ST_LOCAL_VAR:
|
|
case ST_LOCAL_ARRAY:
|
|
|
|
yylval.stp = stp;
|
|
current_token =
|
|
current_token == DOLLAR ? D_ID : ID;
|
|
break;
|
|
|
|
case ST_ENV:
|
|
stp->type = ST_ARRAY;
|
|
stp->stval.array = new_ARRAY();
|
|
load_environ(stp->stval.array);
|
|
yylval.stp = stp;
|
|
current_token =
|
|
current_token == DOLLAR ? D_ID : ID;
|
|
break;
|
|
|
|
case ST_FUNCT:
|
|
yylval.fbp = stp->stval.fbp;
|
|
current_token = FUNCT_ID;
|
|
break;
|
|
|
|
case ST_KEYWORD:
|
|
current_token = stp->stval.kw;
|
|
break;
|
|
|
|
case ST_BUILTIN:
|
|
yylval.bip = stp->stval.bip;
|
|
current_token = BUILTIN;
|
|
break;
|
|
|
|
case ST_FIELD:
|
|
yylval.cp = stp->stval.cp;
|
|
current_token = FIELD;
|
|
break;
|
|
|
|
default:
|
|
bozo("find returned bad st type");
|
|
}
|
|
return scan_scope(current_token);
|
|
}
|
|
|
|
case SC_UNEXPECTED:
|
|
yylval.ival = c & 0xff;
|
|
ct_ret(UNEXPECTED);
|
|
}
|
|
return scan_scope(0); /* never get here make lint happy */
|
|
}
|
|
|
|
/* collect a decimal constant in temp_buff.
|
|
Return the value and error conditions by reference */
|
|
|
|
static double
|
|
collect_decimal(int c, int *flag)
|
|
{
|
|
register char *p = string_buff + 1;
|
|
char *endp;
|
|
char *temp;
|
|
char *last_decimal = 0;
|
|
double d;
|
|
|
|
*flag = 0;
|
|
string_buff[0] = (char) c;
|
|
|
|
if (c == '.') {
|
|
last_decimal = p - 1;
|
|
CheckStringSize(p);
|
|
if (scan_code[NextUChar(*p++)] != SC_DIGIT) {
|
|
*flag = UNEXPECTED;
|
|
yylval.ival = '.';
|
|
return 0.0;
|
|
}
|
|
} else {
|
|
while (1) {
|
|
CheckStringSize(p);
|
|
if (scan_code[NextUChar(*p++)] != SC_DIGIT) {
|
|
break;
|
|
}
|
|
};
|
|
if (p[-1] == '.') {
|
|
last_decimal = p - 1;
|
|
} else {
|
|
un_next();
|
|
p--;
|
|
}
|
|
}
|
|
/* get rest of digits after decimal point */
|
|
while (1) {
|
|
CheckStringSize(p);
|
|
if (scan_code[NextUChar(*p++)] != SC_DIGIT) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* check for exponent */
|
|
if (p[-1] != 'e' && p[-1] != 'E') {
|
|
un_next();
|
|
*--p = 0;
|
|
} else { /* get the exponent */
|
|
if (scan_code[NextUChar(*p)] != SC_DIGIT &&
|
|
*p != '-' && *p != '+') {
|
|
/* if we can, undo and try again */
|
|
if (buffp - buffer >= 2) {
|
|
un_next(); /* undo the last character */
|
|
un_next(); /* undo the 'e' */
|
|
*--p = 0;
|
|
} else {
|
|
*++p = 0;
|
|
*flag = BAD_DECIMAL;
|
|
return 0.0;
|
|
}
|
|
} else { /* get the rest of the exponent */
|
|
p++;
|
|
while (1) {
|
|
CheckStringSize(p);
|
|
if (scan_code[NextUChar(*p++)] != SC_DIGIT) {
|
|
break;
|
|
}
|
|
}
|
|
un_next();
|
|
*--p = 0;
|
|
}
|
|
}
|
|
|
|
#ifdef LOCALE
|
|
if (last_decimal && decimal_dot) {
|
|
*last_decimal = decimal_dot;
|
|
}
|
|
#endif
|
|
|
|
errno = 0; /* check for overflow/underflow */
|
|
d = strtod(string_buff, &temp);
|
|
endp = temp;
|
|
|
|
#ifndef STRTOD_UNDERFLOW_ON_ZERO_BUG
|
|
if (errno)
|
|
compile_error("%s : decimal %sflow", string_buff,
|
|
d == 0.0 ? "under" : "over");
|
|
#else /* ! sun4 bug */
|
|
if (errno && d != 0.0)
|
|
compile_error("%s : decimal overflow", string_buff);
|
|
#endif
|
|
|
|
if (endp < p) {
|
|
/* if we can, undo and try again */
|
|
if ((p - endp) < (buffp - buffer)) {
|
|
while (endp < p) {
|
|
un_next();
|
|
++endp;
|
|
}
|
|
} else {
|
|
*flag = BAD_DECIMAL;
|
|
return 0.0;
|
|
}
|
|
}
|
|
return d;
|
|
}
|
|
|
|
/*---------- process escape characters ---------------*/

/*
 * Values of the hexadecimal letters, indexed by (character - 'A') for
 * characters 'A' through 'f'.  Entries that do not correspond to a hex
 * letter are zero.
 */
static const char hex_val['f' - 'A' + 1] =
{
    /* 'A'..'F' */
    10, 11, 12, 13, 14, 15,
    /* 'G' up to the character before 'a' */
    0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 'a'..'f' */
    10, 11, 12, 13, 14, 15
};

/* true for the characters '0' through '7' */
#define isoctal(x) ((x)>='0'&&(x)<='7')

/* table lookup; x must lie in the range 'A' through 'f' */
#define hex_value(x) hex_val[(x)-'A']

/* true for a decimal digit or a hexadecimal letter of either case */
#define ishex(x) (scan_code[x] == SC_DIGIT ||\
		  ('A' <= (x) && (x) <= 'f' && hex_value(x)))
|
|
|
|
/*
 * octal -- convert one, two or three octal digits to a value.
 *
 * start_p: address of a pointer to the first digit; on return the
 *          pointer has been moved past the digits that were used.
 *
 * The first character is always consumed; up to two more are consumed
 * while they are in the range '0'..'7'.  Returns the low eight bits of
 * the result.
 */
static int
octal(char **start_p)
{
    char *cursor = *start_p;
    unsigned value = 0;
    int taken = 0;

    do {
	value = (value << 3) + (unsigned) (*cursor++ - '0');
	taken++;
    } while (taken < 3 && *cursor >= '0' && *cursor <= '7');

    *start_p = cursor;
    return (int) (value & 0xff);
}
|
|
|
|
/* process one or two hex digits
|
|
moving a pointer forward by reference */
|
|
|
|
static int
|
|
hex(char **start_p)
|
|
{
|
|
register UChar *p = (UChar *) * start_p;
|
|
register unsigned x;
|
|
unsigned t;
|
|
|
|
if (scan_code[*p] == SC_DIGIT)
|
|
x = (unsigned) (*p++ - '0');
|
|
else
|
|
x = (unsigned) hex_value(*p++);
|
|
|
|
if (scan_code[*p] == SC_DIGIT)
|
|
x = (x << 4) + *p++ - '0';
|
|
else if ('A' <= *p && *p <= 'f' && (t = (unsigned) hex_value(*p))) {
|
|
x = (x << 4) + t;
|
|
p++;
|
|
}
|
|
|
|
*start_p = (char *) p;
|
|
return (int) x;
|
|
}
|
|
|
|
/* process the escape characters in a string, in place . */
|
|
char *
|
|
rm_escape(char *s, size_t *lenp)
|
|
{
|
|
register char *p, *q;
|
|
char *t;
|
|
|
|
q = p = s;
|
|
|
|
while (*p) {
|
|
if (*p == '\\') {
|
|
int ch = *++p;
|
|
switch (ch) {
|
|
case 'n':
|
|
p++;
|
|
*q++ = '\n';
|
|
break;
|
|
case 't':
|
|
p++;
|
|
*q++ = '\t';
|
|
break;
|
|
case 'f':
|
|
p++;
|
|
*q++ = '\f';
|
|
break;
|
|
case 'b':
|
|
p++;
|
|
*q++ = '\b';
|
|
break;
|
|
case 'r':
|
|
p++;
|
|
*q++ = '\r';
|
|
break;
|
|
case 'a':
|
|
p++;
|
|
*q++ = '\07';
|
|
break;
|
|
case 'v':
|
|
p++;
|
|
*q++ = '\013';
|
|
break;
|
|
case '\\':
|
|
p++;
|
|
*q++ = '\\';
|
|
break;
|
|
case '\"':
|
|
p++;
|
|
*q++ = '\"';
|
|
break;
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
t = p;
|
|
*q++ = (char) octal(&t);
|
|
p = t;
|
|
break;
|
|
case 'x':
|
|
if (ishex(*(UChar *) (p + 1))) {
|
|
t = p + 1;
|
|
*q++ = (char) hex(&t);
|
|
p = t;
|
|
break;
|
|
} else {
|
|
goto not_escape;
|
|
}
|
|
case '\0':
|
|
*q++ = '\\';
|
|
break;
|
|
not_escape:
|
|
default:
|
|
*q++ = '\\';
|
|
*q++ = *p++;
|
|
break;
|
|
}
|
|
|
|
} else
|
|
*q++ = *p++;
|
|
}
|
|
|
|
*q = 0;
|
|
if (lenp != 0)
|
|
*lenp = (unsigned) (q - s);
|
|
return s;
|
|
}
|
|
|
|
static int
|
|
collect_string(void)
|
|
{
|
|
register char *p = string_buff;
|
|
int c;
|
|
int e_flag = 0; /* on if have an escape char */
|
|
size_t len_buff;
|
|
|
|
while (1) {
|
|
CheckStringSize(p);
|
|
switch (scan_code[NextUChar(*p++)]) {
|
|
case SC_DQUOTE: /* done */
|
|
*--p = 0;
|
|
goto out;
|
|
|
|
case SC_NL:
|
|
p[-1] = 0;
|
|
/* FALLTHRU */
|
|
|
|
case 0: /* unterminated string */
|
|
compile_error(
|
|
"runaway string constant \"%.10s ...",
|
|
string_buff);
|
|
mawk_exit(2);
|
|
|
|
case SC_ESCAPE:
|
|
if ((c = next()) == '\n') {
|
|
p--;
|
|
lineno++;
|
|
} else if (c == 0)
|
|
un_next();
|
|
else {
|
|
*p++ = (char) c;
|
|
e_flag = 1;
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
out:
|
|
if (e_flag)
|
|
rm_escape(string_buff, &len_buff);
|
|
else
|
|
len_buff = (unsigned) ((char *) p - string_buff);
|
|
yylval.ptr = (PTR) new_STRING1(string_buff, len_buff);
|
|
return STRING_;
|
|
}
|
|
|
|
static int
|
|
collect_RE(void)
|
|
{
|
|
char *p = string_buff;
|
|
const char *first = NULL;
|
|
int limit = sizeof(string_buff) - 2;
|
|
int c;
|
|
int boxed = 0;
|
|
STRING *sval;
|
|
|
|
while (1) {
|
|
if (p >= (string_buff + limit)) {
|
|
compile_error(
|
|
"regular expression /%.10s ..."
|
|
" exceeds implementation size limit (%d)",
|
|
string_buff,
|
|
limit);
|
|
mawk_exit(2);
|
|
}
|
|
CheckStringSize(p);
|
|
switch (scan_code[NextUChar(c = *p++)]) {
|
|
case SC_POW:
|
|
/* Handle [^]] and [^^] correctly. */
|
|
if ((p - 1) == first && first != 0 && first[-1] == '[') {
|
|
first = p;
|
|
}
|
|
break;
|
|
|
|
case SC_LBOX:
|
|
/*
|
|
* If we're starting a bracket expression, remember where that
|
|
* started, so we can make comparisons to handle things like
|
|
* "[]xxxx]" and "[^]xxxx]".
|
|
*/
|
|
if (!boxed) {
|
|
first = p;
|
|
++boxed;
|
|
} else {
|
|
/* XXX. Does not handle collating symbols or equivalence
|
|
* class expressions. */
|
|
/* XXX. Does not match logic used in rexp0.c to check for
|
|
* a character class expression, though probably the
|
|
* latter should be adjusted.
|
|
* POSIX and common sense give us license to complain about
|
|
* expressions such as '[[:not a special character class]]'.
|
|
*/
|
|
if (next() == ':') {
|
|
++boxed;
|
|
}
|
|
un_next();
|
|
}
|
|
break;
|
|
|
|
case SC_RBOX:
|
|
/*
|
|
* A right square-bracket loses its special meaning if it occurs
|
|
* first in the list (after an optional "^").
|
|
*/
|
|
if (boxed && p - 1 != first) {
|
|
--boxed;
|
|
}
|
|
break;
|
|
|
|
case SC_DIV: /* done */
|
|
if (!boxed) {
|
|
*--p = 0;
|
|
goto out;
|
|
}
|
|
break;
|
|
|
|
case SC_NL:
|
|
p[-1] = 0;
|
|
/* FALLTHRU */
|
|
|
|
case 0: /* unterminated re */
|
|
compile_error(
|
|
"runaway regular expression /%.10s ...",
|
|
string_buff);
|
|
mawk_exit(2);
|
|
|
|
case SC_ESCAPE:
|
|
switch (c = next()) {
|
|
case '/':
|
|
p[-1] = '/';
|
|
break;
|
|
|
|
case '\n':
|
|
p--;
|
|
break;
|
|
|
|
case 0:
|
|
un_next();
|
|
break;
|
|
|
|
default:
|
|
*p++ = (char) c;
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
out:
|
|
/* now we've got the RE, so compile it */
|
|
sval = new_STRING(string_buff);
|
|
yylval.ptr = re_compile(sval);
|
|
free_STRING(sval);
|
|
return RE;
|
|
}
|
|
|
|
#ifdef NO_LEAKS
/*
 * scan_leaks -- leak-checking builds only: free whatever yylval.ptr
 * still points at and clear the pointer.
 */
void
scan_leaks(void)
{
    TRACE(("scan_leaks\n"));
    if (yylval.ptr == 0)
	return;

    free(yylval.ptr);
    yylval.ptr = 0;
}
#endif
|