mawk/scan.c
2024-11-18 09:26:11 +00:00

1287 lines
24 KiB
C

/********************************************
scan.c
copyright 2008-2023,2024, Thomas E. Dickey
copyright 2010, Jonathan Nieder
copyright 1991-1996,2014, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: scan.c,v 1.67 2024/11/11 20:58:37 tom Exp $
*/
#define Visible_ARRAY
#define Visible_CELL
#define Visible_CODEBLOCK
#define Visible_FBLOCK
#define Visible_PFILE
#define Visible_RE_DATA
#define Visible_SEPARATOR
#define Visible_STRING
#define Visible_SYMTAB
#include <mawk.h>
#include <scan.h>
#include <memory.h>
#include <field.h>
#include <init.h>
#include <fin.h>
#include <code.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#include <files.h>
#define CHR_LPAREN '('
#define CHR_RPAREN ')'
#define STR_LBRACE "{"
#define STR_RBRACE "}"
/*
* Record x as the current token, then return from the enclosing function
* with whatever scan_scope() makes of it (scan_scope() may substitute a
* different token value).
*/
#define ct_ret(x) do { current_token = (x); return scan_scope(current_token); } while (0)
/*
* next() yields the next program character, falling back to slow_next()
* when the character at buffp is zero; un_next() steps buffp back by one.
* With OPT_TRACE > 1 they are real functions (defined later in this file)
* so that each call can be traced.
*/
#if OPT_TRACE > 1
static int next(void);
static void un_next(void);
#else
#define next() (*buffp ? *buffp++ : slow_next())
#define un_next() buffp--
#endif
/*
* If the next character is c, return token x; otherwise push the
* character back and return token d.
*/
#define test1_ret(c,x,d) if ( next() == (c) ) ct_ret(x) ;\
else { un_next() ; ct_ret(d) ; }
/*
* Two-way variant: c1 selects token x1, c2 selects token x2; any other
* character is pushed back and token d is returned.
*/
#define test2_ret(c1,x1,c2,x2,d) switch( next() )\
{ case c1: ct_ret(x1) ;\
case c2: ct_ret(x2) ;\
default: un_next() ;\
ct_ret(d) ; }
/*
* Shared constants: yylex() points yylval.ptr at these for numeric
* literals whose value is 0 or 1 instead of allocating a new double.
*/
double double_zero = 0.0;
double double_one = 1.0;
/* static functions */
static void scan_fillbuff(void);
static void scan_open(void);
static int slow_next(void);
static void eat_comment(void);
static double collect_decimal(int, int *);
static int collect_string(void);
static int collect_RE(void);
/*-----------------------------
program file management
*----------------------------*/
/* name of the program file that scan_open() opens */
char *pfile_name;
/* program files not yet opened; slow_next() takes them from the head */
PFILE *pfile_list;
/* copy of the program text when it was given on the command line */
static STRING *program_string;
/* start of the read buffer that scan_init() allocates for program files */
static UChar *buffer;
/* current read position; next() advances it and un_next() backs it up */
static UChar *buffp;
/* the two pointers above are unsigned so they work with 8 bit chars */
/* descriptor being read: -1 for a command-line program, 0 when the name is "-" */
static int program_fd;
/* set once the current program source has nothing more to read */
static int eof_flag;
/*
* Data for scan_scope()
*/
/* capacity of repair_syms[] */
#define MAX_REPAIR 10
/* symbol returned by the most recent identifier lookup in yylex() */
static SYMTAB *current_symbol;
/* symbol seen as the function name after the FUNCTION keyword */
static SYMTAB *current_funct;
/*
* Progress through a function definition as tracked by scan_scope():
*   ssDEFAULT  not in a function definition
*   ssHEADER   FUNCTION keyword seen
*   ssFUNCTN   function name seen
*   ssLPAREN   "(" after the name seen, i.e. within the parameter list
*   ssRPAREN   ")" closing the parameter list seen
*   ssLBRACE   "{" after the parameter list seen, i.e. within the body
*   ssRBRACE   not assigned by any code visible in this file
*/
typedef enum {
ssDEFAULT = 0
,ssHEADER
,ssFUNCTN
,ssLPAREN
,ssRPAREN
,ssLBRACE
,ssRBRACE
} SCAN_SCOPE;
static SCAN_SCOPE current_scope;
/* brace nesting depth as maintained by scan_scope() */
static int braces_level;
/* number of entries of repair_syms[] in use */
static int max_repair;
/*
* Symbols whose type must be put back when the function body ends,
* together with the type each one had when it was saved.
*/
static struct {
SYMTAB *stp;
SYM_TYPE type;
} repair_syms[MAX_REPAIR];
/*
* use unsigned chars for index into scan_code[]
*
* Both macros read one character with next(), store it in c and yield
* the stored value converted to UChar.
*/
#define NextUChar(c) (UChar)(c = next()) /* use if c is not a char */
#define NextChar(c) (UChar)(c = (char) next()) /* use if c is a char */
/* overused tmp buffer: identifiers, numbers, strings and regular
expressions are all collected here while scanning */
char string_buff[SPRINTF_LIMIT];
/*
* Report that the text being collected in string_buff does not fit,
* quoting at most its first ten characters, then exit with status 2.
*/
static GCC_NORETURN void
string_too_long(void)
{
compile_error("string too long \"%.10s ...", string_buff);
mawk_exit(2);
}
/*
 * Give up via string_too_long() when ptr no longer points inside
 * string_buff, i.e. before storing another character at *ptr.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: a bare "if" here would capture an "else" that follows
 * the macro call.
 */
#define CheckStringSize(ptr) \
    do { \
	if ((size_t)((ptr) - string_buff) >= sizeof(string_buff)) \
	    string_too_long(); \
    } while (0)
/*
 * Set up the scanner's input and position it at the first token.
 *
 * cmdline_program: program text from the command line, or a null pointer
 * when the program is to be read from the file named by pfile_name.
 *
 * Exits with status 0 if the program turns out to be empty.
 */
void
scan_init(const char *cmdline_program)
{
    if (!cmdline_program) {
	/* program from file[s] */
	scan_open();
	buffer = (UChar *) zmalloc((size_t) (BUFFSZ + 1));
	buffp = buffer;
	scan_fillbuff();
    } else {
	size_t text_len = strlen(cmdline_program);

	/* command line program: no descriptor to read from */
	program_fd = -1;
	program_string = new_STRING0(text_len + 1);
	strcpy(program_string->str, cmdline_program);
	/* the extra byte becomes a newline, simulating file termination */
	program_string->str[program_string->len - 1] = '\n';
	buffp = (UChar *) program_string->str;
	eof_flag = 1;
    }

#ifdef OS2			/* OS/2 "extproc" is similar to #! */
    if (strnicmp(buffp, "extproc ", 8) == 0)
	eat_comment();
#endif

    /* skip ahead to the first token */
    eat_nl();

    if (next() == 0) {
	/* nothing but white space and comments: no program */
	mawk_exit(0);
    }
    un_next();
}
/*
 * Open the program file named by pfile_name and remember its descriptor
 * in program_fd.  The name "-" selects descriptor 0 without opening
 * anything.  Failure to open is reported and ends the run with status 2.
 */
static void
scan_open(void)
{
    if (pfile_name[0] == '-' && pfile_name[1] == 0) {
	program_fd = 0;
	return;
    }

    program_fd = open(pfile_name, O_RDONLY, 0);
    if (program_fd == -1) {
	errmsg(errno, "cannot open \"%s\"", pfile_name);
	mawk_exit(2);
    }
}
/*
 * Release what the scanner holds once the program has been read, and
 * reclassify several white-space characters in scan_code[].
 */
void
scan_cleanup(void)
{
    /* the read buffer exists only when the program came from a file */
    if (program_fd >= 0)
	zfree(buffer, (size_t) (BUFFSZ + 1));

    if (program_string)
	free_STRING(program_string);

    /* descriptor 0 was not opened here, so it is not closed here */
    if (program_fd > 0)
	close(program_fd);

    /* redefine SPACE as [ \t\n] */
    if (posix_space_flag && rs_shadow.type != SEP_MLR) {
	scan_code['\n'] = (char) SC_UNEXPECTED;
    } else {
	scan_code['\n'] = (char) SC_SPACE;
    }

    scan_code['\f'] = SC_UNEXPECTED;	/* value doesn't matter */
    scan_code['\013'] = SC_UNEXPECTED;	/* \v not space */
    scan_code['\r'] = SC_UNEXPECTED;
}
/*--------------------------------
global variables shared by yyparse() and yylex()
and used for error messages too
*-------------------------------*/
int current_token = -1;
unsigned token_lineno;
unsigned compile_error_count;
int NR_flag; /* are we tracking NR */
int paren_cnt;
int brace_cnt;
int print_flag; /* changes meaning of '>' */
int getline_flag; /* changes meaning of '<' */
/*----------------------------------------
file reading functions
next() and un_next(c) are macros in scan.h
*---------------------*/
static unsigned lineno = 1;
/*
 * Refill the read buffer from program_fd and terminate it with a zero
 * byte.  A short read marks end of input; in that case a newline is
 * appended first so that the last line is always terminated.
 */
static void
scan_fillbuff(void)
{
    size_t got = fillbuff(program_fd, (char *) buffer, (size_t) BUFFSZ);

    if (got < BUFFSZ) {
	eof_flag = 1;
	/* make sure eof is terminated */
	buffer[got++] = '\n';
    }
    buffer[got] = 0;
}
/* read one character -- slowly */
static int
slow_next(void)
{
while (*buffp == 0) {
if (!eof_flag) {
buffp = buffer;
scan_fillbuff();
} else if (pfile_list /* open another program file */ ) {
PFILE *q;
if (program_fd > 0)
close(program_fd);
eof_flag = 0;
pfile_name = pfile_list->fname;
q = pfile_list;
pfile_list = pfile_list->link;
ZFREE(q);
scan_open();
token_lineno = lineno = 1;
} else {
break; /* real eof */
}
}
return 0xff & *buffp++; /* note can un_next(), eof which is zero */
}
#if OPT_TRACE > 1
/* trace one character: printable ones as themselves, others in hex */
#define SHOW(tag,c) \
TRACE((((c) >= ' ' && (c) <= '~') ? "%s %c\n" : "%s 0x%x\n", tag, c))

/*
 * Traced replacement for the next() macro: return the next program
 * character, using slow_next() when the one at buffp is zero.
 */
static int
next(void)
{
    int ch = (*buffp != '\0') ? *buffp++ : slow_next();

    SHOW("* GET", ch);
    return ch;
}

/*
 * Traced replacement for the un_next() macro: step back one character
 * and show which character is now current.
 */
static void
un_next(void)
{
    --buffp;
    SHOW("UNGET", *buffp);
}
#endif
static void
eat_comment(void)
{
register int c;
while (scan_code[NextUChar(c)] && (c != '\n')) {
; /* empty */
}
un_next();
}
/* this is how we handle extra semi-colons that are
now allowed to separate pattern-action blocks
A proof that they are useless clutter to the language:
we throw them away
*/
static void
eat_semi_colon(void)
/* eat one semi-colon on the current line */
{
register int c;
while (scan_code[NextUChar(c)] == SC_SPACE) {
; /* empty */
}
if (c != ';')
un_next();
}
void
eat_nl(void) /* eat all space including newlines */
{
while (1) {
switch (scan_code[(UChar) next()]) {
case SC_COMMENT:
eat_comment();
break;
case SC_NL:
lineno++;
/* FALLTHRU */
case SC_SPACE:
break;
case SC_ESCAPE:
/* bug fix - surprised anyone did this,
a csh user with backslash dyslexia.(Not a joke)
*/
{
int c;
while (scan_code[NextUChar(c)] == SC_SPACE) {
; /* empty */
}
if (c == '\n')
token_lineno = ++lineno;
else if (c == 0) {
un_next();
return;
} else { /* error */
un_next();
/* can't un_next() twice so deal with it */
yylval.ival = '\\';
unexpected_char();
return;
}
}
break;
default:
un_next();
return;
}
}
}
/*
 * Function parameters are local to a function, but because mawk uses a single
 * hash table, they may conflict with global symbols (function names).
 * Work around this by saving the conflicting symbol, overriding it with an
 * ordinary symbol and restoring it at the end of the function.
 *
 * This function is given every token that yylex() is about to return.  It
 * follows the shape of a function definition (keyword, name, "(", ")", "{",
 * matching "}") in current_scope, and:
 *   - within the parameter list, turns a FUNCT_ID token into ID after
 *     recording the symbol and its type in repair_syms[];
 *   - at the brace that closes the function body, writes the recorded
 *     types back.
 *
 * state: the token about to be returned.
 * Returns the token the parser should see, which differs from state only in
 * the FUNCT_ID-to-ID case above.
 */
static int
scan_scope(int state)
{
    switch (state) {
    case FUNCTION:
	/* a definition can only start outside of all braces */
	if (braces_level == 0)
	    current_scope = ssHEADER;
	break;
    case LPAREN:
	if (current_scope == ssFUNCTN)
	    current_scope = ssLPAREN;
	break;
    case FUNCT_ID:
	if (current_scope == ssHEADER) {
	    /* the name of the function being defined */
	    current_scope = ssFUNCTN;
	    current_funct = current_symbol;
	} else if (current_scope == ssLPAREN) {
	    /* a parameter whose name is also a function's name */
	    if (current_symbol == current_funct) {
		compile_error("function parameter cannot be the function");
	    } else if (max_repair < MAX_REPAIR) {
		repair_syms[max_repair].stp = current_symbol;
		repair_syms[max_repair].type = current_symbol->type;
		++max_repair;
		state = ID;
	    } else {
		compile_error("too many local/global symbol conflicts");
	    }
	}
	break;
    case RPAREN:
	if (current_scope == ssLPAREN)
	    current_scope = ssRPAREN;
	break;
    case LBRACE:
	++braces_level;
	if (current_scope == ssRPAREN)
	    current_scope = ssLBRACE;
	break;
    case RBRACE:
	if (braces_level > 0 && current_scope == ssLBRACE) {
	    if (--braces_level == 0) {
		/* end of the function body: undo the overrides */
		current_scope = ssDEFAULT;
		while (max_repair > 0) {
		    --max_repair;
		    (repair_syms[max_repair].stp)->type =
			repair_syms[max_repair].type;
		}
	    }
	} else {
	    /*
	     * Braces outside a function body were counted by the LBRACE
	     * case, so they must be uncounted here as well.  Otherwise
	     * braces_level never returns to zero after the first
	     * pattern-action block and the FUNCTION case above stops
	     * recognizing the function definitions that follow it.
	     */
	    if (braces_level > 0)
		--braces_level;
	    current_scope = ssDEFAULT;
	}
	break;
    }
    return state;
}
/*
* The scanner proper: return the next token of the program.
*
* The first character of the token is classified through scan_code[] and
* the switch below finishes the token for each class.  Almost every path
* leaves through ct_ret(), which records the token in current_token and
* passes it through scan_scope().  Values that accompany a token are
* stored in yylval; for several operators the token's text is also left
* in string_buff.
*/
int
yylex(void)
{
register int c;
token_lineno = lineno;
#ifdef NO_LEAKS
memset(&yylval, 0, sizeof(yylval));
#endif
/* re-entered after anything that does not produce a token */
reswitch:
switch (scan_code[NextUChar(c)]) {
case 0:
ct_ret(EOF);
case SC_SPACE:
goto reswitch;
case SC_COMMENT:
eat_comment();
goto reswitch;
case SC_NL:
/* any run of newlines, space and comments yields a single NL */
lineno++;
eat_nl();
ct_ret(NL);
case SC_ESCAPE:
/* backslash: legal only as a line continuation */
while (scan_code[NextUChar(c)] == SC_SPACE) {
; /* empty */
}
if (c == '\n') {
token_lineno = ++lineno;
goto reswitch;
}
if (c == 0)
ct_ret(EOF);
un_next();
yylval.ival = '\\';
ct_ret(UNEXPECTED);
case SC_SEMI_COLON:
eat_nl();
ct_ret(SEMI_COLON);
case SC_LBRACE:
eat_nl();
brace_cnt++;
ct_ret(LBRACE);
case SC_PLUS:
/* one of "++", "+=" or "+" */
switch (next()) {
case '+':
yylval.ival = '+';
string_buff[0] =
string_buff[1] = '+';
string_buff[2] = 0;
ct_ret(INC_or_DEC);
case '=':
ct_ret(ADD_ASG);
default:
un_next();
ct_ret(PLUS);
}
case SC_MINUS:
/* one of "--", "-=" or "-" */
switch (next()) {
case '-':
yylval.ival = '-';
string_buff[0] =
string_buff[1] = '-';
string_buff[2] = 0;
ct_ret(INC_or_DEC);
case '=':
ct_ret(SUB_ASG);
default:
un_next();
ct_ret(MINUS);
}
case SC_COMMA:
eat_nl();
ct_ret(COMMA);
case SC_MUL:
test1_ret('=', MUL_ASG, MUL);
case SC_DIV:
/*
* A slash is a division operator ("/" or "/=") when the previous
* token is one of those listed in can_precede_div[], other than
* INC_or_DEC; otherwise it starts a regular expression.
*/
{
static const int can_precede_div[] =
{DOUBLE, STRING_, RPAREN, ID, D_ID, RE, RBOX, FIELD,
GETLINE, INC_or_DEC, -1};
const int *p = can_precede_div;
do {
if (*p == current_token) {
if (*p != INC_or_DEC) {
test1_ret('=', DIV_ASG, DIV);
}
/*
* NOTE(review): after INC_or_DEC, when the character read here
* is not '=', it is not pushed back before collect_RE() is
* called below -- confirm that this is intended.
*/
if (next() == '=') {
un_next();
ct_ret(collect_RE());
}
}
}
while (*++p != -1);
ct_ret(collect_RE());
}
case SC_MOD:
test1_ret('=', MOD_ASG, MOD);
case SC_POW:
test1_ret('=', POW_ASG, POW);
case SC_LPAREN:
paren_cnt++;
ct_ret(LPAREN);
case SC_RPAREN:
/* an unmatched ')' is reported and then ignored */
if (--paren_cnt < 0) {
compile_error("extra ')'");
paren_cnt = 0;
goto reswitch;
}
ct_ret(RPAREN);
case SC_LBOX:
ct_ret(LBOX);
case SC_RBOX:
ct_ret(RBOX);
case SC_MATCH:
/* "~": MATCH with yylval.ival 1; "!~" below gives MATCH with 0 */
string_buff[0] = '~';
string_buff[1] = 0;
yylval.ival = 1;
ct_ret(MATCH);
case SC_EQUAL:
test1_ret('=', EQ, ASSIGN);
case SC_NOT: /* ! */
if ((c = next()) == '~') {
string_buff[0] = '!';
string_buff[1] = '~';
string_buff[2] = 0;
yylval.ival = 0;
ct_ret(MATCH);
} else if (c == '=')
ct_ret(NEQ);
un_next();
ct_ret(NOT);
case SC_LT: /* '<' */
if (next() == '=')
ct_ret(LTE);
else
un_next();
/* getline_flag turns the next '<' into input redirection, once */
if (getline_flag) {
getline_flag = 0;
ct_ret(IO_IN);
} else
ct_ret(LT);
case SC_GT: /* '>' */
/* print_flag turns an unparenthesized '>' into output redirection */
if (print_flag && paren_cnt == 0) {
print_flag = 0;
/* there are 3 types of IO_OUT
-- build the error string in string_buff */
string_buff[0] = '>';
if (next() == '>') {
yylval.ival = F_APPEND;
string_buff[1] = '>';
string_buff[2] = 0;
} else {
un_next();
yylval.ival = F_TRUNC;
string_buff[1] = 0;
}
ct_ret(IO_OUT);
}
test1_ret('=', GTE, GT);
case SC_OR:
/* "||", or a single '|' which is a pipe (output pipe after print) */
if (next() == '|') {
eat_nl();
ct_ret(OR);
} else {
un_next();
if (print_flag && paren_cnt == 0) {
print_flag = 0;
yylval.ival = PIPE_OUT;
string_buff[0] = '|';
string_buff[1] = 0;
ct_ret(IO_OUT);
} else
ct_ret(PIPE);
}
case SC_AND:
/* only "&&" is a token; a single '&' is reported as unexpected */
if (next() == '&') {
eat_nl();
ct_ret(AND);
} else {
un_next();
yylval.ival = '&';
ct_ret(UNEXPECTED);
}
case SC_QMARK:
ct_ret(QMARK);
case SC_COLON:
ct_ret(COLON);
case SC_RBRACE:
/* an unmatched '}' is reported and then ignored */
if (--brace_cnt < 0) {
compile_error("extra '" STR_RBRACE "'");
eat_semi_colon();
brace_cnt = 0;
goto reswitch;
}
if ((c = current_token) == NL || c == SEMI_COLON
|| c == SC_FAKE_SEMI_COLON || c == RBRACE) {
/* if the brace_cnt is zero , we've completed
a pattern action block. If the user insists
on adding a semi-colon on the same line
we will eat it. Note what we do below:
physical law -- conservation of semi-colons */
if (brace_cnt == 0)
eat_semi_colon();
eat_nl();
ct_ret(RBRACE);
}
/* supply missing semi-colon to statement that
precedes a right-brace */
/* the brace is pushed back and will be scanned again, so undo the
decrement made above */
brace_cnt++;
un_next();
current_token = SC_FAKE_SEMI_COLON;
return scan_scope(SEMI_COLON);
case SC_DIGIT:
case SC_DOT:
/* numeric constant; 0 and 1 share the static doubles */
{
double d;
int flag;
if ((d = collect_decimal(c, &flag)) == 0.0) {
if (flag)
ct_ret(flag);
else
yylval.ptr = (PTR) & double_zero;
} else if (d == 1.0) {
yylval.ptr = (PTR) & double_one;
} else {
yylval.ptr = (PTR) ZMALLOC(double);
*(double *) yylval.ptr = d;
}
ct_ret(DOUBLE);
}
case SC_DOLLAR: /* '$' */
/* '$' followed by a number becomes a FIELD token directly */
{
double d;
int flag;
while (scan_code[NextUChar(c)] == SC_SPACE) {
; /* empty */
}
if (scan_code[c] != SC_DIGIT &&
scan_code[c] != SC_DOT) {
un_next();
ct_ret(DOLLAR);
}
/* compute field address at compile time */
if ((d = collect_decimal(c, &flag)) <= 0.0) {
if (flag)
ct_ret(flag); /* an error */
else
yylval.cp = &field[0];
} else {
Int ival = d_to_I(d);
double dval = (double) ival;
/* complain when the conversion to Int changed the value */
if (dval != d) {
compile_error("$%g is invalid field index", d);
}
yylval.cp = field_ptr((int) ival);
}
ct_ret(FIELD);
}
case SC_DQUOTE:
ct_ret(collect_string());
case SC_IDCHAR: /* collect an identifier */
{
char *p = string_buff + 1;
SYMTAB *stp;
string_buff[0] = (char) c;
while (1) {
CheckStringSize(p);
c = scan_code[NextChar(*p++)];
if (c != SC_IDCHAR && c != SC_DIGIT)
break;
}
/* give back the character that ended the name and terminate it */
un_next();
*--p = 0;
/* the token depends on what the symbol table says about the name */
current_symbol = stp = find(string_buff);
switch (stp->type) {
case ST_NONE:
/* check for function call before defined */
if (next() == CHR_LPAREN) {
stp->type = ST_FUNCT;
stp->stval.fbp = ZMALLOC(FBLOCK);
stp->stval.fbp->name = stp->name;
stp->stval.fbp->code = (INST *) 0;
stp->stval.fbp->size = 0;
yylval.fbp = stp->stval.fbp;
current_token = FUNCT_ID;
} else {
yylval.stp = stp;
current_token =
current_token == DOLLAR ? D_ID : ID;
}
un_next();
break;
case ST_NR:
NR_flag = 1;
stp->type = ST_VAR;
/* FALLTHRU */
case ST_VAR:
case ST_ARRAY:
case ST_LOCAL_NONE:
case ST_LOCAL_VAR:
case ST_LOCAL_ARRAY:
yylval.stp = stp;
current_token =
current_token == DOLLAR ? D_ID : ID;
break;
case ST_ENV:
/* first use: turn the symbol into an array and load it */
stp->type = ST_ARRAY;
stp->stval.array = new_ARRAY();
load_environ(stp->stval.array);
yylval.stp = stp;
current_token =
current_token == DOLLAR ? D_ID : ID;
break;
case ST_FUNCT:
yylval.fbp = stp->stval.fbp;
current_token = FUNCT_ID;
break;
case ST_KEYWORD:
current_token = stp->stval.kw;
break;
case ST_BUILTIN:
yylval.bip = stp->stval.bip;
current_token = BUILTIN;
break;
case ST_FIELD:
yylval.cp = stp->stval.cp;
current_token = FIELD;
break;
default:
bozo("find returned bad st type");
}
return scan_scope(current_token);
}
case SC_UNEXPECTED:
yylval.ival = c & 0xff;
ct_ret(UNEXPECTED);
}
return scan_scope(0); /* never get here make lint happy */
}
/*
 * Collect a decimal constant in string_buff and convert it with strtod().
 *
 * c:    first character of the constant, already read by the caller
 *       (yylex() passes a digit or a '.').
 * flag: set to zero on success, otherwise to the token (UNEXPECTED or
 *       BAD_DECIMAL) that the caller should return instead of a number.
 *
 * Returns the value of the constant, or 0.0 when *flag is nonzero.
 * Overflow and underflow are reported with compile_error().
 */
static double
collect_decimal(int c, int *flag)
{
    register char *p = string_buff + 1;
    char *endp;
    char *temp;
    char *last_decimal = 0;
    double d;

    *flag = 0;
    string_buff[0] = (char) c;

    if (c == '.') {
	last_decimal = p - 1;
	CheckStringSize(p);
	if (scan_code[NextChar(*p++)] != SC_DIGIT) {
	    /* a lone '.' is not a number */
	    *flag = UNEXPECTED;
	    yylval.ival = '.';
	    return 0.0;
	}
    } else {
	/* digits before the decimal point */
	while (1) {
	    CheckStringSize(p);
	    if (scan_code[NextChar(*p++)] != SC_DIGIT) {
		break;
	    }
	};
	if (p[-1] == '.') {
	    last_decimal = p - 1;
	} else {
	    un_next();
	    p--;
	}
    }

    /* get rest of digits after decimal point */
    while (1) {
	CheckStringSize(p);
	if (scan_code[NextChar(*p++)] != SC_DIGIT) {
	    break;
	}
    }

    /* check for exponent */
    if (p[-1] != 'e' && p[-1] != 'E') {
	un_next();
	*--p = 0;
    } else {			/* get the exponent */
	/*
	 * The loop above checked the slot that now holds the 'e', not the
	 * one at p, so check again before the look-ahead is stored there.
	 */
	CheckStringSize(p);
	if (scan_code[NextChar(*p)] != SC_DIGIT &&
	    *p != '-' && *p != '+') {
	    /* if we can, undo and try again */
	    if (buffp - buffer >= 2) {
		un_next();	/* undo the last character */
		un_next();	/* undo the 'e' */
		*--p = 0;
	    } else {
		/* the terminator goes one slot past the look-ahead */
		CheckStringSize(p + 1);
		*++p = 0;
		*flag = BAD_DECIMAL;
		return 0.0;
	    }
	} else {		/* get the rest of the exponent */
	    p++;
	    while (1) {
		CheckStringSize(p);
		if (scan_code[NextChar(*p++)] != SC_DIGIT) {
		    break;
		}
	    }
	    un_next();
	    *--p = 0;
	}
    }

#ifdef LOCALE
    /* strtod() expects the locale's decimal point, not necessarily '.' */
    if (last_decimal && decimal_dot) {
	*last_decimal = decimal_dot;
    }
#endif

    errno = 0;			/* check for overflow/underflow */
    d = strtod(string_buff, &temp);
    endp = temp;

#ifndef	STRTOD_UNDERFLOW_ON_ZERO_BUG
    if (errno)
	compile_error("%s : decimal %sflow", string_buff,
		      d == 0.0 ? "under" : "over");
#else /* ! sun4 bug */
    if (errno && d != 0.0)
	compile_error("%s : decimal overflow", string_buff);
#endif

    /* strtod() used fewer characters than were collected */
    if (endp < p) {
	/* if we can, undo and try again */
	if ((p - endp) < (buffp - buffer)) {
	    while (endp < p) {
		un_next();
		++endp;
	    }
	} else {
	    *flag = BAD_DECIMAL;
	    return 0.0;
	}
    }
    return d;
}
/*---------- process escape characters ---------------*/
/*
* Values of the hexadecimal letters, indexed by (character - 'A') for
* characters from 'A' through 'f'.  The first six entries serve 'A'..'F'
* and the last six 'a'..'f'; everything between is zero, which the
* ishex() macro relies on to reject those characters.
* The layout assumes the ASCII ordering of letters.
*/
static const char hex_val['f' - 'A' + 1] =
{
10, 11, 12, 13, 14, 15, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
10, 11, 12, 13, 14, 15};
#define isoctal(x) ((x)>='0'&&(x)<='7')
#define hex_value(x) hex_val[(x)-'A']
#define ishex(x) (scan_code[x] == SC_DIGIT ||\
('A' <= (x) && (x) <= 'f' && hex_value(x)))

/*
 * Convert an octal escape of one, two or three digits.
 *
 * On entry *start_p points at the first digit, which the caller has
 * already verified; on return it points just past the last digit used.
 * Returns the value, reduced to its low eight bits.
 */
static int
octal(char **start_p)
{
    char *cursor = *start_p;
    unsigned value = (unsigned) (*cursor++ - '0');
    int more = 2;		/* at most two further digits */

    while (more-- > 0 && isoctal(*cursor)) {
	value = (value << 3) + (unsigned) (*cursor++ - '0');
    }

    *start_p = cursor;
    return (int) (value & 0xff);
}
/*
 * Convert a hexadecimal escape of one or two digits.
 *
 * On entry *start_p points at the first digit, which the caller must
 * already have checked with ishex(); on return it points just past the
 * last digit used.  Returns the value.
 */
static int
hex(char **start_p)
{
    UChar *cursor = (UChar *) * start_p;
    unsigned value;
    unsigned digit;

    /* first digit: known to be valid */
    if (scan_code[*cursor] == SC_DIGIT) {
	value = (unsigned) (*cursor - '0');
    } else {
	value = (unsigned) hex_value(*cursor);
    }
    cursor++;

    /* second digit: optional */
    if (scan_code[*cursor] == SC_DIGIT) {
	value = (value << 4) + *cursor - '0';
	cursor++;
    } else if ('A' <= *cursor && *cursor <= 'f'
	       && (digit = (unsigned) hex_value(*cursor)) != 0) {
	value = (value << 4) + digit;
	cursor++;
    }

    *start_p = (char *) cursor;
    return (int) value;
}
/*
 * Process the escape sequences in a string, in place.
 *
 * s:    the zero-terminated string to rewrite.
 * lenp: if not null, receives the length of the result, which may
 *       contain zero bytes produced by octal or hex escapes.
 *
 * A backslash followed by anything that is not a recognized escape is
 * kept together with that character; a backslash at the very end of the
 * string is kept as well.  Returns s.
 */
char *
rm_escape(char *s, size_t *lenp)
{
    char *src = s;
    char *dst = s;

    while (*src) {
	int ch;
	int simple;
	char *t;

	if (*src != '\\') {
	    *dst++ = *src++;
	    continue;
	}

	ch = *++src;

	/* escapes that stand for exactly one fixed character */
	switch (ch) {
	case 'n':
	    simple = '\n';
	    break;
	case 't':
	    simple = '\t';
	    break;
	case 'f':
	    simple = '\f';
	    break;
	case 'b':
	    simple = '\b';
	    break;
	case 'r':
	    simple = '\r';
	    break;
	case 'a':
	    simple = '\07';
	    break;
	case 'v':
	    simple = '\013';
	    break;
	case '\\':
	    simple = '\\';
	    break;
	case '\"':
	    simple = '\"';
	    break;
	default:
	    simple = -1;
	    break;
	}

	if (simple >= 0) {
	    src++;
	    *dst++ = (char) simple;
	} else if (ch >= '0' && ch <= '7') {
	    t = src;
	    *dst++ = (char) octal(&t);
	    src = t;
	} else if (ch == 'x' && ishex(*(UChar *) (src + 1))) {
	    t = src + 1;
	    *dst++ = (char) hex(&t);
	    src = t;
	} else if (ch == '\0') {
	    /* trailing backslash; src stays on the terminator */
	    *dst++ = '\\';
	} else {
	    /* not an escape: keep the backslash and the character */
	    *dst++ = '\\';
	    *dst++ = *src++;
	}
    }

    *dst = 0;
    if (lenp != 0)
	*lenp = (unsigned) (dst - s);
    return s;
}
/*
 * Make a copy of value that is safe to show in a message: every
 * character that scan_code[] classifies as SC_UNEXPECTED is replaced
 * with '@'.
 *
 * Returns the copy, obtained from strdup(), or value itself, unchanged,
 * if the copy could not be made.
 */
char *
safe_string(char *value)
{
    char *copy = strdup(value);
    char *scan;

    if (copy == NULL)
	return value;

    /* replace nonprintable characters with '@', which is illegal too */
    scan = copy;
    while (*scan != '\0') {
	if (scan_code[(UChar) * scan] == SC_UNEXPECTED)
	    *scan = '@';
	++scan;
    }
    return copy;
}
/*
 * Collect a string constant; the opening double quote has already been
 * read.  The text is gathered in string_buff, escape sequences are
 * processed by rm_escape(), and the result is stored in yylval.ptr as a
 * new STRING.
 *
 * Returns STRING_.  A string that is not closed before the end of the
 * line or of the program is reported and ends the run with status 2.
 */
static int
collect_string(void)
{
    register char *p = string_buff;
    int c;
    int e_flag = 0;		/* on if have an escape char */
    size_t len_buff;

    while (1) {
	CheckStringSize(p);
	switch (scan_code[NextChar(*p++)]) {
	case SC_DQUOTE:	/* done */
	    *--p = 0;
	    goto out;

	case SC_NL:
	    p[-1] = 0;
	    /* FALLTHRU */

	case 0:		/* unterminated string */
	    compile_error(
			     "runaway string constant \"%.10s ...",
			     safe_string(string_buff));
	    mawk_exit(2);

	case SC_ESCAPE:
	    if ((c = next()) == '\n') {
		/* backslash-newline: drop the stored backslash */
		p--;
		lineno++;
	    } else if (c == 0) {
		un_next();
	    } else {
		/*
		 * This is the second character stored in this pass of
		 * the loop, so the check made at the top of the loop no
		 * longer covers it.
		 */
		CheckStringSize(p);
		*p++ = (char) c;
		e_flag = 1;
	    }
	    break;

	default:
	    break;
	}
    }

  out:
    if (e_flag)
	rm_escape(string_buff, &len_buff);
    else
	len_buff = (unsigned) ((char *) p - string_buff);
    yylval.ptr = (PTR) new_STRING1(string_buff, len_buff);
    return STRING_;
}
/*
* Collect a regular expression; the opening slash has already been read.
* The text up to the closing slash is gathered in string_buff, compiled
* with re_compile(), and the result is stored in yylval.ptr.
*
* A slash inside a bracket expression does not end the expression.
* "boxed" counts the brackets currently open and "first" remembers the
* position where a ']' would still be an ordinary member of the list.
*
* Returns RE.  An expression that is too long, or that is not closed
* before the end of the line or of the program, is reported and ends the
* run with status 2.
*/
static int
collect_RE(void)
{
char *p = string_buff;
const char *first = NULL;
/* leaves room for the two characters an escape may store in one pass */
int limit = sizeof(string_buff) - 2;
int c;
int boxed = 0;
STRING *sval;
while (1) {
if (p >= (string_buff + limit)) {
compile_error(
"regular expression /%.10s ..."
" exceeds implementation size limit (%d)",
string_buff,
limit);
mawk_exit(2);
}
/* cannot trigger: the stricter test above has already passed */
CheckStringSize(p);
/* the character is stored at *p and also kept in c */
switch (scan_code[NextChar(c = *p++)]) {
case SC_POW:
/* Handle [^]] and [^^] correctly. */
if ((p - 1) == first && first != 0 && first[-1] == '[') {
first = p;
}
break;
case SC_LBOX:
/*
* If we're starting a bracket expression, remember where that
* started, so we can make comparisons to handle things like
* "[]xxxx]" and "[^]xxxx]".
*/
if (!boxed) {
first = p;
++boxed;
} else {
/* XXX. Does not handle collating symbols or equivalence
* class expressions. */
/* XXX. Does not match logic used in rexp0.c to check for
* a character class expression, though probably the
* latter should be adjusted.
* POSIX and common sense give us license to complain about
* expressions such as '[[:not a special character class]]'.
*/
/* "[:" inside a bracket expression opens one more level */
if (next() == ':') {
++boxed;
}
un_next();
}
break;
case SC_RBOX:
/*
* A right square-bracket loses its special meaning if it occurs
* first in the list (after an optional "^").
*/
if (boxed && p - 1 != first) {
--boxed;
}
break;
case SC_DIV: /* done */
if (!boxed) {
*--p = 0;
goto out;
}
break;
case SC_NL:
p[-1] = 0;
/* FALLTHRU */
case 0: /* unterminated re */
compile_error(
"runaway regular expression /%.10s ...",
safe_string(string_buff));
mawk_exit(2);
case SC_ESCAPE:
switch (c = next()) {
case '/':
/* "\/" is stored as a plain slash */
p[-1] = '/';
break;
case '\n':
/* backslash-newline: drop the stored backslash */
p--;
break;
case 0:
un_next();
break;
default:
/* keep both the backslash and the character after it */
*p++ = (char) c;
break;
}
break;
}
}
out:
/* now we've got the RE, so compile it */
sval = new_STRING(string_buff);
yylval.ptr = re_compile(sval);
free_STRING(sval);
return RE;
}
#ifdef NO_LEAKS
/*
 * Leak-checking support: release whatever yylval.ptr still refers to
 * and clear the pointer.
 */
void
scan_leaks(void)
{
    TRACE(("scan_leaks\n"));

    /* free() accepts a null pointer, so no test is needed */
    free(yylval.ptr);
    yylval.ptr = 0;
}
#endif