snapshot of project "mawk", label t20090820b

This commit is contained in:
Thomas E. Dickey 2009-09-17 23:29:05 -04:00
parent 89129c703f
commit fb73ac62dc
9 changed files with 174 additions and 22 deletions

10
CHANGES
View File

@ -1,8 +1,14 @@
-- $MawkId: CHANGES,v 1.56 2009/09/14 09:32:45 tom Exp $
-- $MawkId: CHANGES,v 1.58 2009/09/16 23:32:59 tom Exp $
Changes by Thomas E Dickey <dickey@invisible-island.net>
20090914
20090916
correct logic in scan.c to handle expression "[[]" (report by Aleksey
Cheusov).
add MAWK_LONG_OPTIONS feature to allow mawk to ignore long options
which are not implemented.
modify built-in regular expression functions to accept embedded nulls.
modify input reader FINgets() to accept embedded nulls in data read

View File

@ -1,4 +1,4 @@
MANIFEST for mawk, version t20090820a
MANIFEST for mawk, version t20090820b
--------------------------------------------------------------------------------
MANIFEST this file
ACKNOWLEDGMENT acknowledgements

9
cast.c
View File

@ -10,7 +10,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: cast.c,v 1.8 2009/08/20 23:00:13 tom Exp $
* $MawkId: cast.c,v 1.9 2009/09/16 09:29:51 tom Exp $
* @Log: cast.c,v @
* Revision 1.6 1996/08/11 22:07:50 mike
* Fix small bozo in rt_error("overflow converting ...")
@ -316,6 +316,12 @@ cast_for_split(CELL * cp)
cp->type = C_SPACE;
return;
} else if (c == 0) {
#ifdef LOCAL_REGEXP
char temp[1];
temp[0] = (char) c;
free_STRING(string(cp));
cp->ptr = (PTR) new_STRING1(temp, 1);
#else
/*
* A null is not a meta character, but strchr will match it anyway.
* For now, there's no reason to compile a null as a regular
@ -327,6 +333,7 @@ cast_for_split(CELL * cp)
free_STRING(string(cp));
cp->ptr = (PTR) new_STRING1(temp, 1);
return;
#endif
} else if (strchr(meta, c)) {
xbuff[1] = (char) c;
free_STRING(string(cp));

26
init.c
View File

@ -10,7 +10,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: init.c,v 1.11 2009/08/21 00:53:52 tom Exp $
* $MawkId: init.c,v 1.12 2009/09/16 22:32:17 tom Exp $
* @Log: init.c,v @
* Revision 1.11 1995/08/20 17:35:21 mike
* include <stdlib.h> for MSC, needed for environ decl
@ -169,6 +169,30 @@ process_cmdline(int argc, char **argv)
}
/* safe to look at argv[i][2] */
/*
* Check for "long" options and decide how to handle them.
*/
if (strlen(argv[i]) > 2 && !strncmp(argv[i], "--", 2)) {
char *env = getenv("MAWK_LONG_OPTIONS");
if (env != 0) {
switch (*env) {
default:
case 'e': /* error */
bad_option(argv[i]);
break;
case 'w': /* warn */
errmsg(0, "ignored option: %s", argv[i]);
break;
case 'i': /* ignore */
break;
}
} else {
bad_option(argv[i]);
}
nextarg = i + 1;
continue;
}
if (argv[i][2] == 0) {
if (i == argc - 1 && argv[i][1] != '-') {
if (strchr("WFvf", argv[i][1])) {

54
rexp0.c
View File

@ -10,7 +10,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp0.c,v 1.13 2009/09/13 22:38:22 tom Exp $
* $MawkId: rexp0.c,v 1.16 2009/09/17 22:58:49 tom Exp $
* @Log: rexp0.c,v @
* Revision 1.5 1996/11/08 15:39:27 mike
* While cleaning up block_on, I introduced a bug. Now fixed.
@ -84,7 +84,7 @@ static BV *store_bvp(BV *);
static const
char RE_char2token['|' + 1] =
{
0, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*07*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*07*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*0f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*17*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*1f*/
@ -127,6 +127,10 @@ RE_lex(MACHINE * mp)
{
register int c;
if ((unsigned) (1 + lp - re_str) >= re_len) {
return 0;
}
switch (c = char2token((UChar) (*lp))) {
case T_PLUS:
case T_STAR:
@ -269,7 +273,7 @@ do_str(
*s++ = (char) c;
len = 1;
while (1) {
while ((1 + p - re_str) < (int) re_len) {
char *save;
switch (char2token((UChar) (*p))) {
@ -398,8 +402,9 @@ lookup_cclass(char **start)
}
}
if (code == CCLASS_NONE)
if (code == CCLASS_NONE) {
RE_error_trap(-E3);
}
if ((result = cclass_table[item].data) == 0) {
int ch = 0;
@ -485,6 +490,38 @@ lookup_cclass(char **start)
return result;
}
/*
 * If "start" points at the two characters "[:", hand it to lookup_cclass()
 * and return whatever that yields.  When "next" is non-null, it receives the
 * value of "start" after the lookup (lookup_cclass() is given its address).
 *
 * Returns 0 without touching "*next" when "start" does not begin with "[:".
 */
static CCLASS *
get_cclass(char *start, char **next)
{
    CCLASS *found;

    if (start[0] != '[' || start[1] != ':') {
        return 0;
    }

    found = lookup_cclass(&start);
    if (next) {
        *next = start;
    }
    return found;
}
/*
 * Tell whether "start" points at a left square-bracket which should be taken
 * literally.  The result is nonzero only when start[0] is '[' and
 * get_cclass() finds no character class at that position; otherwise zero.
 *
 * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html#tag_09_03_05
 */
static int
literal_leftsq(char *start)
{
    if (start[0] != '[') {
        return 0;
    }
    /* a '[' that does not introduce a character class is a literal */
    return (get_cclass(start, 0) == 0) ? 1 : 0;
}
/* build a BV for a character class.
*start points at the '['
on exit: *start points at the character after ']'
@ -506,15 +543,16 @@ do_class(char **start, MACHINE * mp)
/* []...] puts ] in a class
[^]..] negates a class with ]
*/
if (*p == ']')
if (literal_leftsq(p) || p[0] == ']')
p++;
else if (*p == '^' && *(p + 1) == ']')
else if (p[0] == '^' && (literal_leftsq(p + 1) || p[1] == ']'))
p += 2;
for (level = 0, q = p; (level != 0) || (*q != ']'); ++q) {
if (*q == '[') {
if (q[1] != ':' || ++level > 1)
if (q[1] != ':' || ++level > 1) {
RE_error_trap(-E3);
}
} else if (*q == ']') {
if (level > 0) {
if (q[-1] != ':')
@ -554,7 +592,7 @@ do_class(char **start, MACHINE * mp)
break;
case '[':
if (p[1] == ':' && (cclass = lookup_cclass(&p)) != 0) {
if ((cclass = get_cclass(p, &p)) != 0) {
while (cclass->first >= 0) {
block_on(*bvp, cclass->first, cclass->last);
++cclass;

17
scan.c
View File

@ -10,7 +10,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: scan.c,v 1.11 2009/07/27 20:39:41 tom Exp $
* $MawkId: scan.c,v 1.12 2009/09/17 09:35:28 tom Exp $
* @Log: scan.c,v @
* Revision 1.8 1996/07/28 21:47:05 mike
* gnuish patch
@ -1003,7 +1003,8 @@ collect_RE(void)
string_buff);
mawk_exit(2);
}
switch (scan_code[(UChar) (*p++ = (char) next())]) {
c = (UChar) (*p++ = (char) next());
switch (scan_code[c]) {
case SC_POW:
if (p == first + 1) {
first = p;
@ -1016,9 +1017,17 @@ collect_RE(void)
* started, so we can make comparisons to handle things like
* "[]xxxx]" and "[^]xxxx]".
*/
if (!boxed)
if (!boxed) {
first = p;
++boxed;
++boxed;
} else if (p != first + 1) {
++boxed;
} else {
if (next() == ':') {
++boxed;
}
un_next();
}
break;
case SC_RBOX:

View File

@ -4,92 +4,151 @@
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: to >>
reg4.5<<: to >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: Some care is needed so that things like >>
reg4.5<<: Some care is needed so that things like >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: static unsigned last_dhash ; >>
reg4.5<<: static unsigned last_dhash ; >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: ARRAY A ; >>
reg4.5<<: ARRAY A ; >>
reg4.3<<: STRING *sval ; >>
reg4.5<<: STRING *sval ; >>
reg4.3<<: { >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: { >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: } >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: { >>
reg4.3<<: else >>
reg4.5<<: else >>
reg4.3<<: } >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: } >>
reg4.3<<: return p ; >>
reg4.5<<: return p ; >>
reg4.3<<: } >>
reg4.4<<: } >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: ARRAY A ; >>
reg4.5<<: ARRAY A ; >>
reg4.3<<: double d ; >>
reg4.5<<: double d ; >>
reg4.3<<: int cflag ; >>
reg4.5<<: int cflag ; >>
reg4.3<<: { >>
reg4.4<<: { >>
reg4.3<<: ANODE *ap ; >>
reg4.5<<: ANODE *ap ; >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.3<<: break ; >>
reg4.5<<: break ; >>
reg4.3<<: } >>
reg4.3<<: } >>
reg4.3<<: else >>
reg4.5<<: else >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
reg4.1<<: >>
reg4.2<<: >>
reg4.3<<: >>
reg4.4<<: >>
reg4.5<<: >>
26..12: each array is of size A_HASH_PRIME.
reg5.1<<A_HASH_PRIME>>
26..12: each array is of size A_HASH_PRIME.

View File

@ -1,15 +1,23 @@
# $MawkId: reg4.awk,v 1.4 2009/07/12 22:23:58 tom Exp $
# $MawkId: reg4.awk,v 1.7 2009/09/17 23:29:01 tom Exp $
{
if ($0 ~/^[-+()0-9.,$%/'"]*$/)
{
{
print ("reg4.1<<:",$0,">>")
}
if ($0 ~/^[]+()0-9.,$%/'"-]*$/)
{
{
print ("reg4.2<<:",$0,">>")
}
if ($0 ~/^[^]+()0-9.,$%/'"-]*$/)
{
{
print ("reg4.3<<:",$0,">>")
}
if ($0 ~/^[[+(){}0-9.,$%/'"-]*$/)
{
print ("reg4.4<<:",$0,">>")
}
if ($0 ~/^[^[+(){}0-9.,$%/'"-]*$/)
{
print ("reg4.5<<:",$0,">>")
}
}

View File

@ -1,4 +1,4 @@
# $MawkId: reg5.awk,v 1.1 2009/07/27 18:55:24 tom Exp $
# $MawkId: reg5.awk,v 1.2 2009/09/17 00:51:34 tom Exp $
BEGIN {
pat1="([[:upper:][:digit:]])+(_[[:upper:][:digit:]]+)+"
pat2="0x[[:xdigit:]]+"
@ -22,4 +22,5 @@ BEGIN {
printf "%d..%d:%s\n", RSTART, RLENGTH, $0
printf ("reg5.3<<%s>>\n",substr($0,RSTART,RLENGTH))
}
# add patterns like those in reg4.awk which exercise [, ] at beginning
}