mirror of
https://github.com/ThomasDickey/mawk-snapshots.git
synced 2026-01-26 19:09:15 +00:00
snapshot of project "mawk", label t20090820b
This commit is contained in:
parent
89129c703f
commit
fb73ac62dc
10
CHANGES
10
CHANGES
@ -1,8 +1,14 @@
|
||||
-- $MawkId: CHANGES,v 1.56 2009/09/14 09:32:45 tom Exp $
|
||||
-- $MawkId: CHANGES,v 1.58 2009/09/16 23:32:59 tom Exp $
|
||||
|
||||
Changes by Thomas E Dickey <dickey@invisible-island.net>
|
||||
|
||||
20090914
|
||||
20090916
|
||||
correct logic in scan.c to handle expression "[[]" (report by Aleksey
|
||||
Cheusov).
|
||||
|
||||
add MAWK_LONG_OPTIONS feature to allow mawk to ignore long options
|
||||
which are not implemented.
|
||||
|
||||
modify built-in regular expression functions to accept embedded nulls.
|
||||
|
||||
modify input reader FINgets() to accept embedded nulls in data read
|
||||
|
||||
2
MANIFEST
2
MANIFEST
@ -1,4 +1,4 @@
|
||||
MANIFEST for mawk, version t20090820a
|
||||
MANIFEST for mawk, version t20090820b
|
||||
--------------------------------------------------------------------------------
|
||||
MANIFEST this file
|
||||
ACKNOWLEDGMENT acknowledgements
|
||||
|
||||
9
cast.c
9
cast.c
@ -10,7 +10,7 @@ the GNU General Public License, version 2, 1991.
|
||||
********************************************/
|
||||
|
||||
/*
|
||||
* $MawkId: cast.c,v 1.8 2009/08/20 23:00:13 tom Exp $
|
||||
* $MawkId: cast.c,v 1.9 2009/09/16 09:29:51 tom Exp $
|
||||
* @Log: cast.c,v @
|
||||
* Revision 1.6 1996/08/11 22:07:50 mike
|
||||
* Fix small bozo in rt_error("overflow converting ...")
|
||||
@ -316,6 +316,12 @@ cast_for_split(CELL * cp)
|
||||
cp->type = C_SPACE;
|
||||
return;
|
||||
} else if (c == 0) {
|
||||
#ifdef LOCAL_REGEXP
|
||||
char temp[1];
|
||||
temp[0] = (char) c;
|
||||
free_STRING(string(cp));
|
||||
cp->ptr = (PTR) new_STRING1(temp, 1);
|
||||
#else
|
||||
/*
|
||||
* A null is not a meta character, but strchr will match it anyway.
|
||||
* For now, there's no reason to compile a null as a regular
|
||||
@ -327,6 +333,7 @@ cast_for_split(CELL * cp)
|
||||
free_STRING(string(cp));
|
||||
cp->ptr = (PTR) new_STRING1(temp, 1);
|
||||
return;
|
||||
#endif
|
||||
} else if (strchr(meta, c)) {
|
||||
xbuff[1] = (char) c;
|
||||
free_STRING(string(cp));
|
||||
|
||||
26
init.c
26
init.c
@ -10,7 +10,7 @@ the GNU General Public License, version 2, 1991.
|
||||
********************************************/
|
||||
|
||||
/*
|
||||
* $MawkId: init.c,v 1.11 2009/08/21 00:53:52 tom Exp $
|
||||
* $MawkId: init.c,v 1.12 2009/09/16 22:32:17 tom Exp $
|
||||
* @Log: init.c,v @
|
||||
* Revision 1.11 1995/08/20 17:35:21 mike
|
||||
* include <stdlib.h> for MSC, needed for environ decl
|
||||
@ -169,6 +169,30 @@ process_cmdline(int argc, char **argv)
|
||||
}
|
||||
/* safe to look at argv[i][2] */
|
||||
|
||||
/*
|
||||
* Check for "long" options and decide how to handle them.
|
||||
*/
|
||||
if (strlen(argv[i]) > 2 && !strncmp(argv[i], "--", 2)) {
|
||||
char *env = getenv("MAWK_LONG_OPTIONS");
|
||||
if (env != 0) {
|
||||
switch (*env) {
|
||||
default:
|
||||
case 'e': /* error */
|
||||
bad_option(argv[i]);
|
||||
break;
|
||||
case 'w': /* warn */
|
||||
errmsg(0, "ignored option: %s", argv[i]);
|
||||
break;
|
||||
case 'i': /* ignore */
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
bad_option(argv[i]);
|
||||
}
|
||||
nextarg = i + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (argv[i][2] == 0) {
|
||||
if (i == argc - 1 && argv[i][1] != '-') {
|
||||
if (strchr("WFvf", argv[i][1])) {
|
||||
|
||||
54
rexp0.c
54
rexp0.c
@ -10,7 +10,7 @@ the GNU General Public License, version 2, 1991.
|
||||
********************************************/
|
||||
|
||||
/*
|
||||
* $MawkId: rexp0.c,v 1.13 2009/09/13 22:38:22 tom Exp $
|
||||
* $MawkId: rexp0.c,v 1.16 2009/09/17 22:58:49 tom Exp $
|
||||
* @Log: rexp0.c,v @
|
||||
* Revision 1.5 1996/11/08 15:39:27 mike
|
||||
* While cleaning up block_on, I introduced a bug. Now fixed.
|
||||
@ -84,7 +84,7 @@ static BV *store_bvp(BV *);
|
||||
static const
|
||||
char RE_char2token['|' + 1] =
|
||||
{
|
||||
0, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*07*/
|
||||
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*07*/
|
||||
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*0f*/
|
||||
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*17*/
|
||||
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*1f*/
|
||||
@ -127,6 +127,10 @@ RE_lex(MACHINE * mp)
|
||||
{
|
||||
register int c;
|
||||
|
||||
if ((unsigned) (1 + lp - re_str) >= re_len) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (c = char2token((UChar) (*lp))) {
|
||||
case T_PLUS:
|
||||
case T_STAR:
|
||||
@ -269,7 +273,7 @@ do_str(
|
||||
*s++ = (char) c;
|
||||
len = 1;
|
||||
|
||||
while (1) {
|
||||
while ((1 + p - re_str) < (int) re_len) {
|
||||
char *save;
|
||||
|
||||
switch (char2token((UChar) (*p))) {
|
||||
@ -398,8 +402,9 @@ lookup_cclass(char **start)
|
||||
}
|
||||
}
|
||||
|
||||
if (code == CCLASS_NONE)
|
||||
if (code == CCLASS_NONE) {
|
||||
RE_error_trap(-E3);
|
||||
}
|
||||
|
||||
if ((result = cclass_table[item].data) == 0) {
|
||||
int ch = 0;
|
||||
@ -485,6 +490,38 @@ lookup_cclass(char **start)
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * If start points at "[:", look up the character class via lookup_cclass()
 * and return its result; otherwise return 0.
 *
 * lookup_cclass() is given the address of the local copy of start, so it
 * may move that pointer.  When next is nonzero, the (possibly updated)
 * pointer is stored in *next.  *next is left untouched when start does not
 * begin with "[:".
 */
static CCLASS *
|
||||
get_cclass(char *start, char **next)
|
||||
{
|
||||
CCLASS *result = 0;
|
||||
|
||||
/* only "[:" can introduce a character class */
if (start[0] == '['
|
||||
&& start[1] == ':') {
|
||||
result = lookup_cclass(&start);
|
||||
/* callers that do not need the updated position pass next == 0 */
if (next != 0) {
|
||||
*next = start;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we're pointing to a left square-bracket. If so, return nonzero
|
||||
* if that is a literal one, not part of a character class, etc.
|
||||
*
|
||||
* http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html#tag_09_03_05
|
||||
*/
|
||||
static int
|
||||
literal_leftsq(char *start)
|
||||
{
|
||||
/* stays 0 when start does not point at '[' */
int result = 0;
|
||||
if (start[0] == '[') {
|
||||
/* get_cclass() returns 0 unless the '[' is followed by ':' and a class is found */
if (get_cclass(start, 0) == 0)
|
||||
result = 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* build a BV for a character class.
|
||||
*start points at the '['
|
||||
on exit: *start points at the character after ']'
|
||||
@ -506,15 +543,16 @@ do_class(char **start, MACHINE * mp)
|
||||
/* []...] puts ] in a class
|
||||
[^]..] negates a class with ]
|
||||
*/
|
||||
if (*p == ']')
|
||||
if (literal_leftsq(p) || p[0] == ']')
|
||||
p++;
|
||||
else if (*p == '^' && *(p + 1) == ']')
|
||||
else if (p[0] == '^' && (literal_leftsq(p + 1) || p[1] == ']'))
|
||||
p += 2;
|
||||
|
||||
for (level = 0, q = p; (level != 0) || (*q != ']'); ++q) {
|
||||
if (*q == '[') {
|
||||
if (q[1] != ':' || ++level > 1)
|
||||
if (q[1] != ':' || ++level > 1) {
|
||||
RE_error_trap(-E3);
|
||||
}
|
||||
} else if (*q == ']') {
|
||||
if (level > 0) {
|
||||
if (q[-1] != ':')
|
||||
@ -554,7 +592,7 @@ do_class(char **start, MACHINE * mp)
|
||||
break;
|
||||
|
||||
case '[':
|
||||
if (p[1] == ':' && (cclass = lookup_cclass(&p)) != 0) {
|
||||
if ((cclass = get_cclass(p, &p)) != 0) {
|
||||
while (cclass->first >= 0) {
|
||||
block_on(*bvp, cclass->first, cclass->last);
|
||||
++cclass;
|
||||
|
||||
17
scan.c
17
scan.c
@ -10,7 +10,7 @@ the GNU General Public License, version 2, 1991.
|
||||
********************************************/
|
||||
|
||||
/*
|
||||
* $MawkId: scan.c,v 1.11 2009/07/27 20:39:41 tom Exp $
|
||||
* $MawkId: scan.c,v 1.12 2009/09/17 09:35:28 tom Exp $
|
||||
* @Log: scan.c,v @
|
||||
* Revision 1.8 1996/07/28 21:47:05 mike
|
||||
* gnuish patch
|
||||
@ -1003,7 +1003,8 @@ collect_RE(void)
|
||||
string_buff);
|
||||
mawk_exit(2);
|
||||
}
|
||||
switch (scan_code[(UChar) (*p++ = (char) next())]) {
|
||||
c = (UChar) (*p++ = (char) next());
|
||||
switch (scan_code[c]) {
|
||||
case SC_POW:
|
||||
if (p == first + 1) {
|
||||
first = p;
|
||||
@ -1016,9 +1017,17 @@ collect_RE(void)
|
||||
* started, so we can make comparisons to handle things like
|
||||
* "[]xxxx]" and "[^]xxxx]".
|
||||
*/
|
||||
if (!boxed)
|
||||
if (!boxed) {
|
||||
first = p;
|
||||
++boxed;
|
||||
++boxed;
|
||||
} else if (p != first + 1) {
|
||||
++boxed;
|
||||
} else {
|
||||
if (next() == ':') {
|
||||
++boxed;
|
||||
}
|
||||
un_next();
|
||||
}
|
||||
break;
|
||||
|
||||
case SC_RBOX:
|
||||
|
||||
@ -4,92 +4,151 @@
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: to >>
|
||||
reg4.5<<: to >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: Some care is needed so that things like >>
|
||||
reg4.5<<: Some care is needed so that things like >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: static unsigned last_dhash ; >>
|
||||
reg4.5<<: static unsigned last_dhash ; >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: ARRAY A ; >>
|
||||
reg4.5<<: ARRAY A ; >>
|
||||
reg4.3<<: STRING *sval ; >>
|
||||
reg4.5<<: STRING *sval ; >>
|
||||
reg4.3<<: { >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: { >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: } >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: { >>
|
||||
reg4.3<<: else >>
|
||||
reg4.5<<: else >>
|
||||
reg4.3<<: } >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: } >>
|
||||
reg4.3<<: return p ; >>
|
||||
reg4.5<<: return p ; >>
|
||||
reg4.3<<: } >>
|
||||
reg4.4<<: } >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: ARRAY A ; >>
|
||||
reg4.5<<: ARRAY A ; >>
|
||||
reg4.3<<: double d ; >>
|
||||
reg4.5<<: double d ; >>
|
||||
reg4.3<<: int cflag ; >>
|
||||
reg4.5<<: int cflag ; >>
|
||||
reg4.3<<: { >>
|
||||
reg4.4<<: { >>
|
||||
reg4.3<<: ANODE *ap ; >>
|
||||
reg4.5<<: ANODE *ap ; >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.3<<: break ; >>
|
||||
reg4.5<<: break ; >>
|
||||
reg4.3<<: } >>
|
||||
reg4.3<<: } >>
|
||||
reg4.3<<: else >>
|
||||
reg4.5<<: else >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
reg4.1<<: >>
|
||||
reg4.2<<: >>
|
||||
reg4.3<<: >>
|
||||
reg4.4<<: >>
|
||||
reg4.5<<: >>
|
||||
26..12: each array is of size A_HASH_PRIME.
|
||||
reg5.1<<A_HASH_PRIME>>
|
||||
26..12: each array is of size A_HASH_PRIME.
|
||||
|
||||
@ -1,15 +1,23 @@
|
||||
# $MawkId: reg4.awk,v 1.4 2009/07/12 22:23:58 tom Exp $
|
||||
# $MawkId: reg4.awk,v 1.7 2009/09/17 23:29:01 tom Exp $
|
||||
{
|
||||
if ($0 ~/^[-+()0-9.,$%/'"]*$/)
|
||||
{
|
||||
{
|
||||
print ("reg4.1<<:",$0,">>")
|
||||
}
|
||||
if ($0 ~/^[]+()0-9.,$%/'"-]*$/)
|
||||
{
|
||||
{
|
||||
print ("reg4.2<<:",$0,">>")
|
||||
}
|
||||
if ($0 ~/^[^]+()0-9.,$%/'"-]*$/)
|
||||
{
|
||||
{
|
||||
print ("reg4.3<<:",$0,">>")
|
||||
}
|
||||
if ($0 ~/^[[+(){}0-9.,$%/'"-]*$/)
|
||||
{
|
||||
print ("reg4.4<<:",$0,">>")
|
||||
}
|
||||
if ($0 ~/^[^[+(){}0-9.,$%/'"-]*$/)
|
||||
{
|
||||
print ("reg4.5<<:",$0,">>")
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# $MawkId: reg5.awk,v 1.1 2009/07/27 18:55:24 tom Exp $
|
||||
# $MawkId: reg5.awk,v 1.2 2009/09/17 00:51:34 tom Exp $
|
||||
BEGIN {
|
||||
pat1="([[:upper:][:digit:]])+(_[[:upper:][:digit:]]+)+"
|
||||
pat2="0x[[:xdigit:]]+"
|
||||
@ -22,4 +22,5 @@ BEGIN {
|
||||
printf "%d..%d:%s\n", RSTART, RLENGTH, $0
|
||||
printf ("reg5.3<<%s>>\n",substr($0,RSTART,RLENGTH))
|
||||
}
|
||||
# add patterns like those in reg4.awk which exercise [, ] at beginning
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user