snapshot of project "mawk", label t20240905

This commit is contained in:
Thomas E. Dickey 2024-09-05 22:57:27 +00:00
parent 28d7bf8a51
commit ad9910e0ba
No known key found for this signature in database
GPG Key ID: CC2AF4472167BE03
32 changed files with 239 additions and 146 deletions

12
CHANGES
View File

@ -1,4 +1,12 @@
-- $MawkId: CHANGES,v 1.391 2024/08/27 23:54:59 tom Exp $
-- $MawkId: CHANGES,v 1.397 2024/09/05 22:57:27 tom Exp $
20240905
+ update manual page discussing approved extensions.
+ mask nonprintable characters when reporting errors in scripts
+ fix for unmatchable pattern workaround in 20240819, in case the regex
also has a syntax error immediately after (report/testcase by Tyler
Hawkins).
+ fix sign-extension in scan.c (report/testcase by Tyler Hawkins).
20240827
+ amend fix for Original Mawk #48, providing for deep function
@ -119,7 +127,7 @@
20230808
+ modify input buffer-resizing to improve performance with very long
longs (report/testcase by Leif LeBaron).
lines (report/testcase by Leif LeBaron).
20230804
> fixes for nulls (patches by Miguel Pineiro Jr).

View File

@ -1,4 +1,4 @@
MANIFEST for mawk, version t20240827
MANIFEST for mawk, version t20240905
--------------------------------------------------------------------------------
MANIFEST this file
ACKNOWLEDGMENT acknowledgements

View File

@ -1,4 +1,4 @@
# $MawkId: Makefile.in,v 1.64 2024/08/18 18:17:25 tom Exp $
# $MawkId: Makefile.in,v 1.65 2024/09/05 22:44:14 tom Exp $
# Makefile-template for MAWK
###############################################################################
# copyright 2009-2023,2024 Thomas E. Dickey
@ -170,34 +170,34 @@ $(BINDIR) :
mkdir -p "$@"
# output from makedeps.sh
array.o : array.h bi_vars.h config.h field.h mawk.h memory.h nstd.h sizes.h split.h symtype.h types.h zmalloc.h
array.o : array.h bi_vars.h config.h field.h mawk.h memory.h nstd.h repl.h sizes.h split.h symtype.h types.h zmalloc.h
bi_funct.o : array.h bi_funct.h bi_vars.h config.h field.h files.h fin.h init.h mawk.h memory.h nstd.h regexp.h repl.h rexp.h sizes.h symtype.h types.h zmalloc.h
bi_vars.o : array.h bi_vars.h config.h field.h init.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h
bi_vars.o : array.h bi_vars.h config.h field.h init.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h
cast.o : array.h config.h field.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h
code.o : array.h code.h config.h field.h init.h jmp.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h
da.o : array.h bi_funct.h code.h config.h field.h mawk.h memory.h nstd.h regexp.h repl.h rexp.h sizes.h symtype.h types.h zmalloc.h
error.o : array.h bi_vars.h config.h mawk.h nstd.h parse.h scan.h scancode.h sizes.h symtype.h types.h
error.o : array.h bi_vars.h config.h mawk.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h
execute.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h fin.h mawk.h memory.h nstd.h regexp.h repl.h rexp.h sizes.h symtype.h types.h zmalloc.h
fcall.o : array.h code.h config.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h
fcall.o : array.h code.h config.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h
field.o : array.h bi_vars.h config.h field.h init.h mawk.h memory.h nstd.h parse.h regexp.h repl.h rexp.h scan.h scancode.h sizes.h split.h symtype.h types.h zmalloc.h
files.o : array.h config.h files.h fin.h init.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h
fin.o : array.h bi_vars.h config.h field.h fin.h mawk.h memory.h nstd.h parse.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h
hash.o : array.h bi_vars.h config.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h
init.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h init.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h
jmp.o : array.h code.h config.h init.h jmp.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h
kw.o : array.h config.h init.h mawk.h nstd.h parse.h sizes.h symtype.h types.h
main.o : array.h bi_vars.h code.h config.h files.h init.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h
files.o : array.h config.h files.h fin.h init.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h
fin.o : array.h bi_vars.h config.h field.h fin.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h
hash.o : array.h bi_vars.h config.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h
init.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h init.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h
jmp.o : array.h code.h config.h init.h jmp.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h
kw.o : array.h config.h init.h mawk.h nstd.h parse.h repl.h sizes.h symtype.h types.h
main.o : array.h bi_vars.h code.h config.h files.h init.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h
makescan.o : config.h nstd.h scancode.h
matherr.o : array.h config.h init.h mawk.h nstd.h sizes.h symtype.h types.h
memory.o : config.h mawk.h memory.h nstd.h sizes.h types.h zmalloc.h
parse.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h jmp.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h
print.o : array.h bi_funct.h bi_vars.h config.h field.h files.h init.h mawk.h memory.h nstd.h parse.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h
matherr.o : array.h config.h init.h mawk.h nstd.h repl.h sizes.h symtype.h types.h
memory.o : config.h mawk.h memory.h nstd.h repl.h sizes.h types.h zmalloc.h
parse.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h jmp.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h
print.o : array.h bi_funct.h bi_vars.h config.h field.h files.h init.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h
re_cmpl.o : array.h config.h mawk.h memory.h nstd.h parse.h regexp.h repl.h rexp.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h
regexp.o : config.h
scan.o : array.h code.h config.h field.h files.h fin.h init.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h
scancode.o : scancode.h
split.o : array.h bi_funct.h bi_vars.h config.h field.h mawk.h memory.h nstd.h parse.h regexp.h repl.h rexp.h scan.h scancode.h sizes.h split.h symtype.h types.h zmalloc.h
trace.o : code.h config.h mawk.h memory.h nstd.h repl.h sizes.h types.h zmalloc.h
version.o : array.h config.h init.h mawk.h nstd.h patchlev.h sizes.h symtype.h types.h
zmalloc.o : config.h mawk.h nstd.h sizes.h types.h zmalloc.h
regexp.o : rexpdb.c rexp4.c rexp2.c regexp_system.c sizes.h mawk.h rexp0.c rexp1.c config.h rexp.h regexp.h nstd.h rexp3.c rexp.c field.h
version.o : array.h config.h init.h mawk.h nstd.h patchlev.h repl.h sizes.h symtype.h types.h
zmalloc.o : config.h mawk.h nstd.h repl.h sizes.h types.h zmalloc.h
regexp.o : rexpdb.c rexp4.c rexp2.c regexp_system.c sizes.h rexp0.c mawk.h rexp1.c config.h rexp.h regexp.h nstd.h repl.h rexp3.c rexp.c field.h

View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: bi_funct.c,v 1.132 2024/08/26 08:11:02 tom Exp $
* $MawkId: bi_funct.c,v 1.134 2024/09/05 17:44:48 tom Exp $
*/
#define Visible_ARRAY
@ -30,7 +30,6 @@ the GNU General Public License, version 2, 1991.
#include <fin.h>
#include <field.h>
#include <regexp.h>
#include <repl.h>
#include <ctype.h>
#include <math.h>
@ -88,7 +87,7 @@ const BI_REC bi_funct[] =
{ "strftime", bi_strftime, 0, 3 },
#endif
{ (char *) 0, (PF_CP) 0, 0, 0 }
{ "", (PF_CP) 0, 0, 0 }
};
/* *INDENT-ON* */
@ -99,7 +98,7 @@ bi_funct_init(void)
register const BI_REC *p;
register SYMTAB *stp;
for (p = bi_funct; p->name; p++) {
for (p = bi_funct; p->name[0]; p++) {
stp = insert(p->name);
stp->type = ST_BUILTIN;
stp->stval.bip = p;

3
cast.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: cast.c,v 1.30 2024/08/25 17:09:56 tom Exp $
* $MawkId: cast.c,v 1.31 2024/09/05 17:44:48 tom Exp $
*/
#define Visible_CELL
@ -22,7 +22,6 @@ the GNU General Public License, version 2, 1991.
#include <field.h>
#include <memory.h>
#include <scan.h>
#include <repl.h>
const int mpow2[NUM_CELL_TYPES] =
{1, 2, 4, 8, 16, 32, 64, 128, 256, 512};

3
code.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: code.c,v 1.47 2024/08/25 19:47:39 tom Exp $
* $MawkId: code.c,v 1.48 2024/09/05 17:44:48 tom Exp $
*/
#define Visible_CELL
@ -26,7 +26,6 @@ the GNU General Public License, version 2, 1991.
#include <field.h>
#ifdef NO_LEAKS
#include <repl.h>
#include <scan.h>
#endif

5
da.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: da.c,v 1.51 2024/08/25 19:37:17 tom Exp $
* $MawkId: da.c,v 1.53 2024/09/05 17:44:48 tom Exp $
*/
/* disassemble code */
@ -28,7 +28,6 @@ the GNU General Public License, version 2, 1991.
#include <code.h>
#include <bi_funct.h>
#include <repl.h>
#include <field.h>
typedef struct fdump {
@ -469,7 +468,7 @@ find_bi_name(PF_CP p)
const BI_REC *q;
int i;
for (q = bi_funct; q->name; q++) {
for (q = bi_funct; q->name[0]; q++) {
if (q->fp == p) {
/* found */
return q->name;

21
error.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: error.c,v 1.27 2024/08/25 17:21:36 tom Exp $
* $MawkId: error.c,v 1.28 2024/08/29 00:19:40 tom Exp $
*/
#define Visible_CELL
@ -110,6 +110,9 @@ missing(int c, const char *n, unsigned ln)
s0 = s1 = "";
errmsg(0, "%s%sline %u: missing %c near %s", s0, s1, ln, c, n);
if (++compile_error_count >= MAX_COMPILE_ERRORS)
mawk_exit(2);
}
void
@ -134,7 +137,7 @@ yyerror(const char *s GCC_UNUSED)
if (*ip == current_token) {
missing(')', ss, token_lineno);
paren_cnt = 0;
goto done;
return;
}
if (brace_cnt)
@ -142,16 +145,16 @@ yyerror(const char *s GCC_UNUSED)
if (*ip == current_token) {
missing('}', ss, token_lineno);
brace_cnt = 0;
goto done;
return;
}
compile_error("syntax error at or near %s", ss);
} else /* special cases */
} else { /* special cases */
switch (current_token) {
case UNEXPECTED:
unexpected_char();
goto done;
break;
case BAD_DECIMAL:
compile_error(
@ -169,11 +172,7 @@ yyerror(const char *s GCC_UNUSED)
compile_error("syntax error");
break;
}
return;
done:
if (++compile_error_count == MAX_COMPILE_ERRORS)
mawk_exit(2);
}
}
/* generic error message with a hook into the system error
@ -291,6 +290,8 @@ unexpected_char(void)
fprintf(stderr, "unexpected character '%c'\n", c);
else
fprintf(stderr, "unexpected character 0x%02x\n", c);
if (++compile_error_count >= MAX_COMPILE_ERRORS)
mawk_exit(2);
}
const char *

View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: execute.c,v 1.61 2024/08/26 23:38:53 tom Exp $
* $MawkId: execute.c,v 1.62 2024/09/05 17:44:48 tom Exp $
*/
#define Visible_ARRAY
@ -32,7 +32,6 @@ the GNU General Public License, version 2, 1991.
#include <bi_funct.h>
#include <bi_vars.h>
#include <regexp.h>
#include <repl.h>
#include <fin.h>
#include <math.h>

18
field.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: field.c,v 1.44 2024/08/25 17:04:08 tom Exp $
* $MawkId: field.c,v 1.46 2024/09/05 17:44:48 tom Exp $
*/
#define Visible_CELL
@ -27,7 +27,6 @@ the GNU General Public License, version 2, 1991.
#include <memory.h>
#include <scan.h>
#include <bi_vars.h>
#include <repl.h>
#include <regexp.h>
/* initial fields and pseudo fields,
@ -92,7 +91,8 @@ static void build_field0(void);
If RS is changed, so is rs_shadow */
SEPARATOR rs_shadow =
{
SEP_CHAR, '\n', NULL
SEP_CHAR, '\n',
{NULL}
};
/* a splitting CELL version of FS */
CELL fs_shadow =
@ -116,7 +116,7 @@ set_rs_shadow(void)
scan_code['\n'] = SC_UNEXPECTED;
if (rs_shadow.type == SEP_STR) {
free_STRING((STRING *) rs_shadow.ptr);
free_STRING(rs_shadow.u.s_ptr);
}
cast_for_split(cellcpy(&c, RS));
@ -128,11 +128,11 @@ set_rs_shadow(void)
rs_shadow.c = s[0];
} else {
rs_shadow.type = SEP_STR;
rs_shadow.ptr = (PTR) new_STRING(s);
rs_shadow.u.s_ptr = new_STRING(s);
}
} else {
rs_shadow.type = SEP_RE;
rs_shadow.ptr = c.ptr;
rs_shadow.u.r_ptr = (RE_NODE *) c.ptr;
}
break;
@ -146,7 +146,7 @@ set_rs_shadow(void)
scan_code['\n'] = SC_SPACE;
rs_shadow.type = SEP_MLR;
sval = new_STRING("\n\n+");
rs_shadow.ptr = re_compile(sval);
rs_shadow.u.r_ptr = re_compile(sval);
free_STRING(sval);
break;
@ -779,10 +779,10 @@ field_leaks(void)
switch (rs_shadow.type) {
case SEP_STR:
free_STRING(((STRING *) (&rs_shadow.ptr)));
free_STRING(rs_shadow.u.s_ptr);
break;
case SEP_RE:
re_destroy(rs_shadow.ptr);
re_destroy(rs_shadow.u.r_ptr);
break;
}
}

View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: field.h,v 1.18 2024/08/25 17:06:41 tom Exp $
* $MawkId: field.h,v 1.20 2024/09/05 17:44:48 tom Exp $
*/
/* field.h */
@ -80,7 +80,10 @@ typedef struct _separator
{
char type;
char c;
PTR ptr; /* STRING* or RE machine* */
union {
STRING *s_ptr;
RE_NODE *r_ptr;
} u;
}
#endif
SEPARATOR;

8
fin.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: fin.c,v 1.56 2024/08/25 17:04:35 tom Exp $
* $MawkId: fin.c,v 1.57 2024/09/05 17:38:30 tom Exp $
*/
#define Visible_CELL
@ -285,13 +285,13 @@ FINgets(FIN * fin, size_t *len_p)
case SEP_STR:
q = str_str(p,
(size_t) (fin->limit - p),
((STRING *) rs_shadow.ptr)->str,
match_len = ((STRING *) rs_shadow.ptr)->len);
rs_shadow.u.s_ptr->str,
match_len = (rs_shadow.u.s_ptr)->len);
break;
case SEP_MLR:
case SEP_RE:
q = re_pos_match(p, (size_t) (fin->limit - p), rs_shadow.ptr,
q = re_pos_match(p, (size_t) (fin->limit - p), rs_shadow.u.r_ptr,
&match_len,
(p != fin->buff) ||
(fin->flags & FIN_FLAG));

4
init.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: init.c,v 1.81 2024/08/25 18:27:39 tom Exp $
* $MawkId: init.c,v 1.82 2024/09/04 22:21:58 tom Exp $
*/
#define Visible_ARRAY
@ -170,7 +170,7 @@ ok_abbrev(const char *fullName, const char *partName, int partLen)
UChar ch = (UChar) partName[n];
if (isalpha(ch))
ch = (UChar) toupper(ch);
if (ch != (UChar) toupper(fullName[n])) {
if (ch != (UChar) toupper((UChar) fullName[n])) {
result = 0;
break;
}

3
main.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: main.c,v 1.33 2024/08/25 17:02:09 tom Exp $
* $MawkId: main.c,v 1.34 2024/09/04 20:28:21 tom Exp $
*/
#define Visible_CELL
@ -76,6 +76,7 @@ main(int argc, char **argv)
void
mawk_exit(int x)
{
TRACE(("mawk_exit(%d)\n", x));
#ifdef HAVE_REAL_PIPES
close_out_pipes(); /* actually closes all output */
#else

View File

@ -1,7 +1,7 @@
#!/bin/sh
# $MawkId: makedeps.sh,v 1.3 2023/10/31 23:04:46 tom Exp $
# $MawkId: makedeps.sh,v 1.4 2024/09/05 22:45:32 tom Exp $
###############################################################################
# copyright 2009-2010,2023 Thomas E. Dickey
# copyright 2009-2023,2024 Thomas E. Dickey
#
# This is a source file for mawk, an implementation of
# the AWK programming language.
@ -43,6 +43,11 @@ grep -E 'include.*\.c"' regexp.c |
-e 's/\.c/\\.o/' \
-e 's/"/\/ { AddDeps(); next; }/' \
>>makedeps.awk
# Same transformation as the quoted-include case, but for sources that
# regexp.c pulls in with angle brackets: each "#include <NAME.c>" line is
# rewritten by sed into the awk rule
#	/^NAME\.o/ { AddDeps(); next; }
# and appended to makedeps.awk.
grep -E 'include.*\.c>' regexp.c |
sed -e 's/^#[^<]*</\/^/' \
-e 's/\.c/\\.o/' \
-e 's/>/\/ { AddDeps(); next; }/' \
>>makedeps.awk
cat >>makedeps.awk <<'EOF'
{ print; }

View File

@ -1,4 +1,4 @@
.\" $MawkId: mawk.1,v 1.64 2024/01/23 22:05:53 tom Exp $
.\" $MawkId: mawk.1,v 1.67 2024/09/05 22:23:55 tom Exp $
.\" ###########################################################################
.\" # copyright 2008-2023,2024, Thomas E. Dickey
.\" # copyright 1996, Michael D. Brennan
@ -11,7 +11,7 @@
.\" ###########################################################################
.ds N Mawk
.ds n mawk
.TH MAWK 1 2024-01-23 "Version 1.3.4" "User commands"
.TH MAWK 1 2024-09-05 "Version 1.3.4" "User commands"
.\" strings
.ds ex \fIexpr\fR
.\" Bulleted paragraph
@ -1788,14 +1788,21 @@ across implementations.
.PP
Some features were not part of the POSIX standard until long after
their introduction in \fB\*n\fP and other implementations.
These have been approved, though still (as of July 2020),
are not part of a published standard:
These were published in IEEE 1003.1-2024
(The Open Group Base Specifications Issue 8):
.bP
The built-in
.B fflush
first appeared in a 1993 AT&T awk released to netlib.
It was approved for the POSIX standard in 2012.
.bP
The built-in
.B nextfile
first appeared in gawk in 1988,
was adopted by BWK in 1996,
and by mawk in 2012.
It was approved for the POSIX standard in 2012.
.bP
Aggregate deletion with
.B delete
.I array
@ -1818,11 +1825,6 @@ can call \fBsrand\fP at startup with no parameter
this feature may be suppressed using conditional compilation.
.
.SS "Extensions added for compatibility with GAWK and BWK"
.B Nextfile
is a \fBgawk\fP extension (also implemented by BWK awk).
It was approved for the POSIX standard in September 2012,
and is expected to be part of the next revision of the standard.
.PP
.BR Mktime ,
.BR strftime \ and
.B systime
@ -2019,3 +2021,11 @@ It also discusses POSIX requirements for AWK.
\fBmawk-arrays\fP(7) discusses \fB\*n\fP's implementation of arrays.
.PP
\fBmawk-code\fP(7) gives more information on the \fB\-W\ dump\fP option.
.PP
\fIawk \(en pattern scanning and processing language\fP
.br
The Open Group Base Specifications Issue 8
.br
IEEE Std 1003.1-2024
.br
https://pubs.opengroup.org/onlinepubs/9799919799/utilities/awk.html

View File

@ -1005,13 +1005,17 @@ CCOOMMPPAATTIIBBIILLIITTYY
currently this use is not portable across implementations.
Some features were not part of the POSIX standard until long after
their introduction in mmaawwkk and other implementations. These have been
approved, though still (as of July 2020), are not part of a published
standard:
their introduction in mmaawwkk and other implementations. These were pub-
lished in IEEE 1003.1-2024 (The Open Group Base Specifications Issue
8):
+o The built-in fffflluusshh first appeared in a 1993 AT&T awk released to
netlib. It was approved for the POSIX standard in 2012.
+o The built-in nneexxttffiillee first appeared in gawk in 1988, was adopted
by BWK in 1996, and by mawk in 2012. It was approved for the POSIX
standard in 2012.
+o Aggregate deletion with ddeelleettee _a_r_r_a_y was approved in 2018.
RRaannddoomm nnuummbbeerrss
@ -1029,10 +1033,6 @@ CCOOMMPPAATTIIBBIILLIITTYY
conditional compilation.
EExxtteennssiioonnss aaddddeedd ffoorr ccoommppaattiibbiilliittyy ffoorr GGAAWWKK aanndd BBWWKK
NNeexxttffiillee is a ggaawwkk extension (also implemented by BWK awk). It was ap-
proved for the POSIX standard in September 2012, and is expected to be
part of the next revision of the standard.
MMkkttiimmee, ssttrrffttiimmee and ssyyssttiimmee are ggaawwkk extensions.
The "/dev/stdin" feature was added to mmaawwkk after 1.3.4, for compatibil-
@ -1179,6 +1179,11 @@ SSEEEE AALLSSOO
mmaawwkk--ccooddee(7) gives more information on the --WW dduummpp option.
_a_w_k _- _p_a_t_t_e_r_n _s_c_a_n_n_i_n_g _a_n_d _p_r_o_c_e_s_s_i_n_g _l_a_n_g_u_a_g_e
The Open Group Base Specifications Issue 8
IEEE Std 1003.1-2024
https://pubs.opengroup.org/onlinepubs/9799919799/utilities/awk.html
Version 1.3.4 2024-01-23 MAWK(1)
Version 1.3.4 2024-09-05 MAWK(1)

View File

@ -1005,13 +1005,17 @@ COMPATIBILITY
currently this use is not portable across implementations.
Some features were not part of the POSIX standard until long after
their introduction in mawk and other implementations. These have been
approved, though still (as of July 2020), are not part of a published
standard:
their introduction in mawk and other implementations. These were pub-
lished in IEEE 1003.1-2024 (The Open Group Base Specifications Issue
8):
o The built-in fflush first appeared in a 1993 AT&T awk released to
netlib. It was approved for the POSIX standard in 2012.
o The built-in nextfile first appeared in gawk in 1988, was adopted
by BWK in 1996, and by mawk in 2012. It was approved for the POSIX
standard in 2012.
o Aggregate deletion with delete array was approved in 2018.
Random numbers
@ -1029,10 +1033,6 @@ COMPATIBILITY
conditional compilation.
Extensions added for compatibility for GAWK and BWK
Nextfile is a gawk extension (also implemented by BWK awk). It was ap-
proved for the POSIX standard in September 2012, and is expected to be
part of the next revision of the standard.
Mktime, strftime and systime are gawk extensions.
The "/dev/stdin" feature was added to mawk after 1.3.4, for compatibil-
@ -1179,6 +1179,11 @@ SEE ALSO
mawk-code(7) gives more information on the -W dump option.
awk - pattern scanning and processing language
The Open Group Base Specifications Issue 8
IEEE Std 1003.1-2024
https://pubs.opengroup.org/onlinepubs/9799919799/utilities/awk.html
Version 1.3.4 2024-01-23 MAWK(1)
Version 1.3.4 2024-09-05 MAWK(1)

6
mawk.h
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: mawk.h,v 1.71 2024/08/05 22:18:07 tom Exp $
* $MawkId: mawk.h,v 1.73 2024/09/05 17:21:05 tom Exp $
*/
/* mawk.h */
@ -30,6 +30,7 @@ the GNU General Public License, version 2, 1991.
#include <assert.h>
#include <repl.h>
#include <types.h>
#ifdef HAVE_STDNORETURN_H
@ -165,7 +166,8 @@ extern GCC_NORETURN void mawk_exit(int);
extern void da(INST *, FILE *);
extern INST *da_this(INST *, const INST *, FILE *);
extern char *rm_escape(char *, size_t *);
extern char *re_pos_match(char *, size_t, PTR, size_t *, int);
extern char *re_pos_match(char *, size_t, RE_NODE *, size_t *, int);
extern char *safe_string(char *);
extern int binmode(void);
#ifndef REXP_H

View File

@ -1,3 +1,9 @@
mawk-cur (1.3.4-20240905) unstable; urgency=low
* maintenance updates
-- Thomas E. Dickey <dickey@invisible-island.net> Wed, 28 Aug 2024 04:03:44 -0400
mawk-cur (1.3.4-20240827) unstable; urgency=low
* maintenance updates

View File

@ -2,7 +2,7 @@
# $FreeBSD: head/lang/mawk/Makefile 516890 2019-11-06 14:17:48Z wen $
PORTNAME= mawk
DISTVERSION= 1.3.4.20240827
DISTVERSION= 1.3.4.20240905
CATEGORIES= lang
MASTER_SITES= https://invisible-island.net/archives/${PORTNAME}/ \
https://invisible-mirror.net/archives/${PORTNAME}/

View File

@ -1,9 +1,9 @@
Summary: mawk - pattern scanning and text processing language
%global AppProgram mawk
%global AppVersion 1.3.4
%global AppPatched 20240827
%global AppPatched 20240905
%global MySite https://invisible-island.net
# $MawkId: mawk.spec,v 1.126 2024/08/27 07:48:11 tom Exp $
# $MawkId: mawk.spec,v 1.128 2024/09/05 22:57:27 tom Exp $
Name: %{AppProgram}
Version: %{AppVersion}
Release: %{AppPatched}

View File

@ -11,9 +11,9 @@ the GNU General Public License, version 2, 1991.
*/
/*
* $MawkId: patchlev.h,v 1.153 2024/08/27 07:48:11 tom Exp $
* $MawkId: patchlev.h,v 1.155 2024/09/05 22:57:27 tom Exp $
*/
#define PATCH_BASE 1
#define PATCH_LEVEL 3
#define PATCH_STRING ".4"
#define DATE_STRING "20240827"
#define DATE_STRING "20240905"

View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: re_cmpl.c,v 1.37 2024/08/25 17:04:26 tom Exp $
* $MawkId: re_cmpl.c,v 1.41 2024/09/05 17:44:48 tom Exp $
*/
#define Visible_CELL
@ -23,7 +23,6 @@ the GNU General Public License, version 2, 1991.
#include <memory.h>
#include <scan.h>
#include <regexp.h>
#include <repl.h>
/* a list of compiled regular expressions */
static RE_NODE *re_list;
@ -33,7 +32,7 @@ static const char efmt[] = "regular expression compile failed (%s)\n%s";
/* compile a STRING to a regular expression machine.
Search a list of pre-compiled strings first
*/
PTR
RE_NODE *
re_compile(STRING * sval)
{
register RE_NODE *p;
@ -71,10 +70,12 @@ re_compile(STRING * sval)
if (!(p->re.compiled = REcompile(s, sval->len))) {
ZFREE(p);
sval->ref_cnt--;
if (mawk_state == EXECUTION)
rt_error(efmt, REerror(), s);
else { /* compiling */
compile_error(efmt, REerror(), s);
if (mawk_state == EXECUTION) {
rt_error(efmt, REerror(), safe_string(s));
} else { /* compiling */
char *safe = safe_string(s);
compile_error(efmt, REerror(), safe);
free(safe);
return (PTR) 0;
}
}
@ -90,7 +91,7 @@ re_compile(STRING * sval)
if (dump_RE)
REmprint(p->re.compiled, stderr);
#endif
return refRE_DATA(p->re);
return p;
}
/* this is only used by da() */

5
repl.h
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: repl.h,v 1.13 2024/08/25 17:15:43 tom Exp $
* $MawkId: repl.h,v 1.14 2024/09/05 17:24:56 tom Exp $
*/
/* repl.h */
@ -47,9 +47,8 @@ RE_NODE;
#define isAnchored(ptr) (((RE_DATA *)(ptr))->anchored)
#define isEmpty_RE(ptr) (((RE_DATA *)(ptr))->is_empty)
#define cast_to_re(ptr) (((RE_DATA *)(ptr))->compiled)
#define refRE_DATA(re) ((PTR) &(re))
PTR re_compile(STRING *);
RE_NODE *re_compile(STRING *);
STRING *re_uncompile(PTR);
CELL *repl_compile(STRING *);

34
rexp.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp.c,v 1.48 2024/08/25 21:19:50 tom Exp $
* $MawkId: rexp.c,v 1.51 2024/09/04 23:02:39 tom Exp $
*/
/* op precedence parser for regular expressions */
@ -23,14 +23,14 @@ the GNU General Public License, version 2, 1991.
int REerrno;
const char *const REerrlist[] =
{(char *) 0,
/* 1 */ "missing '('",
/* 2 */ "missing ')'",
/* 3 */ "bad class -- [], [^] or [",
/* 4 */ "missing operand",
/* 5 */ "resource exhaustion -- regular expression too large",
/* 6 */ "syntax error ^* or ^+",
/* 7 */ "bad interval expression",
/* 8 */ ""
/* ERR_1 */ "missing '('",
/* ERR_2 */ "missing ')'",
/* ERR_3 */ "bad class -- [], [^] or [",
/* ERR_4 */ "missing operand",
/* ERR_5 */ "resource exhaustion -- regular expression too large",
/* ERR_6 */ "syntax error ^* or ^+",
/* ERR_7 */ "bad interval expression",
/* ERR_8 */ ""
};
/* ERR_5 is very unlikely to occur */
@ -108,6 +108,7 @@ token_name(int token)
/*
 * Record the error number x in REerrno, then longjmp() to err_buf with
 * value 1.  Control does not return to the caller.
 *
 * NOTE(review): x is presumably an index into REerrlist[], and err_buf is
 * presumably armed by a setjmp() in the regex compiler -- neither is
 * visible here, confirm before relying on it.
 */
void
RE_error_trap(int x)
{
TRACE(("RE_error_trap(%d)\n", x));
/* store the error number before jumping; nothing after longjmp() runs */
REerrno = x;
longjmp(err_buf, 1);
}
@ -130,6 +131,7 @@ REcompile(char *re, size_t len)
register OPS *op_ptr;
register int t;
TRACE(("REcompile %.*s\n", (int) len, re));
/* do this first because it also checks if we have a
run time stack */
RE_lex_init(re, len);
@ -152,6 +154,7 @@ REcompile(char *re, size_t len)
op_ptr->token = 0;
t = RE_lex(m_stack(0));
memset(m_ptr, 0, sizeof(*m_ptr));
while (1) {
TRACE(("RE_lex token %s\n", token_name(t)));
@ -177,6 +180,8 @@ REcompile(char *re, size_t len)
* convert m{3,10} to mmm* with a limit of 10
*/
TRACE(("interval {%ld,%ld}\n", (long) intrvalmin, (long) intrvalmax));
if ((m_ptr - m_array) < STACKSZ)
memset(m_ptr + 1, 0, sizeof(*m_ptr));
if (intrvalmin == 0) { /* zero or more */
switch (intrvalmax) {
case 0:
@ -244,14 +249,20 @@ REcompile(char *re, size_t len)
RE_poscl_limit(m_ptr, intrvalmin, intrvalmax);
TRACE(("RE_lex token %s\n", token_name(T_PLUS)));
#endif
} else { /* n or more */
} else if (m_ptr->start != 0) { /* n or more */
register Int i;
/* make 2 copies of m_ptr, use the 2nd copy to replace
the first copy that gets swallowed by concat */
MACHINE *result_mp = m_ptr;
MACHINE *concat_mp = (m_ptr + 1);
MACHINE *new_mp = (m_ptr + 2);
TRACE(("calling duplicate_m result_mp %ld -> concat_mp %ld\n",
result_mp - m_array,
concat_mp - m_array));
duplicate_m(concat_mp, result_mp);
TRACE(("calling duplicate_m result_mp %ld -> new_mp %ld\n",
result_mp - m_array,
new_mp - m_array));
duplicate_m(new_mp, result_mp);
for (i = 2; i <= intrvalmin; i++) {
RE_cat(result_mp, concat_mp);
@ -427,6 +438,9 @@ void
duplicate_m(MACHINE * newmp, MACHINE * oldmp)
{
register STATE *p;
TRACE(("duplicate_m %p -> %p\n", oldmp, newmp));
TRACE(("...start %p\n", oldmp->start));
TRACE(("...stop %p\n", oldmp->stop));
p = (STATE *) RE_malloc(2 * STATESZ);
RE_copy_states(p, oldmp->start, 2);
newmp->start = (STATE *) p;

View File

@ -12,7 +12,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp2.c,v 1.45 2024/08/25 17:16:24 tom Exp $
* $MawkId: rexp2.c,v 1.46 2024/09/05 17:44:48 tom Exp $
*/
/* test a string against a machine */
@ -450,7 +450,6 @@ REtest(char *str, /* string to test */
#undef push
#include <field.h>
#include <repl.h>
char *
is_string_split(PTR q, size_t * lenp)

View File

@ -10,10 +10,9 @@ the GNU General Public License, version 2, 1991.
*/
/*
* $MawkId: rexp4.c,v 1.11 2024/08/25 17:16:24 tom Exp $
* $MawkId: rexp4.c,v 1.12 2024/09/05 17:44:48 tom Exp $
*/
#include <field.h>
#include <repl.h>
char *
is_string_split(PTR q, size_t *lenp)

76
scan.c
View File

@ -12,7 +12,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: scan.c,v 1.59 2024/08/25 17:01:57 tom Exp $
* $MawkId: scan.c,v 1.66 2024/09/05 17:44:48 tom Exp $
*/
#define Visible_ARRAY
@ -31,7 +31,6 @@ the GNU General Public License, version 2, 1991.
#include <field.h>
#include <init.h>
#include <fin.h>
#include <repl.h>
#include <code.h>
#ifdef HAVE_FCNTL_H
@ -48,8 +47,13 @@ the GNU General Public License, version 2, 1991.
#define ct_ret(x) do { current_token = (x); return scan_scope(current_token); } while (0)
#if OPT_TRACE > 1
static int next(void);
static void un_next(void);
#else
#define next() (*buffp ? *buffp++ : slow_next())
#define un_next() buffp--
#endif
#define test1_ret(c,x,d) if ( next() == (c) ) ct_ret(x) ;\
else { un_next() ; ct_ret(d) ; }
@ -111,7 +115,8 @@ static struct {
} repair_syms[MAX_REPAIR];
/* use unsigned chars for index into scan_code[] */
#define NextUChar(c) (UChar)(c = (char) next())
#define NextUChar(c) (UChar)(c = next()) /* use if c is not a char */
#define NextChar(c) (UChar)(c = (char) next()) /* use if c is a char */
/* overused tmp buffer */
char string_buff[SPRINTF_LIMIT];
@ -232,7 +237,6 @@ scan_fillbuff(void)
static int
slow_next(void)
{
while (*buffp == 0) {
if (!eof_flag) {
buffp = buffer;
@ -254,9 +258,32 @@ slow_next(void)
}
}
return *buffp++; /* note can un_next() , eof which is zero */
return 0xff & *buffp++; /* note can un_next(), eof which is zero */
}
#if OPT_TRACE > 1
#define SHOW(tag,c) \
TRACE((((c) >= ' ' && (c) <= '~') ? "%s %c\n" : "%s 0x%x\n", tag, c))
/*
 * Traced replacement for the next() macro (built when OPT_TRACE > 1).
 *
 * Returns the character under buffp and advances past it; when buffp is
 * at a zero byte the work is delegated to slow_next().  The character
 * obtained is logged through SHOW() before it is returned.
 */
static int
next(void)
{
    /*
     * SHOW() expands its argument more than once, so the character is
     * fetched exactly once into a local before tracing.
     */
    const int got = (*buffp == '\0') ? slow_next() : *buffp++;

    SHOW("* GET", got);
    return got;
}
/*
 * Traced replacement for the un_next() macro (built when OPT_TRACE > 1):
 * step buffp back by one position and log the character now under it.
 */
static void
un_next(void)
{
    /* adjust the pointer first; SHOW() evaluates its argument repeatedly */
    buffp -= 1;
    SHOW("UNGET", buffp[0]);
}
#endif
static void
eat_comment(void)
{
@ -324,8 +351,6 @@ eat_nl(void) /* eat all space including newlines */
/* can't un_next() twice so deal with it */
yylval.ival = '\\';
unexpected_char();
if (++compile_error_count == MAX_COMPILE_ERRORS)
mawk_exit(2);
return;
}
}
@ -724,7 +749,7 @@ yylex(void)
while (1) {
CheckStringSize(p);
c = scan_code[NextUChar(*p++)];
c = scan_code[NextChar(*p++)];
if (c != SC_IDCHAR && c != SC_DIGIT)
break;
}
@ -827,7 +852,7 @@ collect_decimal(int c, int *flag)
if (c == '.') {
last_decimal = p - 1;
CheckStringSize(p);
if (scan_code[NextUChar(*p++)] != SC_DIGIT) {
if (scan_code[NextChar(*p++)] != SC_DIGIT) {
*flag = UNEXPECTED;
yylval.ival = '.';
return 0.0;
@ -835,7 +860,7 @@ collect_decimal(int c, int *flag)
} else {
while (1) {
CheckStringSize(p);
if (scan_code[NextUChar(*p++)] != SC_DIGIT) {
if (scan_code[NextChar(*p++)] != SC_DIGIT) {
break;
}
};
@ -849,7 +874,7 @@ collect_decimal(int c, int *flag)
/* get rest of digits after decimal point */
while (1) {
CheckStringSize(p);
if (scan_code[NextUChar(*p++)] != SC_DIGIT) {
if (scan_code[NextChar(*p++)] != SC_DIGIT) {
break;
}
}
@ -859,7 +884,7 @@ collect_decimal(int c, int *flag)
un_next();
*--p = 0;
} else { /* get the exponent */
if (scan_code[NextUChar(*p)] != SC_DIGIT &&
if (scan_code[NextChar(*p)] != SC_DIGIT &&
*p != '-' && *p != '+') {
/* if we can, undo and try again */
if (buffp - buffer >= 2) {
@ -875,7 +900,7 @@ collect_decimal(int c, int *flag)
p++;
while (1) {
CheckStringSize(p);
if (scan_code[NextUChar(*p++)] != SC_DIGIT) {
if (scan_code[NextChar(*p++)] != SC_DIGIT) {
break;
}
}
@ -1069,6 +1094,23 @@ rm_escape(char *s, size_t *lenp)
return s;
}
/*
 * Produce a version of value that is safe to show in an error message.
 *
 * The string is duplicated with strdup() and every byte of the copy whose
 * scan_code[] class is SC_UNEXPECTED is overwritten with '@' (itself not a
 * legal character in a script, so it cannot be mistaken for real input).
 *
 * Returns the masked copy.  If strdup() fails, value itself is returned,
 * unmodified; a caller that frees the result must therefore first check
 * that it differs from value.
 */
char *
safe_string(char *value)
{
    char *copy = strdup(value);
    char *cp;

    if (copy == NULL)
	return value;		/* no memory: fall back to the original */

    for (cp = copy; *cp != '\0'; cp++) {
	if (scan_code[(UChar) *cp] == SC_UNEXPECTED)
	    *cp = '@';
    }
    return copy;
}
static int
collect_string(void)
{
@ -1079,7 +1121,7 @@ collect_string(void)
while (1) {
CheckStringSize(p);
switch (scan_code[NextUChar(*p++)]) {
switch (scan_code[NextChar(*p++)]) {
case SC_DQUOTE: /* done */
*--p = 0;
goto out;
@ -1091,7 +1133,7 @@ collect_string(void)
case 0: /* unterminated string */
compile_error(
"runaway string constant \"%.10s ...",
string_buff);
safe_string(string_buff));
mawk_exit(2);
case SC_ESCAPE:
@ -1141,7 +1183,7 @@ collect_RE(void)
mawk_exit(2);
}
CheckStringSize(p);
switch (scan_code[NextUChar(c = *p++)]) {
switch (scan_code[NextChar(c = *p++)]) {
case SC_POW:
/* Handle [^]] and [^^] correctly. */
if ((p - 1) == first && first != 0 && first[-1] == '[') {
@ -1198,7 +1240,7 @@ collect_RE(void)
case 0: /* unterminated re */
compile_error(
"runaway regular expression /%.10s ...",
string_buff);
safe_string(string_buff));
mawk_exit(2);
case SC_ESCAPE:

View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: split.c,v 1.34 2024/08/25 17:04:18 tom Exp $
* $MawkId: split.c,v 1.36 2024/09/05 17:44:48 tom Exp $
*/
#define Visible_BI_REC
@ -27,7 +27,6 @@ the GNU General Public License, version 2, 1991.
#include <memory.h>
#include <scan.h>
#include <regexp.h>
#include <repl.h>
#include <field.h>
#ifndef SP_SIZE
@ -161,7 +160,7 @@ re_split(char *s, size_t slen, PTR re)
* length of match is returned in *lenp
*/
char *
re_pos_match(char *str, size_t str_len, PTR re, size_t *lenp, int no_bol)
re_pos_match(char *str, size_t str_len, RE_NODE * re, size_t *lenp, int no_bol)
{
const char *end = str + str_len;

View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: symtype.h,v 1.29 2024/08/26 08:08:39 tom Exp $
* $MawkId: symtype.h,v 1.30 2024/09/05 11:40:11 tom Exp $
*/
/* types related to symbols are defined here */
@ -29,7 +29,7 @@ typedef unsigned char SYM_TYPE;
typedef struct _bi_rec
#ifdef Visible_BI_REC
{
const char *name;
const char name[12];
PF_CP fp; /* ptr to function that does the builtin */
NUM_ARGS min_args, max_args;
/* info for parser to check correct number of arguments */

View File

@ -10,14 +10,13 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: trace.c,v 1.23 2024/08/25 17:41:21 tom Exp $
* $MawkId: trace.c,v 1.24 2024/09/05 17:44:48 tom Exp $
*/
#define Visible_CELL
#define Visible_STRING
#include <mawk.h>
#include <repl.h>
#include <code.h>
static FILE *trace_fp;