diff --git a/CHANGES b/CHANGES index 7e4e9b6..da41979 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,12 @@ --- $MawkId: CHANGES,v 1.391 2024/08/27 23:54:59 tom Exp $ +-- $MawkId: CHANGES,v 1.397 2024/09/05 22:57:27 tom Exp $ + +20240905 + + update manual page discussing approved extensions. + + mask nonprintable characters when reporting errors in scripts + + fix for unmatchable pattern workaround in 20240819, in case the regex + also has a syntax error immediately after (report/testcase by Tyler + Hawkins). + + fix sign-extension in scan.c (report/testcase by Tyler Hawkins). 20240827 + amend fix for Original Mawk #48, providing for deep function @@ -119,7 +127,7 @@ 20230808 + modify input buffer-resizing to improve performance with very long - longs (report/testcase by Leif LeBaron). + lines (report/testcase by Leif LeBaron). 20230804 > fixes for nulls (patches by Miguel Pineiro Jr). diff --git a/MANIFEST b/MANIFEST index a9cb427..df250c5 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,4 +1,4 @@ -MANIFEST for mawk, version t20240827 +MANIFEST for mawk, version t20240905 -------------------------------------------------------------------------------- MANIFEST this file ACKNOWLEDGMENT acknowledgements diff --git a/Makefile.in b/Makefile.in index 8b156e6..d7c0f95 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,4 +1,4 @@ -# $MawkId: Makefile.in,v 1.64 2024/08/18 18:17:25 tom Exp $ +# $MawkId: Makefile.in,v 1.65 2024/09/05 22:44:14 tom Exp $ # Makefile-template for MAWK ############################################################################### # copyright 2009-2023,2024 Thomas E. Dickey @@ -170,34 +170,34 @@ $(BINDIR) : mkdir -p "$@" # output from makedeps.sh -array.o : array.h bi_vars.h config.h field.h mawk.h memory.h nstd.h sizes.h split.h symtype.h types.h zmalloc.h +array.o : array.h bi_vars.h config.h field.h mawk.h memory.h nstd.h repl.h sizes.h split.h symtype.h types.h zmalloc.h bi_funct.o : array.h bi_funct.h bi_vars.h config.h field.h files.h fin.h init.h mawk.h memory.h nstd.h regexp.h repl.h rexp.h sizes.h symtype.h types.h zmalloc.h -bi_vars.o : array.h bi_vars.h config.h field.h init.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h +bi_vars.o : array.h bi_vars.h config.h field.h init.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h cast.o : array.h config.h field.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h code.o : array.h code.h config.h field.h init.h jmp.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h da.o : array.h bi_funct.h code.h config.h field.h mawk.h memory.h nstd.h regexp.h repl.h rexp.h sizes.h symtype.h types.h zmalloc.h -error.o : array.h bi_vars.h config.h mawk.h nstd.h parse.h scan.h scancode.h sizes.h symtype.h types.h +error.o : array.h bi_vars.h config.h mawk.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h execute.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h fin.h mawk.h memory.h nstd.h regexp.h repl.h rexp.h sizes.h symtype.h types.h zmalloc.h -fcall.o : array.h code.h config.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h +fcall.o : array.h code.h config.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h field.o : array.h bi_vars.h config.h field.h init.h mawk.h memory.h nstd.h parse.h regexp.h repl.h rexp.h scan.h scancode.h sizes.h split.h symtype.h types.h zmalloc.h -files.o : array.h config.h files.h fin.h init.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h -fin.o : array.h bi_vars.h config.h field.h fin.h mawk.h memory.h nstd.h parse.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h -hash.o : array.h bi_vars.h config.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h -init.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h init.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h -jmp.o : array.h code.h config.h init.h jmp.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h -kw.o : array.h config.h init.h mawk.h nstd.h parse.h sizes.h symtype.h types.h -main.o : array.h bi_vars.h code.h config.h files.h init.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h +files.o : array.h config.h files.h fin.h init.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h +fin.o : array.h bi_vars.h config.h field.h fin.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h +hash.o : array.h bi_vars.h config.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h +init.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h init.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h +jmp.o : array.h code.h config.h init.h jmp.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h +kw.o : array.h config.h init.h mawk.h nstd.h parse.h repl.h sizes.h symtype.h types.h +main.o : array.h bi_vars.h code.h config.h files.h init.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h makescan.o : config.h nstd.h scancode.h -matherr.o : array.h config.h init.h mawk.h nstd.h sizes.h symtype.h types.h -memory.o : config.h mawk.h memory.h nstd.h sizes.h types.h zmalloc.h -parse.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h jmp.h mawk.h memory.h nstd.h sizes.h symtype.h types.h zmalloc.h -print.o : array.h bi_funct.h bi_vars.h config.h field.h files.h init.h mawk.h memory.h nstd.h parse.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h +matherr.o : array.h config.h init.h mawk.h nstd.h repl.h sizes.h symtype.h types.h +memory.o : config.h mawk.h memory.h nstd.h repl.h sizes.h types.h zmalloc.h +parse.o : array.h bi_funct.h bi_vars.h code.h config.h field.h files.h jmp.h mawk.h memory.h nstd.h repl.h sizes.h symtype.h types.h zmalloc.h +print.o : array.h bi_funct.h bi_vars.h config.h field.h files.h init.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h re_cmpl.o : array.h config.h mawk.h memory.h nstd.h parse.h regexp.h repl.h rexp.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h regexp.o : config.h scan.o : array.h code.h config.h field.h files.h fin.h init.h mawk.h memory.h nstd.h parse.h repl.h scan.h scancode.h sizes.h symtype.h types.h zmalloc.h scancode.o : scancode.h split.o : array.h bi_funct.h bi_vars.h config.h field.h mawk.h memory.h nstd.h parse.h regexp.h repl.h rexp.h scan.h scancode.h sizes.h split.h symtype.h types.h zmalloc.h trace.o : code.h config.h mawk.h memory.h nstd.h repl.h sizes.h types.h zmalloc.h -version.o : array.h config.h init.h mawk.h nstd.h patchlev.h sizes.h symtype.h types.h -zmalloc.o : config.h mawk.h nstd.h sizes.h types.h zmalloc.h -regexp.o : rexpdb.c rexp4.c rexp2.c regexp_system.c sizes.h mawk.h rexp0.c rexp1.c config.h rexp.h regexp.h nstd.h rexp3.c rexp.c field.h +version.o : array.h config.h init.h mawk.h nstd.h patchlev.h repl.h sizes.h symtype.h types.h +zmalloc.o : config.h mawk.h nstd.h repl.h sizes.h types.h zmalloc.h +regexp.o : rexpdb.c rexp4.c rexp2.c regexp_system.c sizes.h rexp0.c mawk.h rexp1.c config.h rexp.h regexp.h nstd.h repl.h rexp3.c rexp.c field.h diff --git a/bi_funct.c b/bi_funct.c index 672e9f1..60a1b96 100644 --- a/bi_funct.c +++ b/bi_funct.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: bi_funct.c,v 1.132 2024/08/26 08:11:02 tom Exp $ + * $MawkId: bi_funct.c,v 1.134 2024/09/05 17:44:48 tom Exp $ */ #define Visible_ARRAY @@ -30,7 +30,6 @@ the GNU General Public License, version 2, 1991. #include #include #include -#include #include #include @@ -88,7 +87,7 @@ const BI_REC bi_funct[] = { "strftime", bi_strftime, 0, 3 }, #endif - { (char *) 0, (PF_CP) 0, 0, 0 } + { "", (PF_CP) 0, 0, 0 } }; /* *INDENT-ON* */ @@ -99,7 +98,7 @@ bi_funct_init(void) register const BI_REC *p; register SYMTAB *stp; - for (p = bi_funct; p->name; p++) { + for (p = bi_funct; p->name[0]; p++) { stp = insert(p->name); stp->type = ST_BUILTIN; stp->stval.bip = p; diff --git a/cast.c b/cast.c index 98c607c..02a2904 100644 --- a/cast.c +++ b/cast.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: cast.c,v 1.30 2024/08/25 17:09:56 tom Exp $ + * $MawkId: cast.c,v 1.31 2024/09/05 17:44:48 tom Exp $ */ #define Visible_CELL @@ -22,7 +22,6 @@ the GNU General Public License, version 2, 1991. #include #include #include -#include const int mpow2[NUM_CELL_TYPES] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512}; diff --git a/code.c b/code.c index 23c5cb9..44dee4c 100644 --- a/code.c +++ b/code.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: code.c,v 1.47 2024/08/25 19:47:39 tom Exp $ + * $MawkId: code.c,v 1.48 2024/09/05 17:44:48 tom Exp $ */ #define Visible_CELL @@ -26,7 +26,6 @@ the GNU General Public License, version 2, 1991. #include #ifdef NO_LEAKS -#include #include #endif diff --git a/da.c b/da.c index 8d1e88f..8d50aee 100644 --- a/da.c +++ b/da.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: da.c,v 1.51 2024/08/25 19:37:17 tom Exp $ + * $MawkId: da.c,v 1.53 2024/09/05 17:44:48 tom Exp $ */ /* disassemble code */ @@ -28,7 +28,6 @@ the GNU General Public License, version 2, 1991. #include #include -#include #include typedef struct fdump { @@ -469,7 +468,7 @@ find_bi_name(PF_CP p) const BI_REC *q; int i; - for (q = bi_funct; q->name; q++) { + for (q = bi_funct; q->name[0]; q++) { if (q->fp == p) { /* found */ return q->name; diff --git a/error.c b/error.c index 28d852a..44cbe84 100644 --- a/error.c +++ b/error.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: error.c,v 1.27 2024/08/25 17:21:36 tom Exp $ + * $MawkId: error.c,v 1.28 2024/08/29 00:19:40 tom Exp $ */ #define Visible_CELL @@ -110,6 +110,9 @@ missing(int c, const char *n, unsigned ln) s0 = s1 = ""; errmsg(0, "%s%sline %u: missing %c near %s", s0, s1, ln, c, n); + + if (++compile_error_count >= MAX_COMPILE_ERRORS) + mawk_exit(2); } void @@ -134,7 +137,7 @@ yyerror(const char *s GCC_UNUSED) if (*ip == current_token) { missing(')', ss, token_lineno); paren_cnt = 0; - goto done; + return; } if (brace_cnt) @@ -142,16 +145,16 @@ yyerror(const char *s GCC_UNUSED) if (*ip == current_token) { missing('}', ss, token_lineno); brace_cnt = 0; - goto done; + return; } compile_error("syntax error at or near %s", ss); - } else /* special cases */ + } else { /* special cases */ switch (current_token) { case UNEXPECTED: unexpected_char(); - goto done; + break; case BAD_DECIMAL: compile_error( @@ -169,11 +172,7 @@ yyerror(const char *s GCC_UNUSED) compile_error("syntax error"); break; } - return; - - done: - if (++compile_error_count == MAX_COMPILE_ERRORS) - mawk_exit(2); + } } /* generic error message with a hook into the system error @@ -291,6 +290,8 @@ unexpected_char(void) fprintf(stderr, "unexpected character '%c'\n", c); else fprintf(stderr, "unexpected character 0x%02x\n", c); + if (++compile_error_count >= MAX_COMPILE_ERRORS) + mawk_exit(2); } const char * diff --git a/execute.c b/execute.c index 44360a9..d4d85a1 100644 --- a/execute.c +++ b/execute.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: execute.c,v 1.61 2024/08/26 23:38:53 tom Exp $ + * $MawkId: execute.c,v 1.62 2024/09/05 17:44:48 tom Exp $ */ #define Visible_ARRAY @@ -32,7 +32,6 @@ the GNU General Public License, version 2, 1991. #include #include #include -#include #include #include diff --git a/field.c b/field.c index 944457c..3d7e9df 100644 --- a/field.c +++ b/field.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: field.c,v 1.44 2024/08/25 17:04:08 tom Exp $ + * $MawkId: field.c,v 1.46 2024/09/05 17:44:48 tom Exp $ */ #define Visible_CELL @@ -27,7 +27,6 @@ the GNU General Public License, version 2, 1991. #include #include #include -#include #include /* initial fields and pseudo fields, @@ -92,7 +91,8 @@ static void build_field0(void); If RS is changed, so is rs_shadow */ SEPARATOR rs_shadow = { - SEP_CHAR, '\n', NULL + SEP_CHAR, '\n', + {NULL} }; /* a splitting CELL version of FS */ CELL fs_shadow = @@ -116,7 +116,7 @@ set_rs_shadow(void) scan_code['\n'] = SC_UNEXPECTED; if (rs_shadow.type == SEP_STR) { - free_STRING((STRING *) rs_shadow.ptr); + free_STRING(rs_shadow.u.s_ptr); } cast_for_split(cellcpy(&c, RS)); @@ -128,11 +128,11 @@ set_rs_shadow(void) rs_shadow.c = s[0]; } else { rs_shadow.type = SEP_STR; - rs_shadow.ptr = (PTR) new_STRING(s); + rs_shadow.u.s_ptr = new_STRING(s); } } else { rs_shadow.type = SEP_RE; - rs_shadow.ptr = c.ptr; + rs_shadow.u.r_ptr = (RE_NODE *) c.ptr; } break; @@ -146,7 +146,7 @@ set_rs_shadow(void) scan_code['\n'] = SC_SPACE; rs_shadow.type = SEP_MLR; sval = new_STRING("\n\n+"); - rs_shadow.ptr = re_compile(sval); + rs_shadow.u.r_ptr = re_compile(sval); free_STRING(sval); break; @@ -779,10 +779,10 @@ field_leaks(void) switch (rs_shadow.type) { case SEP_STR: - free_STRING(((STRING *) (&rs_shadow.ptr))); + free_STRING(rs_shadow.u.s_ptr); break; case SEP_RE: - re_destroy(rs_shadow.ptr); + re_destroy(rs_shadow.u.r_ptr); break; } } diff --git a/field.h b/field.h index 62457b9..0c1245e 100644 --- a/field.h +++ b/field.h @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: field.h,v 1.18 2024/08/25 17:06:41 tom Exp $ + * $MawkId: field.h,v 1.20 2024/09/05 17:44:48 tom Exp $ */ /* field.h */ @@ -80,7 +80,10 @@ typedef struct _separator { char type; char c; - PTR ptr; /* STRING* or RE machine* */ + union { + STRING *s_ptr; + RE_NODE *r_ptr; + } u; } #endif SEPARATOR; diff --git a/fin.c b/fin.c index fe03d13..59f2178 100644 --- a/fin.c +++ b/fin.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: fin.c,v 1.56 2024/08/25 17:04:35 tom Exp $ + * $MawkId: fin.c,v 1.57 2024/09/05 17:38:30 tom Exp $ */ #define Visible_CELL @@ -285,13 +285,13 @@ FINgets(FIN * fin, size_t *len_p) case SEP_STR: q = str_str(p, (size_t) (fin->limit - p), - ((STRING *) rs_shadow.ptr)->str, - match_len = ((STRING *) rs_shadow.ptr)->len); + rs_shadow.u.s_ptr->str, + match_len = (rs_shadow.u.s_ptr)->len); break; case SEP_MLR: case SEP_RE: - q = re_pos_match(p, (size_t) (fin->limit - p), rs_shadow.ptr, + q = re_pos_match(p, (size_t) (fin->limit - p), rs_shadow.u.r_ptr, &match_len, (p != fin->buff) || (fin->flags & FIN_FLAG)); diff --git a/init.c b/init.c index 3213a30..7af490f 100644 --- a/init.c +++ b/init.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: init.c,v 1.81 2024/08/25 18:27:39 tom Exp $ + * $MawkId: init.c,v 1.82 2024/09/04 22:21:58 tom Exp $ */ #define Visible_ARRAY @@ -170,7 +170,7 @@ ok_abbrev(const char *fullName, const char *partName, int partLen) UChar ch = (UChar) partName[n]; if (isalpha(ch)) ch = (UChar) toupper(ch); - if (ch != (UChar) toupper(fullName[n])) { + if (ch != (UChar) toupper((UChar) fullName[n])) { result = 0; break; } diff --git a/main.c b/main.c index 0bb2569..2b42362 100644 --- a/main.c +++ b/main.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: main.c,v 1.33 2024/08/25 17:02:09 tom Exp $ + * $MawkId: main.c,v 1.34 2024/09/04 20:28:21 tom Exp $ */ #define Visible_CELL @@ -76,6 +76,7 @@ main(int argc, char **argv) void mawk_exit(int x) { + TRACE(("mawk_exit(%d)\n", x)); #ifdef HAVE_REAL_PIPES close_out_pipes(); /* actually closes all output */ #else diff --git a/makedeps.sh b/makedeps.sh index 59ca5ea..3ea4662 100755 --- a/makedeps.sh +++ b/makedeps.sh @@ -1,7 +1,7 @@ #!/bin/sh -# $MawkId: makedeps.sh,v 1.3 2023/10/31 23:04:46 tom Exp $ +# $MawkId: makedeps.sh,v 1.4 2024/09/05 22:45:32 tom Exp $ ############################################################################### -# copyright 2009-2010,2023 Thomas E. Dickey +# copyright 2009-2023,2024 Thomas E. Dickey # # This is a source file for mawk, an implementation of # the AWK programming language. @@ -43,6 +43,11 @@ grep -E 'include.*\.c"' regexp.c | -e 's/\.c/\\.o/' \ -e 's/"/\/ { AddDeps(); next; }/' \ >>makedeps.awk +grep -E 'include.*\.c>' regexp.c | + sed -e 's/^#[^<]*/\/ { AddDeps(); next; }/' \ + >>makedeps.awk cat >>makedeps.awk <<'EOF' { print; } diff --git a/man/mawk.1 b/man/mawk.1 index c99f5c7..797ff8e 100644 --- a/man/mawk.1 +++ b/man/mawk.1 @@ -1,4 +1,4 @@ -.\" $MawkId: mawk.1,v 1.64 2024/01/23 22:05:53 tom Exp $ +.\" $MawkId: mawk.1,v 1.67 2024/09/05 22:23:55 tom Exp $ .\" ########################################################################### .\" # copyright 2008-2023,2024, Thomas E. Dickey .\" # copyright 1996, Michael D. Brennan @@ -11,7 +11,7 @@ .\" ########################################################################### .ds N Mawk .ds n mawk -.TH MAWK 1 2024-01-23 "Version 1.3.4" "User commands" +.TH MAWK 1 2024-09-05 "Version 1.3.4" "User commands" .\" strings .ds ex \fIexpr\fR .\" Bulleted paragraph @@ -1788,14 +1788,21 @@ across implementations. .PP Some features were not part of the POSIX standard until long after their introduction in \fB\*n\fP and other implementations. -These have been approved, though still (as of July 2020), -are not part of a published standard: +These were published in IEEE 1003.1-2024 +(The Open Group Base Specifications Issue 8): .bP The built-in .B fflush first appeared in a 1993 AT&T awk released to netlib. It was approved for the POSIX standard in 2012. .bP +The built-in +.B nextfile +first appeared in gawk in 1988, +was adopted by BWK in 1996, +and by mawk in 2012. +It was approved for the POSIX standard in 2012. +.bP Aggregate deletion with .B delete .I array @@ -1818,11 +1825,6 @@ can call \fBsrand\fP at startup with no parameter this feature may be suppressed using conditional compilation. . .SS "Extensions added for compatibility for GAWK and BWK" -.B Nextfile -is a \fBgawk\fP extension (also implemented by BWK awk). -It was approved for the POSIX standard in September 2012, -and is expected to be part of the next revision of the standard. -.PP .BR Mktime , .BR strftime \ and .B systime @@ -2019,3 +2021,11 @@ It also discusses POSIX requirements for AWK. \fBmawk-arrays\fP(7) discusses \fB\*n\fP's implementation of arrays. .PP \fBmawk-code\fP(7) gives more information on the \fB\-W\ dump\fP option. +.PP +\fIawk \(en pattern scanning and processing language\fP +.br +The Open Group Base Specifications Issue 8 +.br +IEEE Std 1003.1-2024 +.br +https://pubs.opengroup.org/onlinepubs/9799919799/utilities/awk.html diff --git a/man/mawk.doc b/man/mawk.doc index 579ab8b..a0032f8 100644 --- a/man/mawk.doc +++ b/man/mawk.doc @@ -1005,13 +1005,17 @@ CCOOMMPPAATTIIBBIILLIITTYY currently this use is not portable across implementations. Some features were not part of the POSIX standard until long after - their introduction in mmaawwkk and other implementations. These have been - approved, though still (as of July 2020), are not part of a published - standard: + their introduction in mmaawwkk and other implementations. These were pub- + lished in IEEE 1003.1-2024 (The Open Group Base Specifications Issue + 8): +o The built-in fffflluusshh first appeared in a 1993 AT&T awk released to netlib. It was approved for the POSIX standard in 2012. + +o The built-in nneexxttffiillee first appeared in gawk in 1988, was adopted + by BWK in 1996, and by mawk in 2012. It was approved for the POSIX + standard in 2012. + +o Aggregate deletion with ddeelleettee _a_r_r_a_y was approved in 2018. RRaannddoomm nnuummbbeerrss @@ -1029,10 +1033,6 @@ CCOOMMPPAATTIIBBIILLIITTYY conditional compilation. EExxtteennssiioonnss aaddddeedd ffoorr ccoommppaattiibbiilliittyy ffoorr GGAAWWKK aanndd BBWWKK - NNeexxttffiillee is a ggaawwkk extension (also implemented by BWK awk). It was ap- - proved for the POSIX standard in September 2012, and is expected to be - part of the next revision of the standard. - MMkkttiimmee, ssttrrffttiimmee and ssyyssttiimmee are ggaawwkk extensions. The "/dev/stdin" feature was added to mmaawwkk after 1.3.4, for compatibil- @@ -1179,6 +1179,11 @@ SSEEEE AALLSSOO mmaawwkk--ccooddee(7) gives more information on the --WW dduummpp option. + _a_w_k _- _p_a_t_t_e_r_n _s_c_a_n_n_i_n_g _a_n_d _p_r_o_c_e_s_s_i_n_g _l_a_n_g_u_a_g_e + The Open Group Base Specifications Issue 8 + IEEE Std 1003.1-2024 + https://pubs.opengroup.org/onlinepubs/9799919799/utilities/awk.html -Version 1.3.4 2024-01-23 MAWK(1) + +Version 1.3.4 2024-09-05 MAWK(1) diff --git a/man/mawk.txt b/man/mawk.txt index 15023f1..a884c75 100644 --- a/man/mawk.txt +++ b/man/mawk.txt @@ -1005,13 +1005,17 @@ COMPATIBILITY currently this use is not portable across implementations. Some features were not part of the POSIX standard until long after - their introduction in mawk and other implementations. These have been - approved, though still (as of July 2020), are not part of a published - standard: + their introduction in mawk and other implementations. These were pub- + lished in IEEE 1003.1-2024 (The Open Group Base Specifications Issue + 8): o The built-in fflush first appeared in a 1993 AT&T awk released to netlib. It was approved for the POSIX standard in 2012. + o The built-in nextfile first appeared in gawk in 1988, was adopted + by BWK in 1996, and by mawk in 2012. It was approved for the POSIX + standard in 2012. + o Aggregate deletion with delete array was approved in 2018. Random numbers @@ -1029,10 +1033,6 @@ COMPATIBILITY conditional compilation. Extensions added for compatibility for GAWK and BWK - Nextfile is a gawk extension (also implemented by BWK awk). It was ap- - proved for the POSIX standard in September 2012, and is expected to be - part of the next revision of the standard. - Mktime, strftime and systime are gawk extensions. The "/dev/stdin" feature was added to mawk after 1.3.4, for compatibil- @@ -1179,6 +1179,11 @@ SEE ALSO mawk-code(7) gives more information on the -W dump option. + awk - pattern scanning and processing language + The Open Group Base Specifications Issue 8 + IEEE Std 1003.1-2024 + https://pubs.opengroup.org/onlinepubs/9799919799/utilities/awk.html -Version 1.3.4 2024-01-23 MAWK(1) + +Version 1.3.4 2024-09-05 MAWK(1) diff --git a/mawk.h b/mawk.h index 132da48..13f6ace 100644 --- a/mawk.h +++ b/mawk.h @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: mawk.h,v 1.71 2024/08/05 22:18:07 tom Exp $ + * $MawkId: mawk.h,v 1.73 2024/09/05 17:21:05 tom Exp $ */ /* mawk.h */ @@ -30,6 +30,7 @@ the GNU General Public License, version 2, 1991. #include +#include #include #ifdef HAVE_STDNORETURN_H @@ -165,7 +166,8 @@ extern GCC_NORETURN void mawk_exit(int); extern void da(INST *, FILE *); extern INST *da_this(INST *, const INST *, FILE *); extern char *rm_escape(char *, size_t *); -extern char *re_pos_match(char *, size_t, PTR, size_t *, int); +extern char *re_pos_match(char *, size_t, RE_NODE *, size_t *, int); +extern char *safe_string(char *); extern int binmode(void); #ifndef REXP_H diff --git a/package/debian/changelog b/package/debian/changelog index d348d46..9a42ef3 100644 --- a/package/debian/changelog +++ b/package/debian/changelog @@ -1,3 +1,9 @@ +mawk-cur (1.3.4-20240905) unstable; urgency=low + + * maintenance updates + + -- Thomas E. Dickey Wed, 28 Aug 2024 04:03:44 -0400 + mawk-cur (1.3.4-20240827) unstable; urgency=low * maintenance updates diff --git a/package/freebsd/Makefile b/package/freebsd/Makefile index 08e9201..85dd77b 100644 --- a/package/freebsd/Makefile +++ b/package/freebsd/Makefile @@ -2,7 +2,7 @@ # $FreeBSD: head/lang/mawk/Makefile 516890 2019-11-06 14:17:48Z wen $ PORTNAME= mawk -DISTVERSION= 1.3.4.20240827 +DISTVERSION= 1.3.4.20240905 CATEGORIES= lang MASTER_SITES= https://invisible-island.net/archives/${PORTNAME}/ \ https://invisible-mirror.net/archives/${PORTNAME}/ diff --git a/package/mawk.spec b/package/mawk.spec index 14f7ba1..b65aaee 100644 --- a/package/mawk.spec +++ b/package/mawk.spec @@ -1,9 +1,9 @@ Summary: mawk - pattern scanning and text processing language %global AppProgram mawk %global AppVersion 1.3.4 -%global AppPatched 20240827 +%global AppPatched 20240905 %global MySite https://invisible-island.net -# $MawkId: mawk.spec,v 1.126 2024/08/27 07:48:11 tom Exp $ +# $MawkId: mawk.spec,v 1.128 2024/09/05 22:57:27 tom Exp $ Name: %{AppProgram} Version: %{AppVersion} Release: %{AppPatched} diff --git a/patchlev.h b/patchlev.h index 2db60b9..d68673d 100644 --- a/patchlev.h +++ b/patchlev.h @@ -11,9 +11,9 @@ the GNU General Public License, version 2, 1991. */ /* - * $MawkId: patchlev.h,v 1.153 2024/08/27 07:48:11 tom Exp $ + * $MawkId: patchlev.h,v 1.155 2024/09/05 22:57:27 tom Exp $ */ #define PATCH_BASE 1 #define PATCH_LEVEL 3 #define PATCH_STRING ".4" -#define DATE_STRING "20240827" +#define DATE_STRING "20240905" diff --git a/re_cmpl.c b/re_cmpl.c index dfa9d41..7098c09 100644 --- a/re_cmpl.c +++ b/re_cmpl.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: re_cmpl.c,v 1.37 2024/08/25 17:04:26 tom Exp $ + * $MawkId: re_cmpl.c,v 1.41 2024/09/05 17:44:48 tom Exp $ */ #define Visible_CELL @@ -23,7 +23,6 @@ the GNU General Public License, version 2, 1991. #include #include #include -#include /* a list of compiled regular expressions */ static RE_NODE *re_list; @@ -33,7 +32,7 @@ static const char efmt[] = "regular expression compile failed (%s)\n%s"; /* compile a STRING to a regular expression machine. Search a list of pre-compiled strings first */ -PTR +RE_NODE * re_compile(STRING * sval) { register RE_NODE *p; @@ -71,10 +70,12 @@ re_compile(STRING * sval) if (!(p->re.compiled = REcompile(s, sval->len))) { ZFREE(p); sval->ref_cnt--; - if (mawk_state == EXECUTION) - rt_error(efmt, REerror(), s); - else { /* compiling */ - compile_error(efmt, REerror(), s); + if (mawk_state == EXECUTION) { + rt_error(efmt, REerror(), safe_string(s)); + } else { /* compiling */ + char *safe = safe_string(s); + compile_error(efmt, REerror(), safe); + free(safe); return (PTR) 0; } } @@ -90,7 +91,7 @@ re_compile(STRING * sval) if (dump_RE) REmprint(p->re.compiled, stderr); #endif - return refRE_DATA(p->re); + return p; } /* this is only used by da() */ diff --git a/repl.h b/repl.h index d0fac8c..2d858b8 100644 --- a/repl.h +++ b/repl.h @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: repl.h,v 1.13 2024/08/25 17:15:43 tom Exp $ + * $MawkId: repl.h,v 1.14 2024/09/05 17:24:56 tom Exp $ */ /* repl.h */ @@ -47,9 +47,8 @@ RE_NODE; #define isAnchored(ptr) (((RE_DATA *)(ptr))->anchored) #define isEmpty_RE(ptr) (((RE_DATA *)(ptr))->is_empty) #define cast_to_re(ptr) (((RE_DATA *)(ptr))->compiled) -#define refRE_DATA(re) ((PTR) &(re)) -PTR re_compile(STRING *); +RE_NODE *re_compile(STRING *); STRING *re_uncompile(PTR); CELL *repl_compile(STRING *); diff --git a/rexp.c b/rexp.c index 44843c1..fcb5f0d 100644 --- a/rexp.c +++ b/rexp.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: rexp.c,v 1.48 2024/08/25 21:19:50 tom Exp $ + * $MawkId: rexp.c,v 1.51 2024/09/04 23:02:39 tom Exp $ */ /* op precedence parser for regular expressions */ @@ -23,14 +23,14 @@ the GNU General Public License, version 2, 1991. int REerrno; const char *const REerrlist[] = {(char *) 0, - /* 1 */ "missing '('", - /* 2 */ "missing ')'", - /* 3 */ "bad class -- [], [^] or [", - /* 4 */ "missing operand", - /* 5 */ "resource exhaustion -- regular expression too large", - /* 6 */ "syntax error ^* or ^+", - /* 7 */ "bad interval expression", - /* 8 */ "" + /* ERR_1 */ "missing '('", + /* ERR_2 */ "missing ')'", + /* ERR_3 */ "bad class -- [], [^] or [", + /* ERR_4 */ "missing operand", + /* ERR_5 */ "resource exhaustion -- regular expression too large", + /* ERR_6 */ "syntax error ^* or ^+", + /* ERR_7 */ "bad interval expression", + /* ERR_8 */ "" }; /* ERR_5 is very unlikely to occur */ @@ -108,6 +108,7 @@ token_name(int token) void RE_error_trap(int x) { + TRACE(("RE_error_trap(%d)\n", x)); REerrno = x; longjmp(err_buf, 1); } @@ -130,6 +131,7 @@ REcompile(char *re, size_t len) register OPS *op_ptr; register int t; + TRACE(("REcompile %.*s\n", (int) len, re)); /* do this first because it also checks if we have a run time stack */ RE_lex_init(re, len); @@ -152,6 +154,7 @@ REcompile(char *re, size_t len) op_ptr->token = 0; t = RE_lex(m_stack(0)); + memset(m_ptr, 0, sizeof(*m_ptr)); while (1) { TRACE(("RE_lex token %s\n", token_name(t))); @@ -177,6 +180,8 @@ REcompile(char *re, size_t len) * convert m{3,10} to mmm* with a limit of 10 */ TRACE(("interval {%ld,%ld}\n", (long) intrvalmin, (long) intrvalmax)); + if ((m_ptr - m_array) < STACKSZ) + memset(m_ptr + 1, 0, sizeof(*m_ptr)); if (intrvalmin == 0) { /* zero or more */ switch (intrvalmax) { case 0: @@ -244,14 +249,20 @@ REcompile(char *re, size_t len) RE_poscl_limit(m_ptr, intrvalmin, intrvalmax); TRACE(("RE_lex token %s\n", token_name(T_PLUS))); #endif - } else { /* n or more */ + } else if (m_ptr->start != 0) { /* n or more */ register Int i; /* copy 2 copies of m_ptr, use 2nd copy to replace the first copy that gets swallowed by concat */ MACHINE *result_mp = m_ptr; MACHINE *concat_mp = (m_ptr + 1); MACHINE *new_mp = (m_ptr + 2); + TRACE(("calling duplicate_m result_mp %ld -> concat_mp %ld\n", + result_mp - m_array, + concat_mp - m_array)); duplicate_m(concat_mp, result_mp); + TRACE(("calling duplicate_m result_mp %ld -> new_mp %ld\n", + result_mp - m_array, + new_mp - m_array)); duplicate_m(new_mp, result_mp); for (i = 2; i <= intrvalmin; i++) { RE_cat(result_mp, concat_mp); @@ -427,6 +438,9 @@ void duplicate_m(MACHINE * newmp, MACHINE * oldmp) { register STATE *p; + TRACE(("duplicate_m %p -> %p\n", oldmp, newmp)); + TRACE(("...start %p\n", oldmp->start)); + TRACE(("...stop %p\n", oldmp->stop)); p = (STATE *) RE_malloc(2 * STATESZ); RE_copy_states(p, oldmp->start, 2); newmp->start = (STATE *) p; diff --git a/rexp2.c b/rexp2.c index 6f46b8c..7eea9d9 100644 --- a/rexp2.c +++ b/rexp2.c @@ -12,7 +12,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: rexp2.c,v 1.45 2024/08/25 17:16:24 tom Exp $ + * $MawkId: rexp2.c,v 1.46 2024/09/05 17:44:48 tom Exp $ */ /* test a string against a machine */ @@ -450,7 +450,6 @@ REtest(char *str, /* string to test */ #undef push #include -#include char * is_string_split(PTR q, size_t * lenp) diff --git a/rexp4.c b/rexp4.c index 4841f5d..16139d4 100644 --- a/rexp4.c +++ b/rexp4.c @@ -10,10 +10,9 @@ the GNU General Public License, version 2, 1991. */ /* - * $MawkId: rexp4.c,v 1.11 2024/08/25 17:16:24 tom Exp $ + * $MawkId: rexp4.c,v 1.12 2024/09/05 17:44:48 tom Exp $ */ #include -#include char * is_string_split(PTR q, size_t *lenp) diff --git a/scan.c b/scan.c index e526ddb..024f6cf 100644 --- a/scan.c +++ b/scan.c @@ -12,7 +12,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: scan.c,v 1.59 2024/08/25 17:01:57 tom Exp $ + * $MawkId: scan.c,v 1.66 2024/09/05 17:44:48 tom Exp $ */ #define Visible_ARRAY @@ -31,7 +31,6 @@ the GNU General Public License, version 2, 1991. #include #include #include -#include #include #ifdef HAVE_FCNTL_H @@ -48,8 +47,13 @@ the GNU General Public License, version 2, 1991. #define ct_ret(x) do { current_token = (x); return scan_scope(current_token); } while (0) +#if OPT_TRACE > 1 +static int next(void); +static void un_next(void); +#else #define next() (*buffp ? *buffp++ : slow_next()) #define un_next() buffp-- +#endif #define test1_ret(c,x,d) if ( next() == (c) ) ct_ret(x) ;\ else { un_next() ; ct_ret(d) ; } @@ -111,7 +115,8 @@ static struct { } repair_syms[MAX_REPAIR]; /* use unsigned chars for index into scan_code[] */ -#define NextUChar(c) (UChar)(c = (char) next()) +#define NextUChar(c) (UChar)(c = next()) /* use if c is not a char */ +#define NextChar(c) (UChar)(c = (char) next()) /* use if c is a char */ /* overused tmp buffer */ char string_buff[SPRINTF_LIMIT]; @@ -232,7 +237,6 @@ scan_fillbuff(void) static int slow_next(void) { - while (*buffp == 0) { if (!eof_flag) { buffp = buffer; @@ -254,9 +258,32 @@ slow_next(void) } } - return *buffp++; /* note can un_next() , eof which is zero */ + return 0xff & *buffp++; /* note can un_next(), eof which is zero */ } +#if OPT_TRACE > 1 +#define SHOW(tag,c) \ + TRACE((((c) >= ' ' && (c) <= '~') ? "%s %c\n" : "%s 0x%x\n", tag, c)) +static int +next(void) +{ + int ch; + if (*buffp != '\0') { + ch = *buffp++; + } else { + ch = slow_next(); + } + SHOW("* GET", ch); + return ch; +} +static void +un_next(void) +{ + buffp--; + SHOW("UNGET", *buffp); +} +#endif + static void eat_comment(void) { @@ -324,8 +351,6 @@ eat_nl(void) /* eat all space including newlines */ /* can't un_next() twice so deal with it */ yylval.ival = '\\'; unexpected_char(); - if (++compile_error_count == MAX_COMPILE_ERRORS) - mawk_exit(2); return; } } @@ -724,7 +749,7 @@ yylex(void) while (1) { CheckStringSize(p); - c = scan_code[NextUChar(*p++)]; + c = scan_code[NextChar(*p++)]; if (c != SC_IDCHAR && c != SC_DIGIT) break; } @@ -827,7 +852,7 @@ collect_decimal(int c, int *flag) if (c == '.') { last_decimal = p - 1; CheckStringSize(p); - if (scan_code[NextUChar(*p++)] != SC_DIGIT) { + if (scan_code[NextChar(*p++)] != SC_DIGIT) { *flag = UNEXPECTED; yylval.ival = '.'; return 0.0; @@ -835,7 +860,7 @@ collect_decimal(int c, int *flag) } else { while (1) { CheckStringSize(p); - if (scan_code[NextUChar(*p++)] != SC_DIGIT) { + if (scan_code[NextChar(*p++)] != SC_DIGIT) { break; } }; @@ -849,7 +874,7 @@ collect_decimal(int c, int *flag) /* get rest of digits after decimal point */ while (1) { CheckStringSize(p); - if (scan_code[NextUChar(*p++)] != SC_DIGIT) { + if (scan_code[NextChar(*p++)] != SC_DIGIT) { break; } } @@ -859,7 +884,7 @@ collect_decimal(int c, int *flag) un_next(); *--p = 0; } else { /* get the exponent */ - if (scan_code[NextUChar(*p)] != SC_DIGIT && + if (scan_code[NextChar(*p)] != SC_DIGIT && *p != '-' && *p != '+') { /* if we can, undo and try again */ if (buffp - buffer >= 2) { @@ -875,7 +900,7 @@ collect_decimal(int c, int *flag) p++; while (1) { CheckStringSize(p); - if (scan_code[NextUChar(*p++)] != SC_DIGIT) { + if (scan_code[NextChar(*p++)] != SC_DIGIT) { break; } } @@ -1069,6 +1094,23 @@ rm_escape(char *s, size_t *lenp) return s; } +char * +safe_string(char *value) +{ + char *result = strdup(value); + if (result == NULL) { + result = value; + } else { + char *s; + /* replace nonprintable characters with '@', which is illegal too */ + for (s = result; *s != '\0'; ++s) { + if (scan_code[(UChar) * s] == SC_UNEXPECTED) + *s = '@'; + } + } + return result; +} + static int collect_string(void) { @@ -1079,7 +1121,7 @@ collect_string(void) while (1) { CheckStringSize(p); - switch (scan_code[NextUChar(*p++)]) { + switch (scan_code[NextChar(*p++)]) { case SC_DQUOTE: /* done */ *--p = 0; goto out; @@ -1091,7 +1133,7 @@ collect_string(void) case 0: /* unterminated string */ compile_error( "runaway string constant \"%.10s ...", - string_buff); + safe_string(string_buff)); mawk_exit(2); case SC_ESCAPE: @@ -1141,7 +1183,7 @@ collect_RE(void) mawk_exit(2); } CheckStringSize(p); - switch (scan_code[NextUChar(c = *p++)]) { + switch (scan_code[NextChar(c = *p++)]) { case SC_POW: /* Handle [^]] and [^^] correctly. */ if ((p - 1) == first && first != 0 && first[-1] == '[') { @@ -1198,7 +1240,7 @@ collect_RE(void) case 0: /* unterminated re */ compile_error( "runaway regular expression /%.10s ...", - string_buff); + safe_string(string_buff)); mawk_exit(2); case SC_ESCAPE: diff --git a/split.c b/split.c index ed8b051..b2b9767 100644 --- a/split.c +++ b/split.c @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: split.c,v 1.34 2024/08/25 17:04:18 tom Exp $ + * $MawkId: split.c,v 1.36 2024/09/05 17:44:48 tom Exp $ */ #define Visible_BI_REC @@ -27,7 +27,6 @@ the GNU General Public License, version 2, 1991. #include #include #include -#include #include #ifndef SP_SIZE @@ -161,7 +160,7 @@ re_split(char *s, size_t slen, PTR re) * length of match is returned in *lenp */ char * -re_pos_match(char *str, size_t str_len, PTR re, size_t *lenp, int no_bol) +re_pos_match(char *str, size_t str_len, RE_NODE * re, size_t *lenp, int no_bol) { const char *end = str + str_len; diff --git a/symtype.h b/symtype.h index 99cd455..508548c 100644 --- a/symtype.h +++ b/symtype.h @@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: symtype.h,v 1.29 2024/08/26 08:08:39 tom Exp $ + * $MawkId: symtype.h,v 1.30 2024/09/05 11:40:11 tom Exp $ */ /* types related to symbols are defined here */ @@ -29,7 +29,7 @@ typedef unsigned char SYM_TYPE; typedef struct _bi_rec #ifdef Visible_BI_REC { - const char *name; + const char name[12]; PF_CP fp; /* ptr to function that does the builtin */ NUM_ARGS min_args, max_args; /* info for parser to check correct number of arguments */ diff --git a/trace.c b/trace.c index 66ca38a..6fdb3fe 100644 --- a/trace.c +++ b/trace.c @@ -10,14 +10,13 @@ the GNU General Public License, version 2, 1991. ********************************************/ /* - * $MawkId: trace.c,v 1.23 2024/08/25 17:41:21 tom Exp $ + * $MawkId: trace.c,v 1.24 2024/09/05 17:44:48 tom Exp $ */ #define Visible_CELL #define Visible_STRING #include -#include #include static FILE *trace_fp;