snapshot of project "mawk", label t20241231

This commit is contained in:
Thomas E. Dickey 2024-12-31 15:21:17 +00:00
parent 9d0711d4f8
commit b022461c88
No known key found for this signature in database
GPG Key ID: CC2AF4472167BE03
16 changed files with 537 additions and 391 deletions

View File

@ -1,4 +1,10 @@
-- $MawkId: CHANGES,v 1.412 2024/12/14 17:00:30 tom Exp $
-- $MawkId: CHANGES,v 1.414 2024/12/31 15:21:17 tom Exp $
20241231
+ in-progress changes to improve regex brace expressions, using new
machine codes M_ENTER and M_LOOP.
+ add a T_CAT in compiled regex to fix a panic (report by Dimitar
Dimitrov).
20241214
+ fix stricter gcc15 warnings.

View File

@ -1,4 +1,4 @@
MANIFEST for mawk, version t20241214
MANIFEST for mawk, version t20241231
--------------------------------------------------------------------------------
MANIFEST this file
ACKNOWLEDGMENT acknowledgements

4
mawk.h
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: mawk.h,v 1.76 2024/12/14 21:21:20 tom Exp $
* $MawkId: mawk.h,v 1.77 2024/12/24 16:51:37 tom Exp $
*/
/* mawk.h */
@ -153,6 +153,8 @@ extern Int d_to_I(double);
extern Long d_to_L(double);
extern ULong d_to_UL(double d);
#define NonNull(s) ((s) == NULL ? "<null>" : (s))
#define d_to_i(d) ((int)d_to_I(d))
#define d_to_l(d) ((long)d_to_L(d))

View File

@ -1,3 +1,9 @@
mawk-cur (1.3.4-20241231) unstable; urgency=low
* maintenance updates
-- Thomas E. Dickey <dickey@invisible-island.net> Tue, 31 Dec 2024 06:23:05 -0500
mawk-cur (1.3.4-20241214) unstable; urgency=low
* maintenance updates

View File

@ -2,7 +2,7 @@
# $FreeBSD: head/lang/mawk/Makefile 516890 2019-11-06 14:17:48Z wen $
PORTNAME= mawk
DISTVERSION= 1.3.4.20241214
DISTVERSION= 1.3.4.20241231
CATEGORIES= lang
MASTER_SITES= https://invisible-island.net/archives/${PORTNAME}/ \
https://invisible-mirror.net/archives/${PORTNAME}/

View File

@ -1,9 +1,9 @@
Summary: mawk - pattern scanning and text processing language
%global AppProgram mawk
%global AppVersion 1.3.4
%global AppPatched 20241214
%global AppPatched 20241231
%global MySite https://invisible-island.net
# $MawkId: mawk.spec,v 1.136 2024/12/14 17:00:30 tom Exp $
# $MawkId: mawk.spec,v 1.137 2024/12/31 11:23:05 tom Exp $
Name: %{AppProgram}
Version: %{AppVersion}
Release: %{AppPatched}

View File

@ -11,9 +11,9 @@ the GNU General Public License, version 2, 1991.
*/
/*
* $MawkId: patchlev.h,v 1.163 2024/12/14 17:00:30 tom Exp $
* $MawkId: patchlev.h,v 1.164 2024/12/31 11:23:05 tom Exp $
*/
#define PATCH_BASE 1
#define PATCH_LEVEL 3
#define PATCH_STRING ".4"
#define DATE_STRING "20241214"
#define DATE_STRING "20241231"

View File

@ -10,7 +10,7 @@ Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
*/
/* $MawkId: regexp.c,v 1.16 2024/08/25 17:34:05 tom Exp $ */
/* $MawkId: regexp.c,v 1.17 2024/12/31 15:13:35 tom Exp $ */
#include <config.h>
@ -24,8 +24,18 @@ the GNU General Public License, version 2, 1991.
#define Visible_RT_STATE
#define Visible_STATE
# include <mawk.h>
#define RE_FILL() { goto refill; }
#define RE_CASE() { goto reswitch; }
#define RE_FILL() do { TRACE2((rt_form "refill...\n", rt_args)); goto refill; } while (0)
#define RE_CASE() do { goto reswitch; } while (0)
/*
 * Common prefix for run-time trace messages.  rt_form is the printf-style
 * format fragment and rt_args supplies its four arguments, in order:
 * source file, source line, index of the current run-stack entry, and
 * offset of the current state within the machine.
 *
 * rt_args expands to expressions that use the identifiers `run_entry`, `m`
 * and `machine`, so these macros can only be used where those names are in
 * scope.
 */
#define rt_form "[%s@%d] %d:%03d "
#define rt_args __FILE__, __LINE__, \
(int)(run_entry - RE_run_stack_base), \
(int)(m - machine)
/* trace a short fixed message `what`, preceded by the rt_form prefix */
#define TR_AT(what) \
TRACE2((rt_form "%s\n", rt_args, what))
# include <rexp.c>
# include <rexpdb.c>
# include <rexp0.c>

178
rexp.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp.c,v 1.53 2024/12/14 12:55:59 tom Exp $
* $MawkId: rexp.c,v 1.56 2024/12/31 12:56:54 tom Exp $
*/
/* op precedence parser for regular expressions */
@ -19,6 +19,10 @@ the GNU General Public License, version 2, 1991.
#include <rexp.h>
#include <regexp.h>
#ifndef FIXME_INTERVAL_LIMITS
#define FIXME_INTERVAL_LIMITS 0 /* =1 for pre-bugfix */
#endif
/* DATA */
int REerrno;
const char *const REerrlist[] =
@ -118,6 +122,91 @@ typedef struct {
int prec;
} OPS;
#ifndef NO_INTERVAL_EXPR
/*
 * Make newmp a freshly allocated copy of oldmp.
 *
 * Exactly two states are allocated and copied, starting at oldmp->start,
 * so the source machine is expected to be no larger than that.  newmp's
 * start and stop are set to the first and second copied state.
 */
static void
duplicate_m(MACHINE * newmp, MACHINE * oldmp)
{
    STATE *copy;

    TRACE(("duplicate_m %p -> %p\n", (void *) oldmp, (void *) newmp));
    TRACE(("...start %p\n", (void *) oldmp->start));
    TRACE(("...stop %p\n", (void *) oldmp->stop));

    copy = (STATE *) RE_malloc(2 * STATESZ);
    RE_copy_states(copy, oldmp->start, 2);

    newmp->start = copy;
    newmp->stop = &copy[1];
}
/*
 * Convert the outermost closure of mp into a counted loop.
 *
 * The scan begins at mp->start, stepping over one leading M_2JA if present.
 * If the state there is M_SAVE_POS, the states are walked while tracking
 * nesting (M_SAVE_POS opens a level, M_2JC/M_LOOP closes one) to find the
 * state that closes that first M_SAVE_POS; reaching M_ACCEPT ends the walk.
 *
 * When such a closing state is found:
 *   - it becomes an M_LOOP carrying minlimit/maxlimit in it_min/it_max;
 *   - the state array is grown by one entry and every state is shifted up
 *     by one, making room at the front;
 *   - the new first state becomes M_ENTER, whose jump is the distance from
 *     itself to the M_LOOP state.
 *
 * When no closing state is found the machine is left untouched.
 */
static void
RE_set_limit(MACHINE * mp, Int minlimit, Int maxlimit)
{
    STATE *head = mp->start;
    STATE *loop_end = NULL;
    size_t total;
    size_t slot;
    int loop_at;

    if (head->s_type == M_2JA)
	head++;

    if (head->s_type == M_SAVE_POS) {
	STATE *scan = head;
	int nesting = 0;

	for (;;) {
	    if (scan->s_type == M_SAVE_POS) {
		nesting++;
	    } else if (scan->s_type == M_2JC || scan->s_type == M_LOOP) {
		if (--nesting == 0)
		    loop_end = scan;
	    } else if (scan->s_type == M_ACCEPT) {
		nesting = -1;
	    }
	    if (nesting <= 0)
		break;
	    scan++;
	}
    }

    if (loop_end == NULL)
	return;

    /* sizes and offsets are taken before the array can move */
    total = (size_t) (mp->stop - mp->start + 2);
    loop_at = (int) (loop_end - mp->start);

    loop_end->s_type = M_LOOP;
    loop_end->it_min = minlimit;
    loop_end->it_max = maxlimit;

    /* reallocate the states, to insert an item at the beginning */
    mp->start = (STATE *) RE_realloc(mp->start, total * STATESZ);
    mp->stop = mp->start + total - 1;

    /* shift from the top down so no state is overwritten before it moves */
    for (slot = total - 1; slot != 0; slot--)
	mp->start[slot] = mp->start[slot - 1];

    mp->start->s_type = M_ENTER;
    mp->start->s_data.jump = loop_at + 1;
}
/*
 * Bounded variant of the m* closure: replace the machine in mp by its
 * zero-or-more closure, then attach the iteration bounds min_limit and
 * max_limit to that closure.
 */
static void
RE_close_limit(MACHINE * mp, Int min_limit, Int max_limit)
{
    /* the state returned by RE_close() is not needed here */
    (void) RE_close(mp);
    RE_set_limit(mp, min_limit, max_limit);
}
/*
 * Bounded variant of the m+ positive closure: replace the machine in mp by
 * its one-or-more closure, then attach the iteration bounds min_limit and
 * max_limit to that closure.
 */
static void
RE_poscl_limit(MACHINE * mp, Int min_limit, Int max_limit)
{
    /* the state returned by RE_poscl() is not needed here */
    (void) RE_poscl(mp);
    RE_set_limit(mp, min_limit, max_limit);
}
#endif /* ! NO_INTERVAL_EXPR */
/* duplicate_m() relies upon copying machines whose size is 1, i.e., atoms */
#define BigMachine(mp) (((mp)->stop - (mp)->start) > 1)
@ -156,8 +245,15 @@ REcompile(char *re, size_t len)
t = RE_lex(m_stack(0));
memset(m_ptr, 0, sizeof(*m_ptr));
/* provide for making the trace a little easier to read by indenting */
#if OPT_TRACE > 1
#define M_FMT(format) "@%d: %*s " format, __LINE__, 4 * ((int) (m_ptr - m_array)), " "
#else
#define M_FMT(format) format
#endif
while (1) {
TRACE(("RE_lex token %s\n", token_name(t)));
TRACE((M_FMT("RE_lex token %s\n"), token_name(t)));
switch (t) {
case T_STR:
case T_ANY:
@ -179,7 +275,7 @@ REcompile(char *re, size_t len)
* convert m{3,} to mmm* (with a limit of MAX_INT)
* convert m{3,10} to mmm* with a limit of 10
*/
TRACE(("interval {%ld,%ld}\n", (long) intrvalmin, (long) intrvalmax));
TRACE((M_FMT("interval {%ld,%ld}\n"), (long) intrvalmin, (long) intrvalmax));
if ((m_ptr - m_array) < STACKSZ)
memset(m_ptr + 1, 0, sizeof(*m_ptr));
if (intrvalmin == 0) { /* zero or more */
@ -227,49 +323,49 @@ REcompile(char *re, size_t len)
RE_free(m_ptr->start);
m_ptr--;
}
TRACE(("RE_lex token %s\n",
TRACE((M_FMT("RE_lex token %s\n"),
"of zero interval is ignored!"));
break;
case 1:
RE_01(m_ptr); /* m{0,1} which is m? */
TRACE(("RE_lex token %s\n", token_name(T_Q)));
TRACE((M_FMT("RE_lex token %s\n"), token_name(T_Q)));
break;
default:
RE_close_limit(m_ptr, intrvalmin, intrvalmax);
TRACE(("RE_lex token %s\n", token_name(T_Q)));
TRACE((M_FMT("RE_lex token %s\n"), token_name(T_Q)));
}
} else if (BigMachine(m_ptr)) {
RE_poscl_limit(m_ptr, intrvalmin, intrvalmax);
#ifdef NO_RI_LOOP_UNROLL
} else if (intrvalmin >= 1) { /* one or more */
RE_poscl_limit(m_ptr, intrvalmin, intrvalmax);
TRACE(("RE_lex token %s\n", token_name(T_PLUS)));
#else
} else if (intrvalmin == 1) { /* one or more */
RE_poscl_limit(m_ptr, intrvalmin, intrvalmax);
TRACE(("RE_lex token %s\n", token_name(T_PLUS)));
#endif
} else if (m_ptr->start != NULL) { /* n or more */
register Int i;
/* copy 2 copies of m_ptr, use 2nd copy to replace
the first copy that gets swallowed by concat */
MACHINE *result_mp = m_ptr;
MACHINE *concat_mp = (m_ptr + 1);
MACHINE *new_mp = (m_ptr + 2);
TRACE(("calling duplicate_m result_mp %ld -> concat_mp %ld\n",
result_mp - m_array,
concat_mp - m_array));
duplicate_m(concat_mp, result_mp);
TRACE(("calling duplicate_m result_mp %ld -> new_mp %ld\n",
result_mp - m_array,
new_mp - m_array));
duplicate_m(new_mp, result_mp);
for (i = 2; i <= intrvalmin; i++) {
RE_cat(result_mp, concat_mp);
duplicate_m(concat_mp, new_mp);
/* loop-unrolling only works if min==max, so that the loops in
* test/match functions can process the whole loop in each
* iteration */
if (FIXME_INTERVAL_LIMITS || intrvalmin == intrvalmax) {
register Int i;
/* copy 2 copies of m_ptr, use 2nd copy to replace
the first copy that gets swallowed by concat */
MACHINE *result_mp = m_ptr;
MACHINE *concat_mp = (m_ptr + 1);
MACHINE *new_mp = (m_ptr + 2);
TRACE((M_FMT("calling duplicate_m result_mp %ld -> concat_mp %ld\n"),
result_mp - m_array,
concat_mp - m_array));
duplicate_m(concat_mp, result_mp);
TRACE((M_FMT("calling duplicate_m result_mp %ld -> new_mp %ld\n"),
result_mp - m_array,
new_mp - m_array));
duplicate_m(new_mp, result_mp);
for (i = 2; i <= intrvalmin; i++) {
RE_cat(result_mp, concat_mp);
duplicate_m(concat_mp, new_mp);
}
/* don't need 2nd copy in new_mp */
RE_free(new_mp->start);
} else {
RE_poscl_limit(m_ptr, intrvalmin, intrvalmax);
}
/* don't need 2nd copy in new_mp */
RE_free(new_mp->start);
}
break;
#endif /* ! NO_INTERVAL_EXPR */
@ -358,7 +454,7 @@ REcompile(char *re, size_t len)
op_ptr->token = t;
} /* end of switch */
if (m_ptr == m_stack(STACKSZ - 1)) {
if (m_ptr >= m_stack(STACKSZ - 1)) {
/*overflow */
RE_error_trap(-ERR_5);
}
@ -431,19 +527,3 @@ REerror(void)
{
return REerrlist[REerrno];
}
#ifndef NO_INTERVAL_EXPR
/* duplicate a machine, oldmp into newmp */
void
duplicate_m(MACHINE * newmp, MACHINE * oldmp)
{
register STATE *p;
TRACE(("duplicate_m %p -> %p\n", (void *) oldmp, (void *) newmp));
TRACE(("...start %p\n", (void *) oldmp->start));
TRACE(("...stop %p\n", (void *) oldmp->stop));
p = (STATE *) RE_malloc(2 * STATESZ);
RE_copy_states(p, oldmp->start, 2);
newmp->start = (STATE *) p;
newmp->stop = (STATE *) (p + 1);
}
#endif /* NO_INTERVAL_EXPR */

136
rexp.h
View File

@ -12,7 +12,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp.h,v 1.44 2024/11/11 20:59:21 tom Exp $
* $MawkId: rexp.h,v 1.45 2024/12/31 11:42:44 tom Exp $
*/
#ifndef REXP_H
@ -46,7 +46,11 @@ typedef enum {
,M_2JA /* optional (undesirable) jump */
,M_2JB /* optional (desirable) jump */
,M_SAVE_POS /* push position onto stack */
,M_2JC /* pop pos'n, optional jump if advanced */
,M_2JC /* pop position, optional jump if advanced */
#ifndef NO_INTERVAL_EXPR
,M_ENTER /* begin counted loop (reset counter) */
,M_LOOP /* end counted loop (update/test counter) */
#endif
,M_ACCEPT /* end of match */
,U_ON /* ...distinct from the preceding */
} MAWK_REGEX;
@ -71,8 +75,8 @@ typedef struct _state
int jump;
} s_data;
#ifndef NO_INTERVAL_EXPR
Int it_min; /* used for s_type == M_2JC */
Int it_max; /* used for s_type == M_2JC */
Int it_min; /* used for s_type == M_LOOP */
Int it_max; /* used for s_type == M_LOOP */
Int it_cnt;
#endif
}
@ -139,8 +143,8 @@ typedef struct _rt_state
STATE *m; /* save the machine ptr */
int u; /* save the u_flag */
char *s; /* save the active string ptr */
int sp; /* size of position stack */
int tp; /* offset to top entry of position stack */
int pos_index; /* index into position stack */
int top_index; /* offset to top entry of position stack */
char *ss; /* save the match start -- only used by REmatch */
}
#endif
@ -157,7 +161,7 @@ typedef struct _rt_pos_entry
/* run time stack frame responsible for removing this node */
int owner;
/* previous node is this - this->prev_offset. See RE_pos_pop() */
/* previous node is this - this->prev_offset. See pos_pop() */
int prev_offset;
}
#endif
@ -184,12 +188,6 @@ extern STATE *RE_poscl(MACHINE *);
extern void RE_01(MACHINE *);
extern GCC_NORETURN void RE_panic(const char *, ...) GCC_PRINTFLIKE(1,2);
#ifndef NO_INTERVAL_EXPR
extern void RE_close_limit(MACHINE *, Int, Int);
extern void RE_poscl_limit(MACHINE *, Int, Int);
extern void duplicate_m(MACHINE *, MACHINE *);
#endif
#ifndef MAWK_H
extern char *str_str(char *, size_t, char *, size_t);
#endif
@ -213,73 +211,57 @@ extern Int intrvalmin;
extern Int intrvalmax;
extern char *re_exp;
#if defined(LOCAL_REGEXP) && defined(REGEXP_INTERNALS)
static /* inline */ RT_POS_ENTRY *
RE_pos_push(RT_POS_ENTRY * head, const RT_STATE * owner, const char *s)
{
head->pos = s;
head->owner = (int) (owner - RE_run_stack_base);
if (++head == RE_pos_stack_limit) {
head = RE_new_pos_stack();
}
head->prev_offset = 1;
return head;
}
static /* inline */ const char *
RE_pos_pop(RT_POS_ENTRY ** head, const RT_STATE * current)
{
RT_POS_ENTRY *prev2 = *head - (*head)->prev_offset;
if (prev2->owner == current - RE_run_stack_base) { /* likely */
/* no need to preserve intervening nodes */
*head = prev2;
} else if (*head == prev2) {
RE_panic("unbalanced M_SAVE_POS and M_2JC");
} else {
(*head)->prev_offset += prev2->prev_offset;
}
return prev2->pos;
}
#ifndef NO_INTERVAL_EXPR
/* reset it_cnt to zero for the M_2JC state
* which is where loop count is checked
*/
static void
RE_init_it_cnt(STATE * s)
{
STATE *p = s;
while (p->s_type < M_ACCEPT) {
if (p->s_type == M_2JC)
p->it_cnt = 0;
p++;
}
}
#if OPT_TRACE
#define if_TRACE(stmt) stmt
#else
#define RE_init_it_cnt(s) /* nothing */
#define if_TRACE(stmt) /*nothing*/
#endif
#ifndef NO_INTERVAL_EXPR
#undef NO_RI_LOOP_UNROLL /* experimental 2020/10/22 -TD */
#ifdef NO_RI_LOOP_UNROLL
#else
static void
RE_set_limit(STATE * s, Int minlimit, Int maxlimit)
{
STATE *p = s;
while (p->s_type < M_ACCEPT) {
if (p->s_type == M_2JC) {
p->it_min = minlimit;
p->it_max = maxlimit;
}
p++;
}
}
#endif /* ! NO_RI_LOOP_UNROLL */
#endif /* ! NO_INTERVAL_EXPR */
/*
 * pos_push(pos_param, run_param, position)
 *
 * Record `position` in the position-stack entry that pos_param points to,
 * tag that entry with the index of its owning run-stack frame
 * (run_param - RE_run_stack_base), then advance pos_param to the next
 * entry.  If the advance reaches RE_pos_stack_limit, pos_param is replaced
 * by the value returned from RE_new_pos_stack().  The new top entry gets
 * prev_offset = 1, i.e., its predecessor is the entry just written.
 *
 * pos_param must be a modifiable lvalue; it is evaluated several times, so
 * it must not have side effects.  `position` is evaluated exactly once.
 *
 * The value stored is the `position` argument itself, rather than whatever
 * variable named `s` happens to be in scope at the call site.
 */
#define pos_push(pos_param, run_param, position) do { \
	(pos_param)->pos = (position); \
	(pos_param)->owner = (int) ((run_param) - RE_run_stack_base); \
	\
	TRACE2(("[%s@%d] pos_push #%ld: \"%s\" owner %d\n", \
		__FILE__, __LINE__, \
		(long) ((pos_param) - RE_pos_stack_base), \
		NonNull((pos_param)->pos), \
		(pos_param)->owner)); \
	\
	if (++(pos_param) == RE_pos_stack_limit) { \
	    (pos_param) = RE_new_pos_stack(); \
	} \
	if_TRACE((pos_param)->pos = NULL); \
	if_TRACE((pos_param)->owner = 0); \
	(pos_param)->prev_offset = 1; \
    } while (0)
/*
 * pos_pop(pos_param, run_param, popped_position)
 *
 * Retrieve the most recently saved position that is still reachable from
 * the top entry pos_param, storing it in popped_position.
 *
 * The previous entry is located prev_offset entries below pos_param:
 *   - if it is owned by the current run-stack frame
 *     (run_param - RE_run_stack_base), it is really removed: pos_param is
 *     moved down to it;
 *   - if it is the same entry as pos_param (prev_offset of zero), the
 *     stack is inconsistent and RE_panic() is called;
 *   - otherwise the entry is kept in place, and only the top entry's
 *     prev_offset is enlarged so that later pops skip over it.
 *
 * pos_param and popped_position must be modifiable lvalues; pos_param and
 * run_param are evaluated more than once, so they must not have side
 * effects.
 */
#define pos_pop(pos_param, run_param, popped_position) do { \
	RT_POS_ENTRY *pos_pop_prev_ = (pos_param) - (pos_param)->prev_offset; \
	\
	if (pos_pop_prev_->owner == (run_param) - RE_run_stack_base) { /* likely */ \
	    /* no need to preserve intervening nodes */ \
	    TRACE2(("[%s@%d] pos_pop #%ld -> #%ld \"%s\" owner %d\n", \
		    __FILE__, __LINE__, \
		    (long) ((pos_param) - RE_pos_stack_base), \
		    (long) (pos_pop_prev_ - RE_pos_stack_base), \
		    NonNull(pos_pop_prev_->pos), \
		    pos_pop_prev_->owner)); \
	    (pos_param) = pos_pop_prev_; \
	} else if ((pos_param) == pos_pop_prev_) { \
	    RE_panic("unbalanced M_SAVE_POS and M_2JC"); \
	} else { \
	    TRACE2(("[%s@%d] pos_pop #%ld: \"%s\" offset %d -> %d\n", \
		    __FILE__, __LINE__, \
		    (long) ((pos_param) - RE_pos_stack_base), \
		    NonNull((pos_param)->pos), \
		    (pos_param)->prev_offset, \
		    (pos_param)->prev_offset + pos_pop_prev_->prev_offset)); \
	    (pos_param)->prev_offset += pos_pop_prev_->prev_offset; \
	} \
	(popped_position) = pos_pop_prev_->pos; \
    } while (0)
#if defined(LOCAL_REGEXP) && defined(REGEXP_INTERNALS)
#ifdef NO_LEAKS
extern void RE_copy_states(STATE *, const STATE *, size_t);

19
rexp0.c
View File

@ -12,7 +12,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp0.c,v 1.50 2024/12/14 21:21:20 tom Exp $
* $MawkId: rexp0.c,v 1.52 2024/12/30 19:17:41 tom Exp $
*/
/* lexical scanner */
@ -276,6 +276,9 @@ RE_lex(MACHINE * mp)
case T_LP:
switch (prev) {
#ifndef NO_INTERVAL_EXPR
case T_RB:
#endif
case T_CHAR:
case T_STR:
case T_ANY:
@ -288,13 +291,6 @@ RE_lex(MACHINE * mp)
case T_U:
return prev = T_CAT;
#ifndef NO_INTERVAL_EXPR
case T_RB:
if (!repetitions_flag) {
return prev = T_CAT;
}
#endif
/* FALLTHRU */
default:
nest++;
@ -626,6 +622,9 @@ lookup_cclass(char **start)
int first = -2;
int last = -2;
if (data == NULL)
RE_error_trap(-ERR_3);
for (ch = 0; ch < 256; ++ch) {
switch (code) {
case CCLASS_NONE:
@ -678,6 +677,8 @@ lookup_cclass(char **start)
if (used + 2 >= have) {
have *= 2;
data = realloc(data, sizeof(CCLASS) * have);
if (data == NULL)
RE_error_trap(-ERR_3);
}
data[used].first = first;
data[used].last = last;
@ -689,6 +690,8 @@ lookup_cclass(char **start)
if (used + 2 >= have) {
have *= 2;
data = realloc(data, sizeof(CCLASS) * have);
if (data == NULL)
RE_error_trap(-ERR_3);
}
data[used].first = first;
data[used].last = last;

66
rexp1.c
View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp1.c,v 1.28 2024/12/14 12:57:40 tom Exp $
* $MawkId: rexp1.c,v 1.31 2024/12/30 15:46:23 tom Exp $
*/
/* re machine operations */
@ -113,9 +113,6 @@ RE_cat(MACHINE * mp, MACHINE * np)
#endif
mp->stop = mp->start + (sz - 1);
RE_copy_states(mp->start + sz1, np->start, sz2);
#ifndef NO_INTERVAL_EXPR
mp->start[sz].s_type = M_ACCEPT; /* this is needed in RE_init_it_cnt */
#endif
RE_free(np->start);
}
@ -143,6 +140,12 @@ RE_or(MACHINE * mp, MACHINE * np)
p->s_data.jump = (int) szn;
}
/*
 * is_LOOP_TYPE(type) is true when `type` is a state type that ends a loop:
 * M_2JC always, and also M_LOOP when interval expressions are compiled in
 * (M_LOOP is only declared in that configuration).  `type` is evaluated
 * more than once in the interval-expression variant.
 */
#ifndef NO_INTERVAL_EXPR
#define is_LOOP_TYPE(type) ((type) == M_2JC || (type) == M_LOOP)
#else
#define is_LOOP_TYPE(type) ((type) == M_2JC)
#endif
/*
* Ignore attempts to wrap an atom using zero-or-more repetitions in another
* loop with the same condition.
@ -162,59 +165,15 @@ RE_or(MACHINE * mp, MACHINE * np)
((ps + 2)->s_type % U_ON) != M_STR && \
((ps + 2)->s_type % U_ON) != M_U) { \
TRACE((".. expected atom %s\n", REs_type(ps + 2))); \
} else if (((ps + 3)->s_type % U_ON) != M_2JC) { \
TRACE((".. expected loop %s\n", REs_type(ps + 3))); \
} else { \
} else if (is_LOOP_TYPE((ps + 3)->s_type)) { \
TRACE(("ignore repeated loop\n")); \
} else { \
TRACE((".. expected loop %s\n", REs_type(ps + 3))); \
return NULL; \
} \
} \
}
#ifndef NO_INTERVAL_EXPR
/* replace m with m* limited to the max iterations
(variation of m* closure) */
void
RE_close_limit(MACHINE * mp, Int min_limit, Int max_limit)
{
#ifdef NO_RI_LOOP_UNROLL
STATE *s;
TRACE(("RE_close_limit " INT_FMT ".." INT_FMT "\n", min_limit, max_limit));
if ((s = RE_close(mp)) != 0) {
if (s->s_type == M_2JC) {
s->it_min = min_limit;
s->it_max = max_limit;
}
}
#else
RE_close(mp);
RE_set_limit(mp->start, min_limit, max_limit);
#endif
}
/* replace m with m+ limited to the max iterations
which is one or more, limited
(variation of m+ positive closure) */
void
RE_poscl_limit(MACHINE * mp, Int min_limit, Int max_limit)
{
#ifdef NO_RI_LOOP_UNROLL
STATE *s;
TRACE(("RE_poscl_limit " INT_FMT ".." INT_FMT "\n", min_limit, max_limit));
if ((s = RE_poscl(mp)) != NULL) {
if (s->s_type == M_2JC) {
s->it_min = min_limit;
s->it_max = max_limit;
}
}
#else
RE_poscl(mp);
RE_set_limit(mp->start, min_limit, max_limit);
#endif
}
#endif /* ! NO_INTERVAL_EXPR */
/* UNARY OPERATIONS */
/* replace m by m* (zero or more) */
@ -226,7 +185,7 @@ RE_close(MACHINE * mp)
size_t sz;
/*
* 2JA end
* 2JA end
* loop:
* SAVE_POS
* m
@ -329,6 +288,9 @@ RE_malloc(size_t sz)
TRACE(("RE_malloc(%lu) ->%p\n", (unsigned long) sz, p));
if (p == NULL)
RE_error_trap(MEMORY_FAILURE);
#ifdef OPT_TRACE
memset(p, 0, sz);
#endif
return p;
}

211
rexp2.c
View File

@ -12,7 +12,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp2.c,v 1.49 2024/12/14 12:57:40 tom Exp $
* $MawkId: rexp2.c,v 1.50 2024/12/31 15:21:17 tom Exp $
*/
/* test a string against a machine */
@ -91,7 +91,6 @@ RE_new_run_stack(void)
RE_run_stack_limit = RE_run_stack_base + newsize;
RE_run_stack_empty = RE_run_stack_base - 1;
/* return the new stackp */
return RE_run_stack_base + oldsize;
}
@ -111,49 +110,35 @@ RE_new_pos_stack(void)
fprintf(stderr, "out of memory for RE string position stack\n");
mawk_exit(100);
}
#if OPT_TRACE
memset(RE_pos_stack_base + oldsize, 0,
(newsize - oldsize) * sizeof(RT_POS_ENTRY));
#endif
RE_pos_stack_limit = RE_pos_stack_base + newsize;
RE_pos_stack_empty = RE_pos_stack_base;
/* return the new stackp */
return RE_pos_stack_base + oldsize;
}
#ifdef DEBUG
static RT_STATE *
slow_push(
RT_STATE * sp,
STATE * m,
char *s,
RT_POS_ENTRY * pos_top,
int u)
{
if (sp == RE_run_stack_limit)
sp = RE_new_run_stack();
sp->m = m;
sp->s = s;
sp->u = u;
sp->sp = pos_top - RE_pos_stack_base;
sp->tp = pos_top->prev_offset;
return sp;
}
#endif
/*
 * rt_push(mx, sx, px, ux) -- push a frame onto the run-time stack.
 *
 * Advances the `run_entry` variable of the enclosing function to the next
 * frame; if that reaches RE_run_stack_limit, run_entry is replaced by the
 * value returned from RE_new_run_stack().  The frame then records:
 *   mx  the machine state pointer,
 *   sx  the string pointer,
 *   px  the position-stack entry, stored as its index from
 *       RE_pos_stack_base together with its current prev_offset,
 *   ux  the u_flag.
 *
 * mx and px are evaluated more than once; arguments must not have side
 * effects.  The trace message also uses rt_form/rt_args, which refer to
 * the caller's `m` and `machine`.
 */
#define rt_push(mx,sx,px,ux) do { \
if (++run_entry == RE_run_stack_limit) \
run_entry = RE_new_run_stack(); \
run_entry->m = (mx); \
run_entry->s = (sx); \
run_entry->pos_index = (int) ((px) - RE_pos_stack_base); \
run_entry->top_index = (px)->prev_offset; \
run_entry->u = (ux); \
TRACE2((rt_form "rt_push %s pos@%d top@%d\n", rt_args, \
REs_type(mx), \
run_entry->pos_index, \
run_entry->top_index)); \
} while(0)
#ifdef DEBUG
#define push(mx,sx,px,ux) do { \
stackp = slow_push(++stackp, mx, sx, px, ux); \
} while(0)
#else
#define push(mx,sx,px,ux) do { \
if (++stackp == RE_run_stack_limit) \
stackp = RE_new_run_stack(); \
stackp->m = (mx); \
stackp->s = (sx); \
stackp->u = (ux); \
stackp->sp = (int) ((px) - RE_pos_stack_base); \
stackp->tp = (px)->prev_offset; \
} while(0)
#endif
/*
 * rt_pop() -- discard the top frame of the run-time stack by stepping the
 * enclosing function's `run_entry` back by one, after tracing the pop.
 * The frame's contents are not cleared.
 */
#define rt_pop() do { \
TRACE2((rt_form "rt_pop\n", rt_args)); \
run_entry--; \
} while (0)
#define CASE_UANY(x) case (x)+U_OFF: /* FALLTHRU */ case (x)+U_ON
@ -190,10 +175,11 @@ REtest(char *str, /* string to test */
{
register STATE *m = machine;
char *s = str;
register RT_STATE *stackp;
const char *old_s;
register RT_STATE *run_entry;
int u_flag;
char *str_end = str + len;
RT_POS_ENTRY *sp;
RT_POS_ENTRY *pos_entry;
int ti; /*convenient temps */
STATE *tm;
@ -201,42 +187,101 @@ REtest(char *str, /* string to test */
/* handle the easy case quickly */
if (m->s_type == M_STR && (m + 1)->s_type == M_ACCEPT) {
return str_str(s, len, m->s_data.str, (size_t) m->s_len) != (char *) 0;
TRACE(("returning str_str\n"));
return str_str(s, len, m->s_data.str, m->s_len) != (char *) 0;
} else {
u_flag = U_ON;
stackp = RE_run_stack_empty;
sp = RE_pos_stack_empty;
RE_init_it_cnt(m);
run_entry = RE_run_stack_empty;
pos_entry = RE_pos_stack_empty;
if_TRACE(memset(pos_entry, 0, 2 * sizeof(*pos_entry)));
RE_CASE();
}
refill:
if (stackp == RE_run_stack_empty) {
#ifndef NO_INTERVAL_EXPR
if (run_entry != RE_run_stack_empty) {
STATE *m2;
int found;
#if OPT_TRACE > 1
RT_STATE *statep;
RT_POS_ENTRY *posp;
for (statep = RE_run_stack_base; statep <= run_entry; ++statep) {
TRACE(("check - STATE %d: m %03d s \"%s\" pos@%d top@%d u %d\n",
(int) (statep - RE_run_stack_base),
(int) (statep->m - machine),
NonNull(statep->s),
statep->pos_index,
statep->top_index,
statep->u));
}
for (posp = RE_pos_stack_base; posp <= pos_entry; ++posp) {
TRACE(("check - POS %d: pos \"%s\" owner@%d prev@%d\n",
(int) (posp - RE_pos_stack_base),
NonNull(posp->pos),
posp->owner,
posp->prev_offset));
}
#endif
/*
* We're here because we had a mismatch in a loop. Find the end of the
* loop, and reset it if the mismatch was due to too-few matches.
* FIXME - provide this info in compile-stage
*/
found = 0;
for (m2 = run_entry->m; m2->s_type < M_ACCEPT; ++m2) {
TRACE(("CHECK %03d %s\n", (int) (m2 - machine), REs_type(m2)));
switch (m2->s_type) {
case M_SAVE_POS:
case M_2JA:
case M_2JB:
case M_2JC:
found = 1;
break;
case M_LOOP:
found = 1;
TRACE2(("Found M_LOOP: %03d\n", (int) (m2 - machine)));
TRACE2(("currently " INT_FMT " [" INT_FMT ".." INT_FMT "]\n",
m2->it_cnt, m2->it_min, m2->it_max));
if (m2->it_cnt < m2->it_min) {
TRACE2(("too few - invoke M_ENTER\n"));
run_entry->m = m2 + m2->s_data.jump - 1;
}
break;
}
if (found)
break;
}
}
#endif
if (run_entry == RE_run_stack_empty) {
TR_AT("accept failure");
return 0;
}
m = stackp->m;
s = stackp->s;
sp = RE_pos_stack_base + stackp->sp;
sp->prev_offset = stackp->tp;
u_flag = (stackp--)->u;
m = run_entry->m;
s = run_entry->s;
pos_entry = RE_pos_stack_base + run_entry->pos_index;
pos_entry->prev_offset = run_entry->top_index;
u_flag = run_entry->u;
rt_pop();
reswitch:
TRACE2(("[%s@%d] %d:%03d %-8s %-15s: %s\n", __FILE__, __LINE__,
(int) (stackp - RE_run_stack_base),
(int) (m - machine),
REs_type(m),
RE_u_end(u_flag),
s));
TRACE((rt_form "%-8s %-15s: \"%s\"\n", rt_args,
REs_type(m),
RE_u_end(u_flag),
s));
switch (m->s_type + u_flag) {
case M_STR + U_OFF + END_OFF:
if (s > str_end
|| (size_t) (str_end - s) < m->s_len
|| memcmp(s, m->s_data.str, m->s_len)) {
TR_AT("no match");
RE_FILL();
}
s += m->s_len;
m++;
TR_AT("match");
RE_CASE();
case M_STR + U_OFF + END_ON:
@ -249,10 +294,11 @@ REtest(char *str, /* string to test */
RE_CASE();
case M_STR + U_ON + END_OFF:
if (!(s = str_str(s, (size_t) (str_end - s), m->s_data.str, (size_t) m->s_len))) {
s = str_str(s, (size_t) (str_end - s), m->s_data.str, m->s_len);
if (s == NULL) {
RE_FILL();
}
push(m, s + 1, sp, U_ON);
rt_push(m, s + 1, pos_entry, U_ON);
s += m->s_len;
m++;
u_flag = U_OFF;
@ -297,7 +343,7 @@ REtest(char *str, /* string to test */
s++;
}
s++;
push(m, s, sp, U_ON);
rt_push(m, s, pos_entry, U_ON);
m++;
u_flag = U_OFF;
RE_CASE();
@ -336,7 +382,7 @@ REtest(char *str, /* string to test */
RE_FILL();
}
s++;
push(m, s, sp, U_ON);
rt_push(m, s, pos_entry, U_ON);
m++;
u_flag = U_OFF;
RE_CASE();
@ -392,63 +438,88 @@ REtest(char *str, /* string to test */
RE_CASE();
CASE_UANY(M_SAVE_POS): /* save position for a later M_2JC */
sp = RE_pos_push(sp, stackp, s);
pos_push(pos_entry, run_entry, s);
m++;
RE_CASE();
CASE_UANY(M_2JA): /* take the non jump branch */
/* don't stack an ACCEPT */
if ((tm = m + m->s_data.jump)->s_type == M_ACCEPT) {
TR_AT("accept success");
return 1;
}
push(tm, s, sp, u_flag);
rt_push(tm, s, pos_entry, u_flag);
m++;
RE_CASE();
CASE_UANY(M_2JC): /* take the jump branch if position changed */
#ifndef NO_INTERVAL_EXPR
if (m->it_max < MAX__INT && ++(m->it_cnt) >= m->it_max) {
RE_pos_pop(&sp, stackp);
CASE_UANY(M_ENTER): /* take the jump branch if position changed */
TRACE(("reset loop " INT_FMT " [" INT_FMT ".." INT_FMT "]\n",
m->it_cnt, m->it_min, m->it_max));
(m + m->s_data.jump)->it_cnt = 0;
m++;
RE_CASE();
CASE_UANY(M_LOOP): /* take the jump branch if position changed */
m->it_cnt++;
TRACE(("checking loop " INT_FMT " [" INT_FMT ".." INT_FMT "]\n",
m->it_cnt, m->it_min, m->it_max));
if (m->it_max < MAX__INT && m->it_cnt >= m->it_max) {
m++;
TR_AT("past maximum for M_LOOP");
RE_CASE(); /* test the next thing */
} else
} else if (m->it_cnt < m->it_min) {
m += m->s_data.jump;
TR_AT("under minimum for M_LOOP");
RE_CASE();
}
goto fall_through; /* workaround for gcc bug */
fall_through:
/* FALLTHRU */
#endif /* ! NO_INTERVAL_EXPR */
if (RE_pos_pop(&sp, stackp) == s) {
CASE_UANY(M_2JC): /* take the jump branch if position changed */
pos_pop(pos_entry, run_entry, old_s);
if (old_s == s) {
/* did not advance: do not jump back */
m++;
RE_CASE();
}
/* don't stack an ACCEPT */
if ((tm = m + 1)->s_type == M_ACCEPT) {
TR_AT("accept success");
return 1;
}
push(tm, s, sp, u_flag);
rt_push(tm, s, pos_entry, u_flag);
m += m->s_data.jump;
RE_CASE();
CASE_UANY(M_2JB):
/* don't stack an ACCEPT */
if ((tm = m + 1)->s_type == M_ACCEPT) {
TR_AT("accept success");
return 1;
}
push(tm, s, sp, u_flag);
rt_push(tm, s, pos_entry, u_flag);
m += m->s_data.jump;
RE_CASE();
CASE_UANY(M_ACCEPT):
TR_AT("accept success");
return 1;
default:
RE_bad_state("REtest", m, u_flag);
}
return 0;
}
#undef push
#undef rt_push
#include <field.h>
char *
is_string_split(PTR q, size_t * lenp)
is_string_split(PTR q, size_t *lenp)
{
STATE *p = cast_to_re(q);

232
rexp3.c
View File

@ -12,54 +12,36 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp3.c,v 1.69 2024/12/11 21:45:11 tom Exp $
* $MawkId: rexp3.c,v 1.70 2024/12/31 10:20:48 tom Exp $
*/
/* match a string against a machine */
#include <rexp.h>
#define push(mx,sx,px,ssx,ux) do { \
if (++stackp == RE_run_stack_limit) \
stackp = RE_new_run_stack() ;\
TRACE2(("[%s@%d] pushing %d:%03d\n", __FILE__, __LINE__, \
(int)(stackp - RE_run_stack_base), \
(int)(m - machine))); \
stackp->m = (mx); \
stackp->s = (sx); \
stackp->sp = (int) ((px) - RE_pos_stack_base); \
stackp->tp = (px)->prev_offset; \
stackp->ss = (ssx); \
stackp->u = (ux); \
/*
 * rt_push(mx, sx, px, ssx, ux) -- push a frame onto the run-time stack.
 *
 * Advances the `run_entry` variable of the enclosing function to the next
 * frame; if that reaches RE_run_stack_limit, run_entry is replaced by the
 * value returned from RE_new_run_stack().  The frame then records:
 *   mx   the machine state pointer,
 *   sx   the string pointer,
 *   px   the position-stack entry, stored as its index from
 *        RE_pos_stack_base together with its current prev_offset,
 *   ssx  the match-start pointer,
 *   ux   the u_flag.
 *
 * mx and px are evaluated more than once; arguments must not have side
 * effects.  The trace message also uses rt_form/rt_args, which refer to
 * the caller's `m` and `machine`.
 */
#define rt_push(mx,sx,px,ssx,ux) do { \
if (++run_entry == RE_run_stack_limit) \
run_entry = RE_new_run_stack() ;\
run_entry->m = (mx); \
run_entry->s = (sx); \
run_entry->pos_index = (int) ((px) - RE_pos_stack_base); \
run_entry->top_index = (px)->prev_offset; \
run_entry->ss = (ssx); \
run_entry->u = (ux); \
TRACE2((rt_form "rt_push %s\n", rt_args, REs_type(mx))); \
} while(0)
#ifdef NO_RI_LOOP_UNROLL
#define restart_count(old,new) \
if (old != new) { \
TRACE2(("RESET %p ->%p\n", old, new)); \
m->it_cnt = 1; \
}
#else
#define restart_count(old,new) /* nothing */
#endif
#define CASE_UANY(x) case (x)+U_OFF: /* FALLTHRU */ case (x)+U_ON
#define TR_AT(what) \
TRACE2(("[%s@%d] %d.%03d %s\n", __FILE__, __LINE__, \
(int) (stackp - RE_run_stack_base), \
(int) (m - machine), \
what))
#define TR_BEST() \
TRACE2(("[%s@%d] new best [%d..%d]'%.*s'\n", __FILE__, __LINE__, \
TRACE2((rt_form "new best [%d..%d] \"%.*s\"\n", rt_args, \
(int) (cb_ss - str), \
(int) (cb_e - str), \
(int) (cb_e - cb_ss), \
cb_ss))
#define TR_STR(s) \
TRACE(("[%s@%d] str:%i len:%lu\n", __FILE__, __LINE__, \
TRACE((rt_form "str:%i len:%lu\n", rt_args, \
((s) ? (int) ((s) - str) : -99), \
(unsigned long) *lenp))
@ -68,7 +50,7 @@ the GNU General Public License, version 2, 1991.
*lenp = (size_t) (cb_e - cb_ss); \
} \
TR_STR(s); \
TRACE2(("[%s@%d] returning %d\n", __FILE__, __LINE__, \
TRACE2((rt_form "returning %d\n", rt_args, \
cb_ss ? (int)(cb_ss - str) : -1)); \
return cb_ss
@ -85,10 +67,11 @@ REmatch(char *str, /* string to test */
register STATE *m = machine;
char *s;
char *ss;
register RT_STATE *stackp;
const char *old_s;
RT_STATE *run_entry = NULL;
int u_flag;
char *str_end;
RT_POS_ENTRY *sp;
RT_POS_ENTRY *pos_entry;
char *ts;
/* state of current best match stored here */
@ -102,7 +85,7 @@ REmatch(char *str, /* string to test */
/* check for the easy case */
if (m->s_type == M_STR && (m + 1)->s_type == M_ACCEPT) {
if ((ts = str_str(str, str_len, m->s_data.str, (size_t) m->s_len))) {
if ((ts = str_str(str, str_len, m->s_data.str, m->s_len))) {
*lenp = m->s_len;
}
TR_STR(ts);
@ -113,18 +96,49 @@ REmatch(char *str, /* string to test */
s = str;
u_flag = U_ON;
cb_e = cb_ss = ss = (char *) 0;
stackp = RE_run_stack_empty;
sp = RE_pos_stack_empty;
RE_init_it_cnt(m);
run_entry = RE_run_stack_empty;
pos_entry = RE_pos_stack_empty;
RE_CASE();
refill:
TR_AT(("refill..."));
if (stackp == RE_run_stack_empty) {
TRACE((rt_form "refill... pos@%d\n", rt_args,
(int) (pos_entry - RE_pos_stack_base)));
#ifndef NO_INTERVAL_EXPR
if (0) {
#if OPT_TRACE > 1
RT_STATE *statep;
RT_POS_ENTRY *posp;
for (statep = RE_run_stack_base; statep <= run_entry; ++statep) {
TRACE(("%s - STATE %ld: m %03ld s \"%s\" pos@%d top@%d u %d\n",
statep == run_entry ? "CHECK" : "check",
(statep - RE_run_stack_base),
(statep->m - machine),
NonNull(statep->s),
statep->pos_index,
statep->top_index,
statep->u));
}
for (posp = RE_pos_stack_base; posp <= pos_entry; ++posp) {
TRACE(("%s - POS %ld: pos \"%s\" owner@%d prev@%d\n",
posp == pos_entry ? "CHECK" : "check",
(posp - RE_pos_stack_base),
NonNull(posp->pos),
posp->owner,
posp->prev_offset));
}
#endif
}
#endif
if (run_entry == RE_run_stack_empty) {
RE_TURN();
}
ss = stackp->ss;
s = (stackp--)->s;
ss = run_entry->ss;
s = run_entry->s;
rt_pop();
TRACE((rt_form "run-sp s=\"%s\", ss=\"%s\"\n", rt_args,
NonNull(s),
NonNull(ss)));
if (cb_ss) { /* does new state start too late ? */
if (ss) {
if (current_best(ss)) {
@ -135,32 +149,33 @@ REmatch(char *str, /* string to test */
}
}
m = (stackp + 1)->m;
TR_AT("now");
sp = RE_pos_stack_base + (stackp + 1)->sp;
sp->prev_offset = (stackp + 1)->tp;
u_flag = (stackp + 1)->u;
TRACE((rt_form "run-sp type %s -> %s\n", rt_args,
REs_type(m),
REs_type((run_entry + 1)->m)));
m = (run_entry + 1)->m;
pos_entry = RE_pos_stack_base + (run_entry + 1)->pos_index;
pos_entry->prev_offset = (run_entry + 1)->top_index;
u_flag = (run_entry + 1)->u;
reswitch:
TRACE(("[%s@%d] %d:%03d %-8s %-15s: %s\n", __FILE__, __LINE__,
(int) (stackp - RE_run_stack_base),
(int) (m - machine),
TRACE((rt_form "%-8s %-15s: \"%s\"\n", rt_args,
REs_type(m),
RE_u_end(u_flag),
cb_ss ? cb_ss : s));
switch (m->s_type + u_flag) {
case M_STR + U_OFF + END_OFF:
TR_AT("now");
if (s >= str_end || (str_end - s) < (ptrdiff_t) m->s_len) {
TR_AT("now");
TR_AT("now too far to match");
RE_FILL();
} else if (memcmp(s, m->s_data.str, m->s_len) != 0) {
TR_AT("now");
TR_AT("now mismatched");
RE_FILL();
}
TR_AT("now matched");
if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("now");
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -175,11 +190,12 @@ REmatch(char *str, /* string to test */
TR_AT("now");
if ((str_end - s) != (ptrdiff_t) m->s_len) {
RE_FILL();
} else if (memcmp(s, m->s_data.str, (size_t) m->s_len) != 0) {
} else if (memcmp(s, m->s_data.str, m->s_len) != 0) {
RE_FILL();
}
if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -197,12 +213,14 @@ REmatch(char *str, /* string to test */
} else if (s < str) {
s = str;
}
if (!(s = str_str(s, (size_t) (str_end - s), m->s_data.str, (size_t) m->s_len))) {
s = str_str(s, (size_t) (str_end - s), m->s_data.str, m->s_len);
if (s == NULL) {
RE_FILL();
}
push(m, s + 1, sp, ss, U_ON);
rt_push(m, s + 1, pos_entry, ss, U_ON);
if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -220,8 +238,7 @@ REmatch(char *str, /* string to test */
RE_FILL();
} else if (s < str) {
s = str;
}
{
} {
ptrdiff_t ti = (str_end - s) - (ptrdiff_t) m->s_len;
if (ti < 0 || memcmp(s = s + ti, m->s_data.str, m->s_len) != 0) {
RE_FILL();
@ -261,6 +278,7 @@ REmatch(char *str, /* string to test */
RE_FILL();
} else if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -281,9 +299,10 @@ REmatch(char *str, /* string to test */
break;
s++;
}
push(m, s + 1, sp, ss, U_ON);
rt_push(m, s + 1, pos_entry, ss, U_ON);
if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -304,6 +323,7 @@ REmatch(char *str, /* string to test */
} else if (!ss) {
char *xs = str_end - 1;
if (cb_ss && current_best(xs)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = xs;
@ -320,6 +340,7 @@ REmatch(char *str, /* string to test */
RE_FILL();
} else if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -334,6 +355,7 @@ REmatch(char *str, /* string to test */
RE_FILL();
} else if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -349,9 +371,10 @@ REmatch(char *str, /* string to test */
if (s >= str_end) {
RE_FILL();
}
push(m, s + 1, sp, ss, U_ON);
rt_push(m, s + 1, pos_entry, ss, U_ON);
if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -371,6 +394,7 @@ REmatch(char *str, /* string to test */
s = str_end - 1;
if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -408,6 +432,7 @@ REmatch(char *str, /* string to test */
RE_FILL();
} else if (!ss) {
if (cb_ss) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = str_end;
@ -424,6 +449,7 @@ REmatch(char *str, /* string to test */
}
} else if (!ss) {
if (cb_ss) {
TR_AT("new match is not better");
RE_FILL();
} else
ss = str_end;
@ -438,6 +464,7 @@ REmatch(char *str, /* string to test */
s = str;
if (!ss) {
if (cb_ss && current_best(s)) {
TR_AT("new match is not better");
RE_FILL();
} else {
ss = s;
@ -453,66 +480,56 @@ REmatch(char *str, /* string to test */
CASE_UANY(M_SAVE_POS): /* save position for a later M_2JC */
/* see also REtest */
sp = RE_pos_push(sp, stackp, s);
pos_push(pos_entry, run_entry, s);
m++;
RE_CASE();
CASE_UANY(M_2JA): /* take the non jump branch */
push(m + m->s_data.jump, s, sp, ss, u_flag);
rt_push(m + m->s_data.jump, s, pos_entry, ss, u_flag);
m++;
RE_CASE();
CASE_UANY(M_2JB): /* take the non jump branch */
push(m + m->s_data.jump, s, sp, ss, u_flag);
rt_push(m + m->s_data.jump, s, pos_entry, ss, u_flag);
m++;
RE_CASE();
CASE_UANY(M_2JC): /* take the jump branch if position changed */
/* see REtest */
#ifndef NO_INTERVAL_EXPR
#ifdef NO_RI_LOOP_UNROLL
m->it_cnt++;
TRACE(("checking loop " INT_FMT " [" INT_FMT ".." INT_FMT "]\n",
m->it_cnt, m->it_min, m->it_max));
TR_STR(s);
if (m->it_cnt < m->it_min) {
/* keep looping until minimum is met */
RE_pos_pop(&sp, stackp);
push(m + 1, s, sp, ss, u_flag);
m += m->s_data.jump;
TR_AT("now");
} else if ((m->it_cnt >= m->it_min)
&& (m->it_max == MAX__INT
|| (m->it_max < MAX__INT && m->it_cnt >= m->it_max))) {
/* quit looping once maximum is met */
RE_pos_pop(&sp, stackp);
m++;
TR_AT("now");
} else
#else /* !NO_RI_LOOP_UNROLL */
if (m->it_max < MAX__INT && ++(m->it_cnt) >= m->it_max) {
++m;
RE_CASE(); /* test the next thing */
} else
#endif /* NO_RI_LOOP_UNROLL */
if (RE_pos_pop(&sp, stackp) == s) {
/* fall out of loop, to next instruction */
m++;
TR_AT("now");
} else {
/* continue looping as long as matching */
push(m + 1, s, sp, ss, u_flag);
m += m->s_data.jump;
TR_AT("now");
}
CASE_UANY(M_ENTER): /* entering a counted loop: reset its iteration count */
/* the counter is kept in the state at the jump target (presumably the matching M_LOOP -- confirm in the compiler) */
(m + m->s_data.jump)->it_cnt = 0;
/* no branching here: always continue with the next state */
m++;
RE_CASE();
#else
if (RE_pos_pop(&sp, stackp) == s) {
m++;
CASE_UANY(M_LOOP): /* count this iteration, then check it against the min/max bounds */
m->it_cnt++;
TRACE(("checking #%d: loop " INT_FMT " [" INT_FMT ".." INT_FMT "]\n",
(int) (pos_entry - RE_pos_stack_base),
m->it_cnt, m->it_min, m->it_max));
if (m->it_max < MAX__INT && m->it_cnt >= m->it_max) {
/* a bounded maximum has been reached: advance to the next state */
++m;
TR_AT("now test the next thing");
RE_CASE(); /* test the next thing */
} else if (m->it_cnt < m->it_min) {
/* minimum not yet met: take the jump unconditionally for another iteration */
TR_AT("now continue getting minimum");
m += m->s_data.jump;
RE_CASE();
}
/* count is between the bounds: continue into the case that follows */
goto fall_through; /* workaround for gcc bug */
fall_through:
/* FALLTHRU */
#endif /* ! NO_INTERVAL_EXPR */
#endif
CASE_UANY(M_2JC): /* take the jump branch if position changed */
pos_pop(pos_entry, run_entry, old_s);
if (old_s == s) {
m++;
TR_AT("now fall out of loop");
} else {
rt_push(m + 1, s, pos_entry, ss, u_flag);
m += m->s_data.jump;
TR_AT("now continue loop to match");
}
RE_CASE();
case M_ACCEPT + U_OFF:
if (s >= str_end) {
@ -522,7 +539,6 @@ REmatch(char *str, /* string to test */
ss = s;
if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
/* we have a new current best */
restart_count(cb_ss, ss);
cb_ss = ss;
cb_e = s;
TR_BEST();
@ -544,7 +560,6 @@ REmatch(char *str, /* string to test */
}
if (!cb_ss || ss < cb_ss || (ss == cb_ss && s > cb_e)) {
/* we have a new current best */
restart_count(cb_ss, ss);
cb_ss = ss;
cb_e = s;
TR_BEST();
@ -554,5 +569,6 @@ REmatch(char *str, /* string to test */
default:
RE_bad_state("REmatch", m, u_flag);
}
return NULL;
}
#undef push
#undef rt_push

View File

@ -11,7 +11,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexpdb.c,v 1.30 2024/08/25 17:16:24 tom Exp $
* $MawkId: rexpdb.c,v 1.31 2024/12/30 15:35:57 tom Exp $
*/
#include <rexp.h>
@ -33,6 +33,10 @@ static const char xlat[][12] =
"M_2JB",
"M_SAVE_POS",
"M_2JC",
#ifndef NO_INTERVAL_EXPR
"M_ENTER",
"M_LOOP",
#endif
"M_ACCEPT"
};
@ -78,20 +82,24 @@ REmprint(STATE * m, FILE *f)
break;
case M_2JC:
fprintf(f, "\t%03d", line + p->s_data.jump);
#ifndef NO_INTERVAL_EXPR
if (p->it_min != 1 || p->it_max != MAX__INT) {
fprintf(f, " %c", L_CURL);
if (p->it_min != 0)
fprintf(f, INT_FMT, p->it_min);
if (p->it_max != p->it_min) {
fprintf(f, ",");
if (p->it_max != MAX__INT)
fprintf(f, INT_FMT, p->it_max);
}
fprintf(f, "%c", R_CURL);
}
#endif
break;
#ifndef NO_INTERVAL_EXPR
case M_ENTER:
fprintf(f, "\t%03d", line + p->s_data.jump);
break;
case M_LOOP:
fprintf(f, "\t%03d", line + p->s_data.jump);
fprintf(f, " %c", L_CURL);
if (p->it_min != 0)
fprintf(f, INT_FMT, p->it_min);
if (p->it_max != p->it_min) {
fprintf(f, ",");
if (p->it_max != MAX__INT)
fprintf(f, INT_FMT, p->it_max);
}
fprintf(f, "%c", R_CURL);
break;
#endif
case M_CLASS:
{
UChar *q = (UChar *) p->s_data.bvp;

View File

@ -10,7 +10,7 @@ the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: trace.c,v 1.26 2024/12/14 21:21:34 tom Exp $
* $MawkId: trace.c,v 1.27 2024/12/25 01:43:52 tom Exp $
*/
#define Visible_CELL
@ -110,7 +110,7 @@ void
TraceInst(INST * p, INST * base)
{
INST *q = da_this(p, base, trace_fp);
TRACE((" ...%ld\n", (long) (q - p)));
TRACE(("\t...%ld\n", (long) (q - p)));
if (p++ != q) {
switch ((MAWK_OPCODES) (base->op)) {
case AE_PUSHA: