mirror of
https://github.com/ThomasDickey/mawk-snapshots.git
synced 2026-01-26 19:09:15 +00:00
616 lines
14 KiB
C
616 lines
14 KiB
C
/* array.c */
|
|
/*
|
|
$MawkId: array.c,v 1.26 2021/05/28 23:59:47 tom Exp $
|
|
|
|
copyright 2009-2020,2021 Thomas E. Dickey
|
|
copyright 1991-1996,2014 Michael D. Brennan
|
|
|
|
This is a source file for mawk, an implementation of
|
|
the AWK programming language.
|
|
|
|
Mawk is distributed without warranty under the terms of
|
|
the GNU General Public License, version 2, 1991.
|
|
|
|
array.c and array.h were originally generated with the commands
|
|
|
|
notangle -R'"array.c"' array.w > array.c
|
|
notangle -R'"array.h"' array.w > array.h
|
|
|
|
Notangle is part of Norman Ramsey's noweb literate programming package
|
|
available from CTAN(ftp.shsu.edu).
|
|
*/
|
|
|
|
#include "mawk.h"
|
|
#include "symtype.h"
|
|
#include "memory.h"
|
|
#include "split.h"
|
|
#include "field.h"
|
|
#include "bi_vars.h"
|
|
|
|
struct anode;
|
|
typedef struct {
|
|
struct anode *slink, *ilink;
|
|
} DUAL_LINK;
|
|
|
|
typedef struct anode {
|
|
struct anode *slink;
|
|
struct anode *ilink;
|
|
STRING *sval;
|
|
unsigned hval;
|
|
Int ival;
|
|
CELL cell;
|
|
} ANODE;
|
|
|
|
static ANODE *find_by_ival(ARRAY, Int, int, int *);
|
|
|
|
static ANODE *find_by_sval(ARRAY, STRING *, int, int *);
|
|
|
|
#define NOT_AN_IVALUE (-Max_Int-1) /* usually 0x80000000 */
|
|
static void add_string_associations(ARRAY);
|
|
|
|
#define STARTING_HMASK 63 /* 2^6-1, must have form 2^n-1 */
|
|
#define MAX_AVE_LIST_LENGTH 12
|
|
#define hmask_to_limit(x) (((x)+1)*MAX_AVE_LIST_LENGTH)
|
|
#define ahash(sval) hash2((sval)->str, (sval)->len)
|
|
static void make_empty_table(ARRAY, int);
|
|
|
|
static void convert_split_array_to_table(ARRAY);
|
|
|
|
static void double_the_hash_table(ARRAY);
|
|
|
|
CELL *
|
|
array_find(
|
|
ARRAY A,
|
|
CELL *cp,
|
|
int create_flag)
|
|
{
|
|
ANODE *ap;
|
|
int redid;
|
|
if (A->size == 0 && !create_flag)
|
|
/* eliminating this trivial case early avoids unnecessary conversions later */
|
|
return (CELL *) 0;
|
|
switch (cp->type) {
|
|
case C_DOUBLE:
|
|
{
|
|
double d = cp->dval;
|
|
Int ival = d_to_I(d);
|
|
if ((double) ival == d) {
|
|
if (A->type == AY_SPLIT) {
|
|
if (ival >= 1 && ival <= (int) A->size)
|
|
return (CELL *) A->ptr + (ival - 1);
|
|
if (!create_flag)
|
|
return (CELL *) 0;
|
|
convert_split_array_to_table(A);
|
|
} else if (A->type == AY_NULL)
|
|
make_empty_table(A, AY_INT);
|
|
ap = find_by_ival(A, ival, create_flag, &redid);
|
|
} else {
|
|
/* convert to string */
|
|
char buff[260];
|
|
STRING *sval;
|
|
sprintf(buff, string(CONVFMT)->str, d);
|
|
sval = new_STRING(buff);
|
|
ap = find_by_sval(A, sval, create_flag, &redid);
|
|
free_STRING(sval);
|
|
}
|
|
}
|
|
|
|
break;
|
|
case C_NOINIT:
|
|
ap = find_by_sval(A, &null_str, create_flag, &redid);
|
|
break;
|
|
default:
|
|
ap = find_by_sval(A, string(cp), create_flag, &redid);
|
|
break;
|
|
}
|
|
return ap ? &ap->cell : (CELL *) 0;
|
|
}
|
|
|
|
void
|
|
array_delete(
|
|
ARRAY A,
|
|
CELL *cp)
|
|
{
|
|
ANODE *ap;
|
|
int redid;
|
|
if (A->size == 0)
|
|
return;
|
|
switch (cp->type) {
|
|
case C_DOUBLE:
|
|
{
|
|
double d = cp->dval;
|
|
Int ival = d_to_I(d);
|
|
if ((double) ival == d) {
|
|
if (A->type == AY_SPLIT) {
|
|
if (ival >= 1 && ival <= (int) A->size)
|
|
convert_split_array_to_table(A);
|
|
else
|
|
return; /* ival not in range */
|
|
}
|
|
ap = find_by_ival(A, ival, NO_CREATE, &redid);
|
|
if (ap) { /* remove from the front of the ilist */
|
|
DUAL_LINK *table = (DUAL_LINK *) A->ptr;
|
|
table[(unsigned) ap->ival & A->hmask].ilink = ap->ilink;
|
|
if (ap->sval) {
|
|
ANODE *p, *q = 0;
|
|
unsigned indx = (unsigned) ap->hval & A->hmask;
|
|
p = table[indx].slink;
|
|
while (p != ap) {
|
|
q = p;
|
|
p = q->slink;
|
|
}
|
|
if (q)
|
|
q->slink = p->slink;
|
|
else
|
|
table[indx].slink = p->slink;
|
|
free_STRING(ap->sval);
|
|
}
|
|
|
|
cell_destroy(&ap->cell);
|
|
ZFREE(ap);
|
|
if (--A->size == 0)
|
|
array_clear(A);
|
|
|
|
}
|
|
return;
|
|
}
|
|
|
|
else { /* get the string value */
|
|
char buff[260];
|
|
STRING *sval;
|
|
sprintf(buff, string(CONVFMT)->str, d);
|
|
sval = new_STRING(buff);
|
|
ap = find_by_sval(A, sval, NO_CREATE, &redid);
|
|
free_STRING(sval);
|
|
}
|
|
}
|
|
break;
|
|
case C_NOINIT:
|
|
ap = find_by_sval(A, &null_str, NO_CREATE, &redid);
|
|
break;
|
|
default:
|
|
ap = find_by_sval(A, string(cp), NO_CREATE, &redid);
|
|
break;
|
|
}
|
|
if (ap) { /* remove from the front of the slist */
|
|
DUAL_LINK *table = (DUAL_LINK *) A->ptr;
|
|
table[ap->hval & A->hmask].slink = ap->slink;
|
|
if (ap->ival != NOT_AN_IVALUE) {
|
|
ANODE *p, *q = 0;
|
|
unsigned indx = (unsigned) ap->ival & A->hmask;
|
|
p = table[indx].ilink;
|
|
while (p != ap) {
|
|
q = p;
|
|
p = q->ilink;
|
|
}
|
|
if (q)
|
|
q->ilink = p->ilink;
|
|
else
|
|
table[indx].ilink = p->ilink;
|
|
}
|
|
|
|
free_STRING(ap->sval);
|
|
cell_destroy(&ap->cell);
|
|
ZFREE(ap);
|
|
if (--A->size == 0)
|
|
array_clear(A);
|
|
|
|
}
|
|
}
|
|
|
|
void
|
|
array_load(
|
|
ARRAY A,
|
|
size_t cnt)
|
|
{
|
|
if (A->type != AY_SPLIT || A->limit < cnt) {
|
|
array_clear(A);
|
|
A->limit = (cnt & (size_t) ~3) + 4;
|
|
A->ptr = zmalloc(A->limit * sizeof(CELL));
|
|
A->type = AY_SPLIT;
|
|
} else {
|
|
/* reusing an existing AY_SPLIT array */
|
|
size_t i;
|
|
for (i = 0; i < A->size; i++) {
|
|
cell_destroy((CELL *) A->ptr + i);
|
|
}
|
|
}
|
|
|
|
A->size = cnt;
|
|
transfer_to_array((CELL *) A->ptr, cnt);
|
|
}
|
|
|
|
void
|
|
array_clear(ARRAY A)
|
|
{
|
|
unsigned i;
|
|
ANODE *p, *q;
|
|
if (A->type == AY_SPLIT) {
|
|
for (i = 0; i < A->size; i++)
|
|
cell_destroy((CELL *) A->ptr + i);
|
|
zfree(A->ptr, A->limit * sizeof(CELL));
|
|
} else if (A->type & AY_STR) {
|
|
DUAL_LINK *table = (DUAL_LINK *) A->ptr;
|
|
for (i = 0; (unsigned) i <= A->hmask; i++) {
|
|
p = table[i].slink;
|
|
while (p) {
|
|
q = p;
|
|
p = q->slink;
|
|
free_STRING(q->sval);
|
|
cell_destroy(&q->cell);
|
|
ZFREE(q);
|
|
}
|
|
}
|
|
zfree(A->ptr, (A->hmask + 1) * sizeof(DUAL_LINK));
|
|
} else if (A->type & AY_INT) {
|
|
DUAL_LINK *table = (DUAL_LINK *) A->ptr;
|
|
for (i = 0; (unsigned) i <= A->hmask; i++) {
|
|
p = table[i].ilink;
|
|
while (p) {
|
|
q = p;
|
|
p = q->ilink;
|
|
cell_destroy(&q->cell);
|
|
ZFREE(q);
|
|
}
|
|
}
|
|
zfree(A->ptr, (A->hmask + 1) * sizeof(DUAL_LINK));
|
|
}
|
|
memset(A, 0, sizeof(*A));
|
|
}
|
|
|
|
static int
|
|
string_compare(
|
|
const void *l,
|
|
const void *r)
|
|
{
|
|
STRING *const *a = (STRING * const *) l;
|
|
STRING *const *b = (STRING * const *) r;
|
|
|
|
return strcmp((*a)->str, (*b)->str);
|
|
}
|
|
|
|
STRING **
|
|
array_loop_vector(
|
|
ARRAY A,
|
|
size_t *sizep)
|
|
{
|
|
*sizep = A->size;
|
|
if (A->size > 0) {
|
|
STRING **ret;
|
|
if (!(A->type & AY_STR))
|
|
add_string_associations(A);
|
|
ret = (STRING **) zmalloc(A->size * sizeof(STRING *));
|
|
{
|
|
int r = 0; /* indexes ret */
|
|
DUAL_LINK *table = (DUAL_LINK *) A->ptr;
|
|
int i; /* indexes table */
|
|
ANODE *p; /* walks slists */
|
|
for (i = 0; (unsigned) i <= A->hmask; i++) {
|
|
for (p = table[i].slink; p; p = p->slink) {
|
|
ret[r++] = p->sval;
|
|
p->sval->ref_cnt++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (getenv("WHINY_USERS") != NULL) /* gawk compatibility */
|
|
qsort(ret, A->size, sizeof(STRING *), string_compare);
|
|
return ret;
|
|
}
|
|
return (STRING **) 0;
|
|
}
|
|
|
|
CELL *
|
|
array_cat(
|
|
CELL *sp,
|
|
int cnt)
|
|
{
|
|
CELL *p; /* walks the eval stack */
|
|
CELL subsep; /* local copy of SUBSEP */
|
|
size_t subsep_len; /* string length of subsep_str */
|
|
char *subsep_str;
|
|
|
|
size_t total_len; /* length of cat'ed expression */
|
|
CELL *top; /* value of sp at entry */
|
|
char *target; /* build cat'ed char* here */
|
|
STRING *sval; /* build cat'ed STRING here */
|
|
cellcpy(&subsep, SUBSEP);
|
|
if (subsep.type < C_STRING)
|
|
cast1_to_s(&subsep);
|
|
subsep_len = string(&subsep)->len;
|
|
subsep_str = string(&subsep)->str;
|
|
|
|
assert(cnt > 0);
|
|
|
|
top = sp;
|
|
sp -= (cnt - 1);
|
|
|
|
total_len = ((size_t) (cnt - 1)) * subsep_len;
|
|
for (p = sp; p <= top; p++) {
|
|
if (p->type < C_STRING)
|
|
cast1_to_s(p);
|
|
total_len += string(p)->len;
|
|
}
|
|
|
|
sval = new_STRING0(total_len);
|
|
target = sval->str;
|
|
for (p = sp; p < top; p++) {
|
|
memcpy(target, string(p)->str, string(p)->len);
|
|
target += string(p)->len;
|
|
memcpy(target, subsep_str, subsep_len);
|
|
target += subsep_len;
|
|
}
|
|
/* now p == top */
|
|
memcpy(target, string(p)->str, string(p)->len);
|
|
|
|
for (p = sp; p <= top; p++)
|
|
free_STRING(string(p));
|
|
free_STRING(string(&subsep));
|
|
/* set contents of sp , sp->type > C_STRING is possible so reset */
|
|
sp->type = C_STRING;
|
|
sp->ptr = (PTR) sval;
|
|
return sp;
|
|
|
|
}
|
|
|
|
static ANODE *
|
|
find_by_ival(
|
|
ARRAY A,
|
|
Int ival,
|
|
int create_flag,
|
|
int *redo)
|
|
{
|
|
DUAL_LINK *table = (DUAL_LINK *) A->ptr;
|
|
unsigned indx = (unsigned) ival & A->hmask;
|
|
ANODE *p = table[indx].ilink; /* walks ilist */
|
|
ANODE *q = (ANODE *) 0; /* trails p */
|
|
while (1) {
|
|
if (!p) {
|
|
/* search failed */
|
|
if (A->type & AY_STR) {
|
|
/* need to search by string */
|
|
char buff[256];
|
|
STRING *sval;
|
|
sprintf(buff, LONG_FMT, ival);
|
|
sval = new_STRING(buff);
|
|
p = find_by_sval(A, sval, create_flag, redo);
|
|
if (*redo) {
|
|
table = (DUAL_LINK *) A->ptr;
|
|
}
|
|
free_STRING(sval);
|
|
if (!p)
|
|
return (ANODE *) 0;
|
|
} else if (create_flag) {
|
|
p = ZMALLOC(ANODE);
|
|
p->sval = (STRING *) 0;
|
|
p->cell.type = C_NOINIT;
|
|
if (++A->size > A->limit) {
|
|
double_the_hash_table(A); /* changes table, may change index */
|
|
table = (DUAL_LINK *) A->ptr;
|
|
indx = A->hmask & (unsigned) ival;
|
|
}
|
|
} else
|
|
return (ANODE *) 0;
|
|
p->ival = ival;
|
|
A->type |= AY_INT;
|
|
|
|
break;
|
|
} else if (p->ival == ival) {
|
|
/* found it, now move to the front */
|
|
if (!q) /* already at the front */
|
|
return p;
|
|
/* delete for insertion at the front */
|
|
q->ilink = p->ilink;
|
|
break;
|
|
}
|
|
q = p;
|
|
p = q->ilink;
|
|
}
|
|
/* insert at the front */
|
|
p->ilink = table[indx].ilink;
|
|
table[indx].ilink = p;
|
|
return p;
|
|
}
|
|
|
|
static ANODE *
|
|
find_by_sval(
|
|
ARRAY A,
|
|
STRING * sval,
|
|
int create_flag,
|
|
int *redo)
|
|
{
|
|
unsigned hval = ahash(sval);
|
|
char *str = sval->str;
|
|
DUAL_LINK *table;
|
|
unsigned indx;
|
|
ANODE *p; /* walks list */
|
|
ANODE *q = (ANODE *) 0; /* trails p */
|
|
if (!(A->type & AY_STR))
|
|
add_string_associations(A);
|
|
table = (DUAL_LINK *) A->ptr;
|
|
indx = hval & A->hmask;
|
|
p = table[indx].slink;
|
|
*redo = 0;
|
|
while (1) {
|
|
if (!p) {
|
|
if (create_flag) {
|
|
{
|
|
p = ZMALLOC(ANODE);
|
|
p->sval = sval;
|
|
sval->ref_cnt++;
|
|
p->ival = NOT_AN_IVALUE;
|
|
p->hval = hval;
|
|
p->cell.type = C_NOINIT;
|
|
if (++A->size > A->limit) {
|
|
double_the_hash_table(A); /* changes table, may change index */
|
|
table = (DUAL_LINK *) A->ptr;
|
|
indx = hval & A->hmask;
|
|
*redo = 1;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
return (ANODE *) 0;
|
|
} else if (p->hval == hval) {
|
|
if (strcmp(p->sval->str, str) == 0) {
|
|
/* found */
|
|
if (!q) /* already at the front */
|
|
return p;
|
|
else { /* delete for move to the front */
|
|
q->slink = p->slink;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
q = p;
|
|
p = q->slink;
|
|
}
|
|
p->slink = table[indx].slink;
|
|
table[indx].slink = p;
|
|
return p;
|
|
}
|
|
|
|
static void
|
|
add_string_associations(ARRAY A)
|
|
{
|
|
if (A->type == AY_NULL)
|
|
make_empty_table(A, AY_STR);
|
|
else {
|
|
DUAL_LINK *table;
|
|
int i; /* walks table */
|
|
ANODE *p; /* walks ilist */
|
|
char buff[256];
|
|
if (A->type == AY_SPLIT)
|
|
convert_split_array_to_table(A);
|
|
table = (DUAL_LINK *) A->ptr;
|
|
for (i = 0; (unsigned) i <= A->hmask; i++) {
|
|
p = table[i].ilink;
|
|
while (p) {
|
|
sprintf(buff, LONG_FMT, p->ival);
|
|
p->sval = new_STRING(buff);
|
|
p->hval = ahash(p->sval);
|
|
p->slink = table[A->hmask & p->hval].slink;
|
|
table[A->hmask & p->hval].slink = p;
|
|
p = p->ilink;
|
|
}
|
|
}
|
|
A->type |= AY_STR;
|
|
}
|
|
}
|
|
|
|
static void
|
|
make_empty_table(
|
|
ARRAY A,
|
|
int type) /* AY_INT or AY_STR */
|
|
{
|
|
size_t sz = (STARTING_HMASK + 1) * sizeof(DUAL_LINK);
|
|
A->type = (short) type;
|
|
A->hmask = STARTING_HMASK;
|
|
A->limit = hmask_to_limit(STARTING_HMASK);
|
|
A->ptr = memset(zmalloc(sz), 0, sz);
|
|
}
|
|
|
|
static void
|
|
convert_split_array_to_table(ARRAY A)
|
|
{
|
|
CELL *cells = (CELL *) A->ptr;
|
|
unsigned i; /* walks cells */
|
|
DUAL_LINK *table;
|
|
unsigned j; /* walks table */
|
|
size_t entry_limit = A->limit;
|
|
A->hmask = STARTING_HMASK;
|
|
A->limit = hmask_to_limit(STARTING_HMASK);
|
|
while (A->size > A->limit) {
|
|
A->hmask = (A->hmask << 1) + 1; /* double the size */
|
|
A->limit = hmask_to_limit(A->hmask);
|
|
}
|
|
{
|
|
size_t sz = (A->hmask + 1) * sizeof(DUAL_LINK);
|
|
A->ptr = memset(zmalloc(sz), 0, sz);
|
|
table = (DUAL_LINK *) A->ptr;
|
|
}
|
|
|
|
/* insert each cells[i] in the new hash table on an ilist */
|
|
for (i = 0, j = 1; i < A->size; i++) {
|
|
ANODE *p = ZMALLOC(ANODE);
|
|
p->sval = (STRING *) 0;
|
|
p->ival = (Int) (i + 1);
|
|
p->cell = cells[i];
|
|
p->ilink = table[j].ilink;
|
|
table[j].ilink = p;
|
|
j++;
|
|
j &= A->hmask;
|
|
}
|
|
A->type = AY_INT;
|
|
zfree(cells, entry_limit * sizeof(CELL));
|
|
}
|
|
|
|
static void
|
|
double_the_hash_table(ARRAY A)
|
|
{
|
|
unsigned old_hmask = A->hmask;
|
|
unsigned new_hmask = (old_hmask << 1) + 1;
|
|
DUAL_LINK *table;
|
|
A->ptr = zrealloc(A->ptr, (old_hmask + 1) * sizeof(DUAL_LINK),
|
|
(new_hmask + 1) * sizeof(DUAL_LINK));
|
|
table = (DUAL_LINK *) A->ptr;
|
|
/* zero out the new part which is the back half */
|
|
memset(&table[old_hmask + 1], 0, (old_hmask + 1) * sizeof(DUAL_LINK));
|
|
|
|
if (A->type & AY_STR) {
|
|
unsigned i; /* index to old lists */
|
|
unsigned j; /* index to new lists */
|
|
ANODE *p; /* walks an old list */
|
|
ANODE *q; /* trails p for deletion */
|
|
ANODE *tail; /* builds new list from the back */
|
|
ANODE dummy0, dummy1;
|
|
for (i = 0, j = old_hmask + 1; i <= old_hmask; i++, j++) {
|
|
q = &dummy0;
|
|
q->slink = p = table[i].slink;
|
|
tail = &dummy1;
|
|
while (p) {
|
|
if ((p->hval & new_hmask) != (unsigned) i) { /* move it */
|
|
q->slink = p->slink;
|
|
tail = tail->slink = p;
|
|
} else
|
|
q = p;
|
|
p = q->slink;
|
|
}
|
|
table[i].slink = dummy0.slink;
|
|
tail->slink = (ANODE *) 0;
|
|
table[j].slink = dummy1.slink;
|
|
}
|
|
|
|
}
|
|
|
|
if (A->type & AY_INT) {
|
|
unsigned i; /* index to old lists */
|
|
unsigned j; /* index to new lists */
|
|
ANODE *p; /* walks an old list */
|
|
ANODE *q; /* trails p for deletion */
|
|
ANODE *tail; /* builds new list from the back */
|
|
ANODE dummy0, dummy1;
|
|
for (i = 0, j = old_hmask + 1; i <= old_hmask; i++, j++) {
|
|
q = &dummy0;
|
|
q->ilink = p = table[i].ilink;
|
|
tail = &dummy1;
|
|
while (p) {
|
|
if (((unsigned) p->ival & new_hmask) != i) { /* move it */
|
|
q->ilink = p->ilink;
|
|
tail = tail->ilink = p;
|
|
} else
|
|
q = p;
|
|
p = q->ilink;
|
|
}
|
|
table[i].ilink = dummy0.ilink;
|
|
tail->ilink = (ANODE *) 0;
|
|
table[j].ilink = dummy1.ilink;
|
|
}
|
|
|
|
}
|
|
|
|
A->hmask = new_hmask;
|
|
A->limit = hmask_to_limit(new_hmask);
|
|
}
|