mirror of
https://github.com/ThomasDickey/mawk-snapshots.git
synced 2026-01-27 03:14:29 +00:00
687 lines
13 KiB
C
687 lines
13 KiB
C
|
|
/********************************************
|
|
field.c
|
|
copyright 1991, Michael D. Brennan
|
|
|
|
This is a source file for mawk, an implementation of
|
|
the AWK programming language.
|
|
|
|
Mawk is distributed without warranty under the terms of
|
|
the GNU General Public License, version 2, 1991.
|
|
********************************************/
|
|
|
|
/* @Log: field.c,v @
|
|
* Revision 1.5 1995/06/18 19:17:47 mike
|
|
* Create a type Int which on most machines is an int, but on machines
|
|
* with 16bit ints, i.e., the PC is a long. This fixes implicit assumption
|
|
* that int==long.
|
|
*
|
|
* Revision 1.4 1994/10/08 19:15:38 mike
|
|
* remove SM_DOS
|
|
*
|
|
* Revision 1.3 1993/07/14 12:32:39 mike
|
|
* run thru indent
|
|
*
|
|
* Revision 1.2 1993/07/14 12:22:11 mike
|
|
* rm SIZE_T and (void) casts
|
|
*
|
|
* Revision 1.1.1.1 1993/07/03 18:58:12 mike
|
|
* move source to cvs
|
|
*
|
|
* Revision 5.7 1993/05/08 18:06:00 mike
|
|
* null_split
|
|
*
|
|
* Revision 5.6 1993/02/13 21:57:25 mike
|
|
* merge patch3
|
|
*
|
|
* Revision 5.5 1993/01/01 21:30:48 mike
|
|
* split new_STRING() into new_STRING and new_STRING0
|
|
*
|
|
* Revision 5.4.1.2 1993/01/20 12:53:08 mike
|
|
* d_to_l()
|
|
*
|
|
* Revision 5.4.1.1 1993/01/15 03:33:42 mike
|
|
* patch3: safer double to int conversion
|
|
*
|
|
* Revision 5.4 1992/11/29 22:52:11 mike
|
|
* double->string conversions uses long ints for 16/32 bit
|
|
* compatibility.
|
|
* Fixed small LM_DOS bozo.
|
|
*
|
|
* Revision 5.3 1992/08/17 14:21:10 brennan
|
|
* patch2: After parsing, only bi_sprintf() uses string_buff.
|
|
*
|
|
* Revision 5.2 1992/07/10 16:17:10 brennan
|
|
* MsDOS: remove NO_BINMODE macro
|
|
*
|
|
* Revision 5.1 1991/12/05 07:55:57 brennan
|
|
* 1.1 pre-release
|
|
*
|
|
*/
|
|
|
|
|
|
/* field.c */
|
|
|
|
#include "mawk.h"
|
|
#include "field.h"
|
|
#include "init.h"
|
|
#include "memory.h"
|
|
#include "scan.h"
|
|
#include "bi_vars.h"
|
|
#include "repl.h"
|
|
#include "regexp.h"
|
|
|
|
CELL field[FBANK_SZ + NUM_PFIELDS] ;
|
|
|
|
CELL *fbank[NUM_FBANK] =
|
|
{field} ;
|
|
|
|
static int max_field = MAX_SPLIT ; /* maximum field actually created*/
|
|
|
|
static void PROTO(build_field0, (void)) ;
|
|
static void PROTO(set_rs_shadow, (void)) ;
|
|
static void PROTO(load_pfield, (char *, CELL *)) ;
|
|
static void PROTO(load_field_ov, (void)) ;
|
|
|
|
|
|
|
|
/* a description of how to split based on RS.
|
|
If RS is changed, so is rs_shadow */
|
|
SEPARATOR rs_shadow =
|
|
{SEP_CHAR, '\n'} ;
|
|
/* a splitting CELL version of FS */
|
|
CELL fs_shadow =
|
|
{C_SPACE} ;
|
|
int nf ;
|
|
/* nf holds the true value of NF. If nf < 0 , then
|
|
NF has not been computed, i.e., $0 has not been split
|
|
*/
|
|
|
|
static void
|
|
set_rs_shadow()
|
|
{
|
|
CELL c ;
|
|
STRING *sval ;
|
|
char *s ;
|
|
unsigned len ;
|
|
|
|
if (posix_space_flag && mawk_state == EXECUTION)
|
|
scan_code['\n'] = SC_UNEXPECTED ;
|
|
|
|
if (rs_shadow.type == SEP_STR)
|
|
free_STRING((STRING *) rs_shadow.ptr) ;
|
|
|
|
cast_for_split(cellcpy(&c, RS)) ;
|
|
switch (c.type)
|
|
{
|
|
case C_RE:
|
|
if (s = is_string_split(c.ptr, &len))
|
|
{
|
|
if (len == 1)
|
|
{
|
|
rs_shadow.type = SEP_CHAR ;
|
|
rs_shadow.c = s[0] ;
|
|
}
|
|
else
|
|
{
|
|
rs_shadow.type = SEP_STR ;
|
|
rs_shadow.ptr = (PTR) new_STRING(s) ;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
rs_shadow.type = SEP_RE ;
|
|
rs_shadow.ptr = c.ptr ;
|
|
}
|
|
break ;
|
|
|
|
case C_SPACE:
|
|
rs_shadow.type = SEP_CHAR ;
|
|
rs_shadow.c = ' ' ;
|
|
break ;
|
|
|
|
case C_SNULL: /* RS becomes one or more blank lines */
|
|
if (mawk_state == EXECUTION) scan_code['\n'] = SC_SPACE ;
|
|
rs_shadow.type = SEP_MLR ;
|
|
sval = new_STRING("\n\n+") ;
|
|
rs_shadow.ptr = re_compile(sval) ;
|
|
free_STRING(sval) ;
|
|
break ;
|
|
|
|
default:
|
|
bozo("bad cell in set_rs_shadow") ;
|
|
}
|
|
}
|
|
|
|
static void
|
|
load_pfield(name, cp)
|
|
char *name ;
|
|
CELL *cp ;
|
|
{
|
|
SYMTAB *stp ;
|
|
|
|
stp = insert(name) ; stp->type = ST_FIELD ;
|
|
stp->stval.cp = cp ;
|
|
}
|
|
|
|
/* initialize $0 and the pseudo fields */
|
|
void
|
|
field_init()
|
|
{
|
|
field[0].type = C_STRING ;
|
|
field[0].ptr = (PTR) & null_str ;
|
|
null_str.ref_cnt++ ;
|
|
|
|
load_pfield("NF", NF) ;
|
|
NF->type = C_DOUBLE ;
|
|
NF->dval = 0.0 ;
|
|
|
|
load_pfield("RS", RS) ;
|
|
RS->type = C_STRING ;
|
|
RS->ptr = (PTR) new_STRING("\n") ;
|
|
/* rs_shadow already set */
|
|
|
|
load_pfield("FS", FS) ;
|
|
FS->type = C_STRING ;
|
|
FS->ptr = (PTR) new_STRING(" ") ;
|
|
/* fs_shadow is already set */
|
|
|
|
load_pfield("OFMT", OFMT) ;
|
|
OFMT->type = C_STRING ;
|
|
OFMT->ptr = (PTR) new_STRING("%.6g") ;
|
|
|
|
load_pfield("CONVFMT", CONVFMT) ;
|
|
CONVFMT->type = C_STRING ;
|
|
CONVFMT->ptr = OFMT->ptr ;
|
|
string(OFMT)->ref_cnt++ ;
|
|
}
|
|
|
|
|
|
|
|
void
|
|
set_field0(s, len)
|
|
char *s ;
|
|
unsigned len ;
|
|
{
|
|
cell_destroy(&field[0]) ;
|
|
nf = -1 ;
|
|
|
|
if (len)
|
|
{
|
|
field[0].type = C_MBSTRN ;
|
|
field[0].ptr = (PTR) new_STRING0(len) ;
|
|
memcpy(string(&field[0])->str, s, len) ;
|
|
}
|
|
else
|
|
{
|
|
field[0].type = C_STRING ;
|
|
field[0].ptr = (PTR) & null_str ;
|
|
null_str.ref_cnt++ ;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/* split field[0] into $1, $2 ... and set NF */
|
|
|
|
void
|
|
split_field0()
|
|
{
|
|
register CELL *cp ;
|
|
register int cnt ;
|
|
CELL c ; /* copy field[0] here if not string */
|
|
|
|
|
|
if (field[0].type < C_STRING)
|
|
{
|
|
cast1_to_s(cellcpy(&c, field + 0)) ;
|
|
cp = &c ;
|
|
}
|
|
else cp = &field[0] ;
|
|
|
|
if (string(cp)->len == 0) nf = 0 ;
|
|
else
|
|
{
|
|
switch (fs_shadow.type)
|
|
{
|
|
case C_SNULL: /* FS == "" */
|
|
nf = null_split(string(cp)->str) ;
|
|
break ;
|
|
|
|
case C_SPACE:
|
|
nf = space_split(string(cp)->str, string(cp)->len) ;
|
|
break ;
|
|
|
|
default:
|
|
nf = re_split(string(cp)->str, fs_shadow.ptr) ;
|
|
break ;
|
|
}
|
|
|
|
}
|
|
|
|
cell_destroy(NF) ;
|
|
NF->type = C_DOUBLE ;
|
|
NF->dval = (double) nf ;
|
|
|
|
if (nf > MAX_SPLIT)
|
|
{
|
|
cnt = MAX_SPLIT ; load_field_ov() ;
|
|
}
|
|
else cnt = nf ;
|
|
|
|
while (cnt > 0)
|
|
{
|
|
cell_destroy(field + cnt) ;
|
|
field[cnt].ptr = (PTR) split_buff[cnt - 1] ;
|
|
field[cnt--].type = C_MBSTRN ;
|
|
}
|
|
|
|
if (cp == &c) free_STRING(string(cp)) ;
|
|
}
|
|
|
|
/*
|
|
assign CELL *cp to field or pseudo field
|
|
and take care of all side effects
|
|
*/
|
|
|
|
void
|
|
field_assign(fp, cp)
|
|
register CELL *fp ;
|
|
CELL *cp ;
|
|
{
|
|
CELL c ;
|
|
int i, j ;
|
|
|
|
/* the most common case first */
|
|
if (fp == field)
|
|
{
|
|
cell_destroy(field) ;
|
|
cellcpy(fp, cp) ;
|
|
nf = -1 ;
|
|
return ;
|
|
}
|
|
|
|
/* its not important to do any of this fast */
|
|
|
|
if (nf < 0) split_field0() ;
|
|
|
|
#ifdef MSDOS
|
|
if (!SAMESEG(fp, field))
|
|
{
|
|
i = -1 ;
|
|
goto lm_dos_label ;
|
|
}
|
|
#endif
|
|
|
|
switch (i = (fp - field))
|
|
{
|
|
|
|
case NF_field:
|
|
|
|
cell_destroy(NF) ;
|
|
cellcpy(NF, cellcpy(&c, cp)) ;
|
|
if (c.type != C_DOUBLE) cast1_to_d(&c) ;
|
|
|
|
if ((j = d_to_i(c.dval)) < 0)
|
|
rt_error("negative value assigned to NF") ;
|
|
|
|
if (j > nf)
|
|
for (i = nf + 1; i <= j; i++)
|
|
{
|
|
cp = field_ptr(i) ;
|
|
cell_destroy(cp) ;
|
|
cp->type = C_STRING ;
|
|
cp->ptr = (PTR) & null_str ;
|
|
null_str.ref_cnt++ ;
|
|
}
|
|
|
|
nf = j ;
|
|
build_field0() ;
|
|
break ;
|
|
|
|
case RS_field:
|
|
cell_destroy(RS) ;
|
|
cellcpy(RS, cp) ;
|
|
set_rs_shadow() ;
|
|
break ;
|
|
|
|
case FS_field:
|
|
cell_destroy(FS) ;
|
|
cast_for_split(cellcpy(&fs_shadow, cellcpy(FS, cp))) ;
|
|
break ;
|
|
|
|
case OFMT_field:
|
|
case CONVFMT_field:
|
|
/* If the user does something stupid with OFMT or CONVFMT,
|
|
we could crash.
|
|
We'll make an attempt to protect ourselves here. This is
|
|
why OFMT and CONVFMT are pseudo fields.
|
|
|
|
The ptrs of OFMT and CONVFMT always have a valid STRING,
|
|
even if assigned a DOUBLE or NOINIT
|
|
*/
|
|
|
|
free_STRING(string(fp)) ;
|
|
cellcpy(fp, cp) ;
|
|
if (fp->type < C_STRING) /* !! */
|
|
fp->ptr = (PTR) new_STRING("%.6g") ;
|
|
else if (fp == CONVFMT)
|
|
{
|
|
/* It's a string, but if it's really goofy and CONVFMT,
|
|
it could still damage us. Test it .
|
|
*/
|
|
char xbuff[512] ;
|
|
|
|
xbuff[256] = 0 ;
|
|
sprintf(xbuff, string(fp)->str, 3.1459) ;
|
|
if (xbuff[256])
|
|
rt_error("CONVFMT assigned unusable value") ;
|
|
}
|
|
break ;
|
|
|
|
#ifdef MSDOS
|
|
lm_dos_label:
|
|
#endif
|
|
|
|
default: /* $1 or $2 or ... */
|
|
|
|
|
|
cell_destroy(fp) ;
|
|
cellcpy(fp, cp) ;
|
|
|
|
if (i < 0 || i > MAX_SPLIT) i = field_addr_to_index(fp) ;
|
|
|
|
if (i > nf)
|
|
{
|
|
for (j = nf + 1; j < i; j++)
|
|
{
|
|
cp = field_ptr(j) ;
|
|
cell_destroy(cp) ;
|
|
cp->type = C_STRING ;
|
|
cp->ptr = (PTR) & null_str ;
|
|
null_str.ref_cnt++ ;
|
|
}
|
|
nf = i ;
|
|
cell_destroy(NF) ;
|
|
NF->type = C_DOUBLE ;
|
|
NF->dval = (double) i ;
|
|
}
|
|
|
|
build_field0() ;
|
|
|
|
}
|
|
}
|
|
|
|
|
|
/* construct field[0] from the other fields */
|
|
|
|
static void
|
|
build_field0()
|
|
{
|
|
|
|
|
|
#ifdef DEBUG
|
|
if (nf < 0) bozo("nf <0 in build_field0") ;
|
|
#endif
|
|
|
|
cell_destroy(field + 0) ;
|
|
|
|
if (nf == 0)
|
|
{
|
|
field[0].type = C_STRING ;
|
|
field[0].ptr = (PTR) & null_str ;
|
|
null_str.ref_cnt++ ;
|
|
}
|
|
else if (nf == 1)
|
|
{
|
|
cellcpy(field, field + 1) ;
|
|
}
|
|
else
|
|
{
|
|
CELL c ;
|
|
STRING *ofs, *tail ;
|
|
unsigned len ;
|
|
register CELL *cp ;
|
|
register char *p, *q ;
|
|
int cnt ;
|
|
CELL **fbp, *cp_limit ;
|
|
|
|
|
|
cast1_to_s(cellcpy(&c, OFS)) ;
|
|
ofs = (STRING *) c.ptr ;
|
|
cast1_to_s(cellcpy(&c, field_ptr(nf))) ;
|
|
tail = (STRING *) c.ptr ;
|
|
cnt = nf - 1 ;
|
|
|
|
len = cnt * ofs->len + tail->len ;
|
|
|
|
fbp = fbank ; cp_limit = field + FBANK_SZ ;
|
|
cp = field + 1 ;
|
|
|
|
while (cnt-- > 0)
|
|
{
|
|
if (cp->type < C_STRING)
|
|
{ /* use the string field temporarily */
|
|
if (cp->type == C_NOINIT)
|
|
{
|
|
cp->ptr = (PTR) & null_str ;
|
|
null_str.ref_cnt++ ;
|
|
}
|
|
else /* its a double */
|
|
{
|
|
Int ival ;
|
|
char xbuff[260] ;
|
|
|
|
ival = d_to_I(cp->dval) ;
|
|
if (ival == cp->dval) sprintf(xbuff, INT_FMT, ival) ;
|
|
else sprintf(xbuff, string(CONVFMT)->str, cp->dval) ;
|
|
|
|
cp->ptr = (PTR) new_STRING(xbuff) ;
|
|
}
|
|
}
|
|
|
|
len += string(cp)->len ;
|
|
|
|
if (++cp == cp_limit)
|
|
{
|
|
cp = *++fbp ;
|
|
cp_limit = cp + FBANK_SZ ;
|
|
}
|
|
|
|
}
|
|
|
|
field[0].type = C_STRING ;
|
|
field[0].ptr = (PTR) new_STRING0(len) ;
|
|
|
|
p = string(field)->str ;
|
|
|
|
/* walk it again , putting things together */
|
|
cnt = nf-1 ; fbp = fbank ;
|
|
cp = field+1 ; cp_limit = field + FBANK_SZ ;
|
|
while (cnt-- > 0)
|
|
{
|
|
memcpy(p, string(cp)->str, string(cp)->len) ;
|
|
p += string(cp)->len ;
|
|
/* if not really string, free temp use of ptr */
|
|
if (cp->type < C_STRING) free_STRING(string(cp)) ;
|
|
if (++cp == cp_limit)
|
|
{
|
|
cp = *++fbp ;
|
|
cp_limit = cp + FBANK_SZ ;
|
|
}
|
|
/* add the separator */
|
|
q = ofs->str ; while( *q ) *p++ = *q++ ;
|
|
}
|
|
/* tack tail on the end */
|
|
memcpy(p, tail->str, tail->len) ;
|
|
|
|
/* cleanup */
|
|
free_STRING(tail) ; free_STRING(ofs) ;
|
|
}
|
|
}
|
|
|
|
/* We are assigning to a CELL and we aren't sure if its
|
|
a field */
|
|
|
|
void
|
|
slow_cell_assign(target, source)
|
|
register CELL *target ;
|
|
CELL *source ;
|
|
{
|
|
if (
|
|
|
|
#ifdef MSDOS /* the dreaded segment nonsense */
|
|
SAMESEG(target, field) &&
|
|
#endif
|
|
target >= field && target <= LAST_PFIELD)
|
|
field_assign(target, source) ;
|
|
else
|
|
{
|
|
CELL **p = fbank + 1 ;
|
|
|
|
while (*p)
|
|
{
|
|
if (
|
|
#ifdef MSDOS
|
|
SAMESEG(target, *p) &&
|
|
#endif
|
|
target >= *p && target < *p + FBANK_SZ)
|
|
{
|
|
field_assign(target, source) ;
|
|
return ;
|
|
}
|
|
p++ ;
|
|
}
|
|
/* its not a field */
|
|
cell_destroy(target) ;
|
|
cellcpy(target, source) ;
|
|
}
|
|
}
|
|
|
|
int
|
|
field_addr_to_index(cp)
|
|
CELL *cp ;
|
|
{
|
|
CELL **p = fbank ;
|
|
|
|
while (
|
|
|
|
#ifdef MSDOS
|
|
!SAMESEG(cp, *p) ||
|
|
#endif
|
|
|
|
cp < *p || cp >= *p + FBANK_SZ)
|
|
p++ ;
|
|
|
|
return ((p - fbank) << FB_SHIFT) + (cp - *p) ;
|
|
}
|
|
|
|
/*------- more than 1 fbank needed ------------*/
|
|
|
|
/*
|
|
compute the address of a field with index
|
|
> MAX_SPLIT
|
|
*/
|
|
|
|
CELL *
|
|
slow_field_ptr(i)
|
|
register int i ;
|
|
{
|
|
|
|
if (i > max_field)
|
|
{
|
|
int j ;
|
|
|
|
if (i > MAX_FIELD)
|
|
rt_overflow("maximum number of fields", MAX_FIELD) ;
|
|
|
|
j = 1 ;
|
|
while (fbank[j]) j++ ;
|
|
|
|
do
|
|
{
|
|
fbank[j] = (CELL *) zmalloc(sizeof(CELL) * FBANK_SZ) ;
|
|
memset(fbank[j], 0, sizeof(CELL) * FBANK_SZ) ;
|
|
j++ ;
|
|
max_field += FBANK_SZ ;
|
|
}
|
|
while (i > max_field);
|
|
}
|
|
|
|
return &fbank[i >> FB_SHIFT][i & (FBANK_SZ - 1)] ;
|
|
}
|
|
|
|
/*
|
|
$0 split into more than MAX_SPLIT fields,
|
|
$(MAX_FIELD+1) ... are on the split_ov_list.
|
|
Copy into fields which start at fbank[1]
|
|
*/
|
|
|
|
static void
|
|
load_field_ov()
|
|
{
|
|
register SPLIT_OV *p ; /* walks split_ov_list */
|
|
register CELL *cp ; /* target of copy */
|
|
int j ; /* current fbank[] */
|
|
CELL *cp_limit ; /* change fbank[] */
|
|
SPLIT_OV *q ; /* trails p */
|
|
|
|
/* make sure the fields are allocated */
|
|
slow_field_ptr(nf) ;
|
|
|
|
p = split_ov_list ; split_ov_list = (SPLIT_OV*) 0 ;
|
|
j = 1 ; cp = fbank[j] ; cp_limit = cp + FBANK_SZ ;
|
|
while (p)
|
|
{
|
|
cell_destroy(cp) ;
|
|
cp->type = C_MBSTRN ;
|
|
cp->ptr = (PTR) p->sval ;
|
|
|
|
if (++cp == cp_limit)
|
|
{
|
|
cp = fbank[++j] ; cp_limit = cp + FBANK_SZ ;
|
|
}
|
|
|
|
q = p ; p = p->link ; ZFREE(q) ;
|
|
}
|
|
}
|
|
|
|
|
|
#if MSDOS
|
|
|
|
int
|
|
binmode() /* read current value of BINMODE */
|
|
{
|
|
CELL c ;
|
|
|
|
cast1_to_d(cellcpy(&c, BINMODE)) ;
|
|
return d_to_i(c.dval) ;
|
|
}
|
|
|
|
/* set BINMODE and RS and ORS
|
|
from environment or -W binmode= */
|
|
|
|
void
|
|
set_binmode(x)
|
|
int x ;
|
|
{
|
|
CELL c ;
|
|
|
|
/* set RS */
|
|
c.type = C_STRING ;
|
|
c.ptr = (PTR) new_STRING((x & 1) ? "\r\n" : "\n") ;
|
|
field_assign(RS, &c) ;
|
|
free_STRING(string(&c)) ;
|
|
|
|
/* set ORS */
|
|
cell_destroy(ORS) ;
|
|
ORS->type = C_STRING ;
|
|
ORS->ptr = (PTR) new_STRING((x & 2) ? "\r\n" : "\n") ;
|
|
|
|
cell_destroy(BINMODE) ;
|
|
BINMODE->type = C_DOUBLE ;
|
|
BINMODE->dval = (double) x ;
|
|
}
|
|
|
|
#endif /* MSDOS */
|