mirror of
https://github.com/ThomasDickey/mawk-snapshots.git
synced 2026-01-27 03:14:29 +00:00
330 lines
6.6 KiB
C
330 lines
6.6 KiB
C
/********************************************
|
|
split.c
|
|
copyright 2008-2009,2010, Thomas E. Dickey
|
|
copyright 1991-1993,1996, Michael D. Brennan
|
|
|
|
This is a source file for mawk, an implementation of
|
|
the AWK programming language.
|
|
|
|
Mawk is distributed without warranty under the terms of
|
|
the GNU General Public License, version 2, 1991.
|
|
********************************************/
|
|
|
|
/*
|
|
* $MawkId: split.c,v 1.21 2010/12/10 17:00:00 tom Exp $
|
|
* @Log: split.c,v @
|
|
* Revision 1.3 1996/02/01 04:39:42 mike
|
|
* dynamic array scheme
|
|
*
|
|
* Revision 1.2 1993/07/15 01:55:03 mike
|
|
* rm SIZE_T & indent
|
|
*
|
|
* Revision 1.1.1.1 1993/07/03 18:58:21 mike
|
|
* move source to cvs
|
|
*
|
|
* Revision 5.4 1993/05/08 18:06:00 mike
|
|
* null_split
|
|
*
|
|
* Revision 5.3 1993/01/01 21:30:48 mike
|
|
* split new_STRING() into new_STRING and new_STRING0
|
|
*
|
|
* Revision 5.2 1992/07/08 21:19:09 brennan
|
|
* patch2
|
|
* change in split() requires that
|
|
* bi_split() call load_array() even
|
|
* when cnt is 0.
|
|
*
|
|
* Revision 5.1 1991/12/05 07:56:31 brennan
|
|
* 1.1 pre-release
|
|
*
|
|
*/
|
|
|
|
/* split.c */
|
|
|
|
/* For all splitting, up to MAX_SPLIT fields go into
   split_buff[]; the rest go onto split_ov_list (the split
   overflow list).

   We can split one of three ways:
     (1) By space:
         space_split() and space_ov_split()
     (2) By regular expression:
         re_split() and re_ov_split()
     (3) By "" (null -- split into characters)
         null_split() and null_ov_split()
 */
|
|
|
|
#define TEMPBUFF_GOES_HERE
|
|
|
|
#include "mawk.h"
|
|
#include "symtype.h"
|
|
#include "bi_vars.h"
|
|
#include "bi_funct.h"
|
|
#include "memory.h"
|
|
#include "scan.h"
|
|
#include "regexp.h"
|
|
#include "repl.h"
|
|
#include "field.h"
|
|
|
|
/*
 * Head of the overflow list.  Each *_ov_split() routine in this file
 * builds a linked list of SPLIT_OV nodes (one STRING per node, chained
 * through ->link) and stores its first node here; the list is empty
 * (null) when the routine produced no pieces.
 */
SPLIT_OV *split_ov_list;
|
|
|
|
/*
 * Scanning helpers for the space splitters.  Both expect a local
 * `char *s' (the scan position); EAT_NON_SPACE() additionally expects a
 * local `char *back' addressing the byte that terminates the string.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement and is safe as the body of an un-braced if/else.
 */

/* Advance s over every character that scan_code[] marks as SC_SPACE. */
#define EAT_SPACE() \
    do { \
        while (scan_code[*(unsigned char *) s] == SC_SPACE) \
            s++; \
    } while (0)

/*
 * Advance s to the next SC_SPACE character, or to back if none is left.
 * *back is overwritten with a blank for the duration of the scan so the
 * loop needs no separate bounds test (this relies on scan_code[' '] being
 * SC_SPACE), and is reset to 0 afterwards.
 */
#define EAT_NON_SPACE() \
    do { \
        *back = ' ';    /* sentinel */ \
        while (scan_code[*(unsigned char *) s] != SC_SPACE) \
            s++; \
        *back = 0; \
    } while (0)
|
|
|
|
static size_t
|
|
space_ov_split(char *s, char *back)
|
|
{
|
|
SPLIT_OV dummy;
|
|
register SPLIT_OV *tail = &dummy;
|
|
char *q;
|
|
size_t cnt = 0;
|
|
size_t len;
|
|
|
|
while (1) {
|
|
EAT_SPACE();
|
|
if (*s == 0)
|
|
break; /* done */
|
|
q = s++;
|
|
EAT_NON_SPACE();
|
|
|
|
tail = tail->link = ZMALLOC(SPLIT_OV);
|
|
tail->sval = new_STRING0(len = (size_t) (s - q));
|
|
memcpy(tail->sval->str, q, len);
|
|
cnt++;
|
|
}
|
|
|
|
tail->link = (SPLIT_OV *) 0;
|
|
split_ov_list = dummy.link;
|
|
return cnt;
|
|
}
|
|
|
|
/*
|
|
* Split string s of length slen on SPACE without changing s.
|
|
* Load the pieces into STRINGS and ptrs into split_buff[].
|
|
*
|
|
* return the number of pieces
|
|
*/
|
|
size_t
|
|
space_split(char *s, size_t slen)
|
|
{
|
|
char *back = s + slen;
|
|
size_t i = 0;
|
|
char *q;
|
|
int lcnt = MAX_SPLIT / 3;
|
|
|
|
while (lcnt--) {
|
|
EAT_SPACE();
|
|
if (*s == 0)
|
|
goto done;
|
|
/* mark the front with q */
|
|
q = s++;
|
|
EAT_NON_SPACE();
|
|
split_buff[i++] = new_STRING1(q, (size_t) (s - q));
|
|
|
|
EAT_SPACE();
|
|
if (*s == 0)
|
|
goto done;
|
|
q = s++;
|
|
EAT_NON_SPACE();
|
|
split_buff[i++] = new_STRING1(q, (size_t) (s - q));
|
|
|
|
EAT_SPACE();
|
|
if (*s == 0)
|
|
goto done;
|
|
q = s++;
|
|
EAT_NON_SPACE();
|
|
split_buff[i++] = new_STRING1(q, (size_t) (s - q));
|
|
|
|
}
|
|
/* we've overflowed */
|
|
return i + space_ov_split(s, back);
|
|
|
|
done:
|
|
return i;
|
|
}
|
|
|
|
/* match a string with a regular expression, but
|
|
* only matches of positive length count
|
|
*/
|
|
char *
|
|
re_pos_match(char *s, size_t str_len, PTR re, size_t *lenp)
|
|
{
|
|
char *result = 0;
|
|
|
|
while (str_len && (s = REmatch(s, str_len, cast_to_re(re), lenp))) {
|
|
if (*lenp) {
|
|
result = s;
|
|
break;
|
|
} else if (*s == 0) {
|
|
break;
|
|
} else {
|
|
s++;
|
|
--str_len;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* We've overflowed split_buff[], put the rest on the split_ov_list.
|
|
*
|
|
* Return number of pieces.
|
|
*/
|
|
static size_t
|
|
re_ov_split(char *s, size_t slen, PTR re)
|
|
{
|
|
SPLIT_OV dummy;
|
|
SPLIT_OV *tail = &dummy;
|
|
size_t cnt = 1;
|
|
char *limit = s + slen;
|
|
char *t;
|
|
size_t mlen;
|
|
|
|
while ((s < limit)
|
|
&& (t = re_pos_match(s, (size_t) (limit - s), re, &mlen))) {
|
|
tail = tail->link = ZMALLOC(SPLIT_OV);
|
|
tail->sval = new_STRING1(s, (size_t) (t - s));
|
|
s = t + mlen;
|
|
cnt++;
|
|
}
|
|
/* and one more */
|
|
tail = tail->link = ZMALLOC(SPLIT_OV);
|
|
tail->sval = new_STRING1(s, (size_t) (limit - s));
|
|
tail->link = (SPLIT_OV *) 0;
|
|
split_ov_list = dummy.link;
|
|
|
|
return cnt;
|
|
}
|
|
|
|
/*
 * One step of re_split(): find the next separator, store the text in
 * front of it in split_buff[], and move past the separator.
 *
 * Expects these names in the expanding scope: s, t, limit, slen, mlen, i,
 * re, and a label `done'.  Control jumps to `done' when
 *   - no separator is found (s and slen still describe the remainder), or
 *   - the scan position ends up beyond limit, in which case slen is set
 *     to (size_t) (-1) to flag that there is no remainder.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement.
 */
#define RE_SPLIT3 \
    do { \
        t = re_pos_match(s, slen, re, &mlen); \
        if (!t) \
            goto done; \
        split_buff[i++] = new_STRING1(s, (size_t) (t - s)); \
        s = t + mlen; \
        if (s > limit) { \
            slen = (size_t) (-1); \
            goto done; \
        } \
        slen = (size_t) (limit - s); \
    } while (0)
|
|
|
|
size_t
|
|
re_split(STRING * s_param, PTR re)
|
|
{
|
|
char *limit = s_param->str + s_param->len;
|
|
char *s = s_param->str;
|
|
char *t;
|
|
size_t i = 0;
|
|
size_t slen = s_param->len;
|
|
size_t mlen;
|
|
int lcnt = MAX_SPLIT / 3;
|
|
|
|
while (lcnt--) {
|
|
RE_SPLIT3;
|
|
RE_SPLIT3;
|
|
RE_SPLIT3;
|
|
}
|
|
/* we've overflowed */
|
|
return i + re_ov_split(s, slen, re);
|
|
|
|
done:
|
|
if ((int) slen >= 0) {
|
|
split_buff[i++] = new_STRING1(s, slen);
|
|
}
|
|
return i;
|
|
}
|
|
|
|
static size_t
|
|
null_ov_split(char *s, size_t slen)
|
|
{
|
|
SPLIT_OV dummy;
|
|
SPLIT_OV *ovp = &dummy;
|
|
size_t cnt = 0;
|
|
|
|
while (slen) {
|
|
ovp = ovp->link = ZMALLOC(SPLIT_OV);
|
|
ovp->sval = new_STRING0((size_t) 1);
|
|
ovp->sval->str[0] = *s++;
|
|
cnt++;
|
|
--slen;
|
|
}
|
|
ovp->link = (SPLIT_OV *) 0;
|
|
split_ov_list = dummy.link;
|
|
return cnt;
|
|
}
|
|
|
|
size_t
|
|
null_split(char *s, size_t slen)
|
|
{
|
|
size_t cnt = 0; /* number of fields split */
|
|
STRING *sval;
|
|
int i = 0; /* indexes split_buff[] */
|
|
|
|
while (slen) {
|
|
if (cnt == MAX_SPLIT) {
|
|
cnt += null_ov_split(s, slen);
|
|
break;
|
|
} else {
|
|
sval = new_STRING0((size_t) 1);
|
|
sval->str[0] = *s++;
|
|
split_buff[i++] = sval;
|
|
cnt++;
|
|
--slen;
|
|
}
|
|
}
|
|
return cnt;
|
|
}
|
|
|
|
/* split(s, X, r)
|
|
* split s into array X on r
|
|
*
|
|
* entry: sp[0] holds r
|
|
* sp[-1] pts at X
|
|
* sp[-2] holds s
|
|
*/
|
|
CELL *
|
|
bi_split(CELL * sp)
|
|
{
|
|
size_t cnt = 0; /* the number of pieces */
|
|
|
|
if (sp->type < C_RE)
|
|
cast_for_split(sp);
|
|
/* can be C_RE, C_SPACE or C_SNULL */
|
|
sp -= 2;
|
|
if (sp->type < C_STRING)
|
|
cast1_to_s(sp);
|
|
|
|
if (string(sp)->len == 0) { /* nothing to split */
|
|
cnt = 0;
|
|
} else {
|
|
switch ((sp + 2)->type) {
|
|
case C_RE:
|
|
cnt = re_split(string(sp), (sp + 2)->ptr);
|
|
break;
|
|
|
|
case C_SPACE:
|
|
cnt = space_split(string(sp)->str, string(sp)->len);
|
|
break;
|
|
|
|
case C_SNULL: /* split on empty string */
|
|
cnt = null_split(string(sp)->str, string(sp)->len);
|
|
break;
|
|
|
|
default:
|
|
bozo("bad splitting cell in bi_split");
|
|
}
|
|
}
|
|
|
|
free_STRING(string(sp));
|
|
sp->type = C_DOUBLE;
|
|
sp->dval = (double) cnt;
|
|
|
|
array_load((ARRAY) (sp + 1)->ptr, cnt);
|
|
|
|
return sp;
|
|
}
|