mawk/rexp0.c
2024-08-25 23:18:06 +00:00

1011 lines
20 KiB
C

/********************************************
rexp0.c
copyright 2008-2020,2024, Thomas E. Dickey
copyright 2010, Jonathan Nieder
copyright 1991-1994,1996, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp0.c,v 1.48 2024/08/25 17:16:24 tom Exp $
*/
/* lexical scanner */
#undef LOCAL_REGEXP /* no need for push/pop */
#include <rexp.h>
#include <ctype.h>
typedef struct {
int first;
int last;
} CCLASS;
/* static functions */
static int do_str(int, char **, MACHINE *);
static int do_class(char **, MACHINE *);
static int escape(char **);
static BV *store_bvp(BV *);
/* make next array visible */
/* *INDENT-OFF* */
static const
char char2token[] =
{
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*07*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*0f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*17*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*1f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_END, T_CHAR, T_CHAR, T_CHAR, /*27*/
T_LP, T_RP, T_STAR, T_PLUS, T_CHAR, T_CHAR, T_ANY, T_CHAR, /*2f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*37*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_Q, /*3f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*47*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*4f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*57*/
T_CHAR, T_CHAR, T_CHAR, T_CLASS,T_SLASH,T_CHAR, T_START,T_CHAR, /*5f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*67*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*6f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*77*/
T_CHAR, T_CHAR, T_CHAR, T_LB, T_OR, T_RB, T_CHAR, T_CHAR, /*7f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*87*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*8f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*97*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*9f*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*a7*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*af*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*b7*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*bf*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*c7*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*cf*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*d7*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*df*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*e7*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*ef*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, /*f7*/
T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR, T_CHAR /*ff*/
};
/* *INDENT-ON* */
#define NOT_STARTED (-1)
static int prev;
static size_t nest;
char *re_exp; /* ptr to reg exp string */
static char *re_str; /* base of 're_exp' */
static size_t re_len;
#ifndef NO_INTERVAL_EXPR
Int intrvalmin;
Int intrvalmax;
/*
* Given a string beginning with T_LB, check if that is an interval expression.
*/
static int
ok_intervals(const char *p)
{
int result = 0;
int comma = 0;
int ch;
while ((ch = (UChar) * ++p) != '\0') {
if (ch == R_CURL) {
result = 1;
break;
} else if (isdigit(ch)) {
; /* zero or more digits */
} else if (ch == ',') {
if (++comma > 1) {
break; /* zero or one commas */
}
} else {
break;
}
}
return result;
}
/*
Collect two numbers between T_LB and T_RB, saving
the values in intrvalmin and intrvalmax.
There are three ways the interval expressions are formed:
{n} => previous regexp is repeated n times
{n,m} => previous regexp is repeated n to m times
{n,} => previous regexp is repeated n or more times
{,m} => {0,m}
Note: awk doesn't define {,m}
returns: T_RB, or on error T_CHAR
*/
static int
do_intervals(
char **pp) /* where to put the re_char pointer on exit */
{
register char *p; /* runs thru the input */
p = *pp;
intrvalmin = 0;
intrvalmax = 0;
if (!isdigit((UChar) * p) && *p != ',') /* error */
{
RE_error_trap(-ERR_7);
}
if (*p != ',') {
intrvalmin = intrvalmin * 10 + *p++ - '0';
while (*p != '\0') {
if (isdigit((UChar) * p)) {
intrvalmin = intrvalmin * 10 + *p++ - '0';
} else if ((UChar) * p == R_CURL) {
p++;
*pp = p;
intrvalmax = intrvalmin; /* {n} */
return T_RB;
} else if ((UChar) * p == ',') {
if ((UChar) * ++p == R_CURL) {
p++;
*pp = p;
intrvalmax = MAX__INT;
return T_RB; /* {n,} */
}
break;
} else {
p++;
*pp = p;
RE_error_trap(-ERR_7);
}
}
} else {
p++;
}
while (*p != '\0') {
if (isdigit((UChar) * p)) {
intrvalmax = intrvalmax * 10 + *p++ - '0';
} else if ((UChar) * p == R_CURL) {
if (intrvalmax < intrvalmin) {
RE_error_trap(-ERR_7);
}
p++;
break;
} else {
p++;
*pp = p;
RE_error_trap(-ERR_7);
}
}
*pp = p;
return T_RB;
}
#endif /* ! NO_INTERVAL_EXPR */
void
RE_lex_init(char *re, size_t len)
{
re_str = re_exp = re;
re_len = len + 1;
prev = NOT_STARTED;
nest = 0;
#ifndef NO_INTERVAL_EXPR
intrvalmin = 0;
intrvalmax = 0; /* {n,} sets max to -1 */
#endif
RE_run_stack_init();
RE_pos_stack_init();
}
/*
* Get the next token from re_str.
*
* For nullary operations (T_STR, T_ANY, T_U, T_CLASS, T_START, T_END),
* before returning the appropriate token, this will write the
* corresponding machine to *mp.
*
* For the rest (T_PLUS, T_STAR, T_OR, T_Q, T_RP, T_LP, T_LB,
* T_RB, T_CAT), *mp is left alone.
*
* Returns 0 for end of regexp.
*/
int
RE_lex(MACHINE * mp)
{
/*
* re_exp records the current position while parsing.
* nest records the parenthesis nesting level.
* prev records the last token returned.
*/
register int c;
if ((unsigned) (1 + re_exp - re_str) >= re_len) {
return 0;
}
c = char2token[(UChar) (*re_exp)];
#ifndef NO_INTERVAL_EXPR
if (repetitions_flag) {
if (c == T_LB && !ok_intervals(re_exp))
c = T_CHAR;
} else {
if (c == T_LB || c == T_RB) {
c = T_CHAR;
}
}
#endif
switch (c) {
case T_PLUS:
case T_STAR:
if (prev == T_START)
RE_error_trap(6);
/* fall thru */
case T_OR:
case T_Q:
re_exp++;
return prev = c;
#ifndef NO_INTERVAL_EXPR
case T_LB:
re_exp++;
prev = T_LB;
break;
#endif
case T_RP:
if (!nest) {
/* ) without matching ( is ordinary */
c = T_CHAR;
break;
}
nest--;
re_exp++;
return prev = c;
case 0:
return 0;
case T_LP:
switch (prev) {
case T_CHAR:
case T_STR:
case T_ANY:
case T_CLASS:
case T_START:
case T_RP:
case T_PLUS:
case T_STAR:
case T_Q:
case T_U:
return prev = T_CAT;
#ifndef NO_INTERVAL_EXPR
case T_RB:
if (!repetitions_flag) {
return prev = T_CAT;
}
#endif
/* FALLTHRU */
default:
nest++;
re_exp++;
return prev = T_LP;
} /* T_LP switch */
}
/* *re_exp is an operand, but implicit cat op is possible */
switch (prev) {
case NOT_STARTED:
case T_OR:
case T_LP:
case T_CAT:
switch (c) {
case T_ANY:
{
static int plus_is_star_flag = 0;
if (*++re_exp == '*') {
re_exp++;
*mp = RE_u();
return prev = T_U;
} else if (*re_exp == '+') {
if (plus_is_star_flag) {
re_exp++;
*mp = RE_u();
plus_is_star_flag = 0;
return prev = T_U;
} else {
plus_is_star_flag = 1;
re_exp--;
*mp = RE_any();
return prev = T_ANY;
}
} else {
*mp = RE_any();
prev = T_ANY;
}
}
break;
case T_SLASH:
re_exp++;
c = escape(&re_exp);
prev = do_str(c, &re_exp, mp);
break;
#ifndef NO_INTERVAL_EXPR
case T_LB:
case T_RB:
#endif
case T_CHAR:
c = *re_exp++;
prev = do_str(c, &re_exp, mp);
break;
case T_CLASS:
prev = do_class(&re_exp, mp);
break;
case T_START:
*mp = RE_start();
re_exp++;
prev = T_START;
break;
case T_END:
re_exp++;
*mp = RE_end();
return prev = T_END;
default:
RE_panic("bad switch in RE_lex: %d", c);
} /* T_CAT switch */
break;
#ifndef NO_INTERVAL_EXPR
case T_LB:
/* get interval expression numbers until closing T_RB */
prev = do_intervals(&re_exp);
return prev = T_RB;
case T_RB:
/* FALLTHRU */
#endif
default:
/* don't advance the pointer */
return prev = T_CAT;
}
/* check for end character */
if (*re_exp == '$') {
mp->start->s_type = (SType) (mp->start->s_type + END_ON);
re_exp++;
}
return prev;
}
/*
Collect a run of characters into a string machine.
If the run ends at *,+, or ?, then don't take the last
character unless the string has length one.
*/
static int
do_str(
int c, /* the first character */
char **pp, /* where to put the re_char pointer on exit */
MACHINE * mp) /* where to put the string machine */
{
register char *p; /* runs thru the input */
char *pt = 0; /* trails p by one */
char *str; /* collect it here */
register char *s; /* runs thru the output */
size_t len; /* length collected */
p = *pp;
s = str = RE_malloc(re_len);
*s++ = (char) c;
len = 1;
while ((1 + p - re_str) < (int) re_len) {
char *save;
c = char2token[(UChar) (*p)];
#ifndef NO_INTERVAL_EXPR
if (!repetitions_flag && (c == T_LB || c == T_RB)) {
c = T_CHAR;
}
#endif
switch (c) {
case T_CHAR:
pt = p;
*s++ = *p++;
break;
case T_SLASH:
pt = p;
save = p + 1; /* keep p in a register */
*s++ = (char) escape(&save);
p = save;
break;
default:
goto out;
}
len++;
}
out:
/* if len > 1 and we stopped on a ? + or * , need to back up */
if (len > 1 && (*p == '*' || *p == '+' || *p == '?'
#ifndef NO_INTERVAL_EXPR
|| (repetitions_flag == 1 && *p == L_CURL)
#endif
)) {
len--;
p = pt;
s--;
}
*s = 0;
*pp = p;
*mp = RE_str((char *) RE_realloc(str, len + 1), len);
return T_STR;
}
/*--------------------------------------------
BUILD A CHARACTER CLASS
*---------------------------*/
#define char_on(b,x) ((b)[(x)>>3] |= (UChar) ( 1 << ((x)&7) ))
static void
block_on(BV b, int x, int y)
/* caller makes sure x<=y and x>0 y>0 */
{
int lo = x >> 3;
int hi = y >> 3;
int r_lo = x & 7;
int r_hi = y & 7;
if (lo == hi) {
b[lo] |= (UChar) ((1 << (r_hi + 1)) - (1 << r_lo));
} else {
int i;
for (i = lo + 1; i < hi; i++)
b[i] = 0xff;
b[lo] |= (UChar) (0xff << r_lo);
b[hi] |= (UChar) (~(0xff << (r_hi + 1)));
}
}
#define CCLASS_DATA(name) { CCLASS_##name, #name, sizeof(#name) - 1 }
typedef enum {
CCLASS_NONE = 0,
CCLASS_alnum,
CCLASS_alpha,
CCLASS_blank,
CCLASS_cntrl,
CCLASS_digit,
CCLASS_graph,
CCLASS_lower,
CCLASS_print,
CCLASS_punct,
CCLASS_space,
CCLASS_upper,
CCLASS_xdigit
} CCLASS_ENUM;
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
static CCLASS *
lookup_cclass(char **start)
{
static CCLASS *cclass_data[CCLASS_xdigit];
static const struct {
CCLASS_ENUM code;
const char name[8];
unsigned size;
} cclass_table[] = {
CCLASS_DATA(alnum),
CCLASS_DATA(alpha),
CCLASS_DATA(blank),
CCLASS_DATA(cntrl),
CCLASS_DATA(digit),
CCLASS_DATA(graph),
CCLASS_DATA(lower),
CCLASS_DATA(print),
CCLASS_DATA(punct),
CCLASS_DATA(space),
CCLASS_DATA(upper),
CCLASS_DATA(xdigit),
};
CCLASS *result = 0;
CCLASS_ENUM code = CCLASS_NONE;
const char *name;
char *colon;
size_t size;
size_t item;
#ifdef NO_LEAKS
if (start == 0) {
for (item = 0; item < (sizeof(cclass_data) /
sizeof(cclass_data[0])); ++item) {
if (cclass_data[item]) {
free(cclass_data[item]);
cclass_data[item] = 0;
}
}
return 0;
}
#endif
name = (*start += 2); /* point past "[:" */
colon = strchr(name, ':');
if (colon == 0 || colon[1] != ']') {
RE_error_trap(-ERR_3);
}
size = (size_t) (colon - *start); /* length of name */
if (size < 5 || size > 6) {
RE_error_trap(-ERR_3);
}
*start = colon + 2;
switch (name[0]) {
case 'a':
item = ((name[2] == 'n')
? CCLASS_alnum
: CCLASS_alpha);
break;
case 'b':
item = CCLASS_blank;
break;
case 'c':
item = CCLASS_cntrl;
break;
case 'd':
item = CCLASS_digit;
break;
case 'g':
item = CCLASS_graph;
break;
case 'l':
item = CCLASS_lower;
break;
case 'p':
item = ((name[1] == 'r')
? CCLASS_print
: CCLASS_punct);
break;
case 's':
item = CCLASS_space;
break;
case 'u':
item = CCLASS_upper;
break;
case 'x':
item = CCLASS_xdigit;
break;
default:
item = CCLASS_NONE;
break;
}
if (item-- != CCLASS_NONE &&
(size == cclass_table[item].size) &&
!strncmp(name, cclass_table[item].name, size)) {
code = cclass_table[item].code;
} else {
RE_error_trap(-ERR_3);
}
if ((result = cclass_data[item]) == 0) {
int ch = 0;
size_t have = 4;
size_t used = 0;
CCLASS *data = malloc(sizeof(CCLASS) * have);
int in_class = 0;
int first = -2;
int last = -2;
for (ch = 0; ch < 256; ++ch) {
switch (code) {
case CCLASS_NONE:
in_class = 0;
break;
case CCLASS_alnum:
in_class = isalnum(ch);
break;
case CCLASS_alpha:
in_class = isalpha(ch);
break;
case CCLASS_blank:
in_class = isblank(ch);
break;
case CCLASS_cntrl:
in_class = iscntrl(ch);
break;
case CCLASS_digit:
in_class = isdigit(ch);
break;
case CCLASS_graph:
in_class = isgraph(ch);
break;
case CCLASS_lower:
in_class = islower(ch);
break;
case CCLASS_print:
in_class = isprint(ch);
break;
case CCLASS_punct:
in_class = ispunct(ch);
break;
case CCLASS_space:
in_class = isspace(ch);
break;
case CCLASS_upper:
in_class = isupper(ch);
break;
case CCLASS_xdigit:
in_class = isxdigit(ch);
break;
}
if (in_class) {
if (first >= 0) {
last = ch;
} else {
first = last = ch;
}
} else if (first >= 0) {
if (used + 2 >= have) {
have *= 2;
data = realloc(data, sizeof(CCLASS) * have);
}
data[used].first = first;
data[used].last = last;
++used;
first = last = -2;
}
}
if (first >= 0) {
if (used + 2 >= have) {
have *= 2;
data = realloc(data, sizeof(CCLASS) * have);
}
data[used].first = first;
data[used].last = last;
++used;
}
data[used].first = -1;
cclass_data[item] = data;
result = data;
}
return result;
}
static CCLASS *
get_cclass(char *start, char **next)
{
CCLASS *result = 0;
if (start[0] == '['
&& start[1] == ':') {
result = lookup_cclass(&start);
if (next != 0) {
*next = start;
}
}
return result;
}
/*
* Check if we're pointing to a left square-bracket. If so, return nonzero
* if that is a literal one, not part of character class, etc.
*
* http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html#tag_09_03_05
*/
static int
literal_leftsq(char *start)
{
int result = 0;
if (start[0] == '[') {
if (get_cclass(start, 0) == 0)
result = 1;
}
return result;
}
/* build a BV for a character class.
*start points at the '['
on exit: *start points at the character after ']'
mp points at a machine that recognizes the class
*/
static int
do_class(char **start, MACHINE * mp)
{
char *p, *q;
BV *bvp;
int prevc;
int comp_flag;
int level;
CCLASS *cclass;
p = (*start) + 1;
/* []...] puts ] in a class
[^]..] negates a class with ]
*/
if (literal_leftsq(p) || p[0] == ']')
p++;
else if (p[0] == '^' && (literal_leftsq(p + 1) || p[1] == ']'))
p += 2;
/* XXX. Does not handle collating symbols or equivalence
* class expressions. See also collect_RE(). */
for (level = 0, q = p;; ++q) {
if (*q == '[' && q[1] == ':') {
if (++level > 1)
RE_error_trap(-ERR_3);
} else if (*q == ']') {
if (level == 0)
break;
if (q[-1] != ':')
RE_error_trap(-ERR_3);
--level;
} else if (*q == '\\') {
++q;
}
if (*q == '\0' && q == (re_str + re_len - 1)) {
/* no closing bracket */
RE_error_trap(-ERR_3);
}
}
/* q now pts at the back of the class */
p = *start + 1;
*start = q + 1;
bvp = (BV *) RE_malloc(sizeof(BV));
memset(bvp, 0, sizeof(BV));
if (*p == '^') {
comp_flag = 1;
p++;
} else
comp_flag = 0;
prevc = -1; /* indicates - cannot be part of a range */
while (p < q) {
switch (*p) {
case '\\':
++p;
prevc = escape(&p);
char_on(*bvp, prevc);
break;
case '[':
if ((cclass = get_cclass(p, &p)) != 0) {
while (cclass->first >= 0) {
block_on(*bvp, cclass->first, cclass->last);
++cclass;
}
} else {
prevc = (UChar) * p++;
char_on(*bvp, prevc);
}
break;
case '-':
if (prevc == -1 || p + 1 == q) {
prevc = '-';
char_on(*bvp, '-');
p++;
} else {
int c;
char *mark = ++p;
if (*p != '\\')
c = (UChar) * p++;
else {
++p;
c = escape(&p);
}
if (prevc <= c) {
block_on(*bvp, prevc, c);
prevc = -1;
} else { /* back up */
p = mark;
prevc = '-';
char_on(*bvp, '-');
}
}
break;
default:
prevc = (UChar) * p++;
char_on(*bvp, prevc);
break;
}
}
if (comp_flag) {
for (p = (char *) bvp; p < (char *) bvp + sizeof(BV); p++) {
*p = (char) (~*p);
}
}
*mp = RE_class(store_bvp(bvp));
return T_CLASS;
}
/* storage for bit vectors so they can be reused ,
stored in an unsorted linear array
the array grows as needed
*/
#define BV_GROWTH 6
static BV **bv_base, **bv_limit;
static BV **bv_next; /* next empty slot in the array */
static BV *
store_bvp(BV * bvp)
{
register BV **p;
unsigned t;
if (bv_next == bv_limit) {
/* need to grow */
if (!bv_base) {
/* first growth */
t = 0;
bv_base = (BV **) RE_malloc(BV_GROWTH * sizeof(BV *));
} else {
t = (unsigned) (bv_next - bv_base);
bv_base = (BV **) RE_realloc(bv_base,
(t + BV_GROWTH) * sizeof(BV *));
}
bv_next = bv_base + t;
bv_limit = bv_next + BV_GROWTH;
}
/* put bvp in bv_next as a sentinel */
*bv_next = bvp;
p = bv_base;
while (memcmp(*p, bvp, sizeof(BV)))
p++;
if (p == bv_next) {
/* it is new */
bv_next++;
} else {
/* we already have it */
RE_free(bvp);
}
return *p;
}
/* ---------- convert escape sequences -------------*/
#define isoctal(x) ((x)>='0'&&(x)<='7')
#define NOT_HEX 16
static const char hex_val['f' - 'A' + 1] =
{
10, 11, 12, 13, 14, 15, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
10, 11, 12, 13, 14, 15};
/* interpret 1 character as hex */
static int
ctohex(int c)
{
int t;
if (isdigit((UChar) c))
return c - '0';
if (isxdigit((UChar) c) && (t = hex_val[c - 'A']))
return t;
return NOT_HEX;
}
/*
* Return the char and move the pointer forward.
* On entry *s -> at the character after the slash.
*/
static int
escape(char **start_p)
{
register char *p = *start_p;
register unsigned x;
unsigned xx;
switch (*p) {
case 'n':
*start_p = p + 1;
return '\n';
case 't':
*start_p = p + 1;
return '\t';
case 'f':
*start_p = p + 1;
return '\f';
case 'b':
*start_p = p + 1;
return '\b';
case 'r':
*start_p = p + 1;
return '\r';
case 'a':
*start_p = p + 1;
return '\07';
case 'v':
*start_p = p + 1;
return '\013';
case '\0':
return '\\';
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
x = (unsigned) (*p++ - '0');
if (isoctal(*p)) {
x = (x << 3) + (unsigned) (*p++ - '0');
if (isoctal(*p))
x = (x << 3) + (unsigned) (*p++ - '0');
}
*start_p = p;
return (int) (x & 0xff);
case 'x':
++p;
if ((x = (unsigned) ctohex(*p)) == NOT_HEX) {
*start_p = p;
return 'x';
}
/* look for another hex digit */
if ((xx = (unsigned) ctohex(*++p)) != NOT_HEX) {
x = (x << 4) + xx;
p++;
}
*start_p = p;
return (int) x;
default:
/* anything else \c -> c */
*start_p = p + 1;
return (UChar) * p;
}
}