sbase/libutil/unescape.c
Michael Forney 948e516190 libutil/unescape: Stop octal escape at 3 digits
unescape() is used by several tools, in particular printf(1) and
tr(1), which should stop the octal escape at a maximum of 3 digits:

printf(1)
> In addition to the escape sequences shown in XBD 5. File Format
> Notation ('\\', '\a', '\b', '\f', '\n', '\r', '\t', '\v'), "\ddd",
> where ddd is a one, two, or three-digit octal number, shall be
> written as a byte with the numeric value specified by the octal
> number.

tr(1)
> An octal sequence shall consist of a <backslash> followed by the
> longest sequence of one, two, or three-octal-digit characters.

Previously, the maximum was set to 4 (possibly a typo?), which meant
that printf '\0123' printed `S` instead of `<newline>3`.

To check that this doesn't break any other tools using unescape:

- cut: used for -d parameter, escapes are non-standard
- join: used for -t parameter, escapes are non-standard
- nl: used for -s parameter, escapes are non-standard
- paste: used for -d parameter, POSIX specifies \n, \t, \\, and \0,
  \0 followed by a digit is unspecified
- sort: used for -t parameter, escapes are non-standard
2025-04-23 21:10:51 +02:00

59 lines
1.1 KiB
C

/* See LICENSE file for copyright and license details. */
#include <ctype.h>
#include <string.h>
#include "../util.h"
/* Nonzero if c is an octal digit ('0'..'7'). Note: c is evaluated twice. */
#define is_odigit(c) ('0' <= (c) && (c) <= '7')
/*
 * Decode backslash escape sequences in the NUL-terminated string s, in place.
 *
 * Recognized sequences:
 *   \" \' \\ \a \b \f \n \r \t \v   the usual C escapes
 *   \e \E                           ESC (octal 033)
 *   \o, \oo, \ooo                   one to three octal digits; values above
 *                                   255 are clamped to 255
 *   \xh, \xhh                       one or two hexadecimal digits
 *
 * A lone trailing backslash, or a backslash followed by any other character,
 * is reported through eprintf() (declared in ../util.h; presumably it does
 * not return -- confirm against util.h).
 *
 * Every escape is at least two bytes long and decodes to a single byte, so
 * the output never outgrows the input and can safely overwrite it.
 *
 * Returns the length of the decoded string, not counting the terminating
 * NUL. The decoded string may contain embedded NUL bytes (e.g. from "\0"),
 * so callers should rely on the returned length rather than strlen().
 */
size_t
unescape(char *s)
{
	static const char escapes[256] = {
		['"'] = '"',
		['\''] = '\'',
		['\\'] = '\\',
		['a'] = '\a',
		['b'] = '\b',
		['E'] = 033,
		['e'] = 033,
		['f'] = '\f',
		['n'] = '\n',
		['r'] = '\r',
		['t'] = '\t',
		['v'] = '\v'
	};
	size_t m, q;
	unsigned char c;
	char *r, *w;

	/* r is the read cursor, w the write cursor; w never overtakes r */
	for (r = w = s; *r;) {
		if (*r != '\\') {
			*w++ = *r++;
			continue;
		}
		r++;
		if (!*r) {
			eprintf("null escape sequence\n");
		} else if (escapes[(unsigned char)*r]) {
			*w++ = escapes[(unsigned char)*r++];
		} else if (is_odigit(*r)) {
			/* consume at most three octal digits */
			for (q = 0, m = 3; m && is_odigit(*r); m--, r++)
				q = q * 8 + (*r - '0');
			*w++ = MIN(q, 255);
		} else if (*r == 'x' && isxdigit((unsigned char)r[1])) {
			r++;
			/*
			 * Consume at most two hex digits. The <ctype.h>
			 * functions require a value representable as
			 * unsigned char (or EOF), so convert before calling
			 * them; a plain char may be negative.
			 */
			for (q = 0, m = 2; m && isxdigit((unsigned char)*r); m--, r++) {
				c = (unsigned char)*r;
				if (isdigit(c))
					q = q * 16 + (c - '0');
				else
					q = q * 16 + (tolower(c) - 'a' + 10);
			}
			*w++ = q;
		} else {
			eprintf("invalid escape sequence '\\%c'\n", *r);
		}
	}
	*w = '\0';

	return w - s;
}