mirror of
https://codeberg.org/landley/toybox.git
synced 2026-01-26 14:13:25 +00:00
In wc, replace mbrtowc() with new utf8towc() which doesn't have a context struct
or care about locale.
This commit is contained in:
parent
bebf14cc29
commit
67ddade337
@ -257,6 +257,7 @@ void linestack_addstack(struct linestack **lls, struct linestack *throw,
|
||||
void linestack_insert(struct linestack **lls, long pos, char *line, long len);
|
||||
void linestack_append(struct linestack **lls, char *line);
|
||||
struct linestack *linestack_load(char *name);
|
||||
int utf8towc(wchar_t *wc, char *str, unsigned len);
|
||||
int crunch_escape(FILE *out, int cols, int wc);
|
||||
int crunch_rev_escape(FILE *out, int cols, int wc);
|
||||
int crunch_str(char **str, int width, FILE *out, char *escmore,
|
||||
|
||||
@ -80,6 +80,37 @@ struct linestack *linestack_load(char *name)
|
||||
return ls;
|
||||
}
|
||||
|
||||
// Convert utf8 sequence to a unicode wide character.
// Examines at most len bytes of str and stores the decoded code point in *wc.
// Returns the number of bytes consumed (1-4), 0 if the byte was NUL,
// -1 if the sequence is invalid, or -2 if the input ends before the sequence
// is complete (including len == 0). *wc is only written on success.
int utf8towc(wchar_t *wc, char *str, unsigned len)
{
  // Smallest code point that legitimately needs 2, 3 and 4 bytes, indexed by
  // (number of continuation bytes - 1).
  static const unsigned smallest[] = {0x80, 0x800, 0x10000};
  // Read bytes as unsigned so values >= 0x80 compare and shift correctly even
  // where plain char is signed.
  unsigned char *s = (unsigned char *)str;
  unsigned result, lead, more, i;

  // Nothing to look at yet: ask for more data instead of reading past the end.
  if (!len) return -2;
  lead = *s;

  // fast path ASCII
  if (lead<128) return !!(*wc = lead);

  // 0x80-0xbf are stray continuation bytes, 0xc0/0xc1 can only start overlong
  // encodings, and 0xf5-0xff would decode past 0x10ffff. Rejecting them here
  // also bounds the sequence length at 4 bytes.
  if (lead<0xc2 || lead>0xf4) return -1;

  // Number of continuation bytes implied by the lead byte (1, 2 or 3), then
  // strip the length marker bits from the lead byte's payload.
  more = 1+(lead>=0xe0)+(lead>=0xf0);
  result = lead&(0x3f>>more);
  for (i = 1; i<=more; i++) {
    if (i==len) return -2;
    if ((s[i]&0xc0) != 0x80) return -1;
    result = (result<<6)|(s[i]&0x3f);
  }

  // Avoid overlong encodings
  if (result<smallest[more-1]) return -1;

  // Limit unicode so it can't encode anything UTF-16 can't.
  if (result>0x10ffff || (result>=0xd800 && result<=0xdfff)) return -1;
  *wc = result;

  return more+1;
}
|
||||
|
||||
// Show width many columns, negative means from right edge, out=0 just measure
|
||||
// if escout, send it unprintable chars, otherwise pass through raw data.
|
||||
// Returns width in columns, moves *str to end of data consumed.
|
||||
|
||||
@ -80,6 +80,7 @@ static void do_wc(int fd, char *name)
|
||||
|
||||
// next wide size, don't count invalid, fetch more data if necessary
|
||||
clen = mbrtowc(&wchar, toybuf+pos, len-pos, 0);
|
||||
clen = utf8towc(&wchar, toybuf+pos, len-pos);
|
||||
if (clen == -1) continue;
|
||||
if (clen == -2 && !done) break;
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user