diff --git a/lutf8lib.c b/lutf8lib.c index be016135..df49c901 100644 --- a/lutf8lib.c +++ b/lutf8lib.c @@ -215,9 +215,10 @@ static int byteoffset (lua_State *L) { } lua_pushinteger(L, posi + 1); /* initial position */ if ((s[posi] & 0x80) != 0) { /* multi-byte character? */ - do { - posi++; - } while (iscontp(s + posi + 1)); /* skip to final byte */ + if (iscont(s[posi])) + return luaL_error(L, "initial position is a continuation byte"); + while (iscontp(s + posi + 1)) + posi++; /* skip to last continuation byte */ } /* else one-byte character: final position is the initial one */ lua_pushinteger(L, posi + 1); /* 'posi' now is the final position */ diff --git a/testes/utf8.lua b/testes/utf8.lua index 143c6d34..028995a4 100644 --- a/testes/utf8.lua +++ b/testes/utf8.lua @@ -152,11 +152,20 @@ checkerror("position out of bounds", utf8.offset, "", 1, -1) checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) checkerror("continuation byte", utf8.offset, "𦧺", 1, 2) checkerror("continuation byte", utf8.offset, "\x80", 1) +checkerror("continuation byte", utf8.offset, "\x9c", -1) -- error in indices for len checkerror("out of bounds", utf8.len, "abc", 0, 2) checkerror("out of bounds", utf8.len, "abc", 1, 4) +do -- missing continuation bytes + -- get what is available + local p, e = utf8.offset("\xE0", 1) + assert(p == 1 and e == 1) + local p, e = utf8.offset("\xE0\x9e", -1) + assert(p == 1 and e == 2) +end + local s = "hello World" local t = {string.byte(s, 1, -1)}