mirror of
https://github.com/lua/lua.git
synced 2026-01-26 07:37:58 +00:00
Correction in utf8.offset
A malformed UTF-8 character may have a lead byte with no continuation bytes.
This commit is contained in:
parent
60b6599e83
commit
ccb8b307f1
@ -215,9 +215,10 @@ static int byteoffset (lua_State *L) {
|
||||
}
|
||||
lua_pushinteger(L, posi + 1); /* initial position */
|
||||
if ((s[posi] & 0x80) != 0) { /* multi-byte character? */
|
||||
do {
|
||||
posi++;
|
||||
} while (iscontp(s + posi + 1)); /* skip to final byte */
|
||||
if (iscont(s[posi]))
|
||||
return luaL_error(L, "initial position is a continuation byte");
|
||||
while (iscontp(s + posi + 1))
|
||||
posi++; /* skip to last continuation byte */
|
||||
}
|
||||
/* else one-byte character: final position is the initial one */
|
||||
lua_pushinteger(L, posi + 1); /* 'posi' now is the final position */
|
||||
|
||||
@ -152,11 +152,20 @@ checkerror("position out of bounds", utf8.offset, "", 1, -1)
|
||||
checkerror("continuation byte", utf8.offset, "𦧺", 1, 2)
|
||||
checkerror("continuation byte", utf8.offset, "𦧺", 1, 2)
|
||||
checkerror("continuation byte", utf8.offset, "\x80", 1)
|
||||
checkerror("continuation byte", utf8.offset, "\x9c", -1)
|
||||
|
||||
-- error in indices for len
|
||||
checkerror("out of bounds", utf8.len, "abc", 0, 2)
|
||||
checkerror("out of bounds", utf8.len, "abc", 1, 4)
|
||||
|
||||
do -- missing continuation bytes
|
||||
-- get what is available
|
||||
local p, e = utf8.offset("\xE0", 1)
|
||||
assert(p == 1 and e == 1)
|
||||
local p, e = utf8.offset("\xE0\x9e", -1)
|
||||
assert(p == 1 and e == 2)
|
||||
end
|
||||
|
||||
|
||||
local s = "hello World"
|
||||
local t = {string.byte(s, 1, -1)}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user