Randomness added to table length computation

A bad actor could fill only a few entries in a table (power of twos in
decreasing order, see tests) and produce a small table with a huge
length. If your program builds a table with external data and iterates
over its length, this behavior could be an issue.
This commit is contained in:
Roberto Ierusalimschy 2025-07-18 16:18:30 -03:00
parent ccb8b307f1
commit 303f415559
6 changed files with 48 additions and 23 deletions

2
lapi.c
View File

@ -440,7 +440,7 @@ LUA_API lua_Unsigned lua_rawlen (lua_State *L, int idx) {
case LUA_VSHRSTR: return cast(lua_Unsigned, tsvalue(o)->shrlen);
case LUA_VLNGSTR: return cast(lua_Unsigned, tsvalue(o)->u.lnglen);
case LUA_VUSERDATA: return cast(lua_Unsigned, uvalue(o)->len);
case LUA_VTABLE: return luaH_getn(hvalue(o));
case LUA_VTABLE: return luaH_getn(L, hvalue(o));
default: return 0;
}
}

View File

@ -31,7 +31,8 @@
/*
** Computes ceil(log2(x))
** Computes ceil(log2(x)), which is the smallest integer n such that
** x <= (1 << n).
*/
lu_byte luaO_ceillog2 (unsigned int x) {
static const lu_byte log_2[256] = { /* log_2[i - 1] = ceil(log2(i)) */

View File

@ -1220,24 +1220,36 @@ void luaH_setint (lua_State *L, Table *t, lua_Integer key, TValue *value) {
/*
** Try to find a boundary in the hash part of table 't'. From the
** caller, we know that 'j' is zero or present and that 'j + 1' is
** present. We want to find a larger key that is absent from the
** table, so that we can do a binary search between the two keys to
** find a boundary. We keep doubling 'j' until we get an absent index.
** If the doubling would overflow, we try LUA_MAXINTEGER. If it is
** absent, we are ready for the binary search. ('j', being max integer,
** is larger or equal to 'i', but it cannot be equal because it is
** absent while 'i' is present; so 'j > i'.) Otherwise, 'j' is a
** boundary. ('j + 1' cannot be a present integer key because it is
** not a valid integer in Lua.)
** caller, we know that 'asize + 1' is present. We want to find a larger
** key that is absent from the table, so that we can do a binary search
** between the two keys to find a boundary. We keep doubling 'j' until
** we get an absent index. If the doubling would overflow, we try
** LUA_MAXINTEGER. If it is absent, we are ready for the binary search.
** ('j', being max integer, is larger or equal to 'i', but it cannot be
** equal because it is absent while 'i' is present.) Otherwise, 'j' is a
** boundary. ('j + 1' cannot be a present integer key because it is not
** a valid integer in Lua.)
** About 'rnd': If we used a fixed algorithm, a bad actor could fill
** a table with only the keys that would be probed, in such a way that
** a small table could result in a huge length. To avoid that, we use
** the state's seed as a source of randomness. For the first probe,
** we "randomly double" 'i' by adding to it a random number roughly its
** width.
*/
static lua_Unsigned hash_search (Table *t, lua_Unsigned j) {
lua_Unsigned i;
if (j == 0) j++; /* the caller ensures 'j + 1' is present */
do {
static lua_Unsigned hash_search (lua_State *L, Table *t, unsigned asize) {
lua_Unsigned i = asize + 1; /* caller ensures t[i] is present */
unsigned rnd = G(L)->seed;
int n = (asize > 0) ? luaO_ceillog2(asize) : 0; /* width of 'asize' */
unsigned mask = (1u << n) - 1; /* 11...111 with the width of 'asize' */
unsigned incr = (rnd & mask) + 1; /* first increment (at least 1) */
lua_Unsigned j = (incr <= l_castS2U(LUA_MAXINTEGER) - i) ? i + incr : i + 1;
rnd >>= n; /* used 'n' bits from 'rnd' */
while (!hashkeyisempty(t, j)) { /* repeat until an absent t[j] */
i = j; /* 'i' is a present index */
if (j <= l_castS2U(LUA_MAXINTEGER) / 2)
j *= 2;
if (j <= l_castS2U(LUA_MAXINTEGER)/2 - 1) {
j = j*2 + (rnd & 1); /* try again with 2j or 2j+1 */
rnd >>= 1;
}
else {
j = LUA_MAXINTEGER;
if (hashkeyisempty(t, j)) /* t[j] not present? */
@ -1245,7 +1257,7 @@ static lua_Unsigned hash_search (Table *t, lua_Unsigned j) {
else /* weird case */
return j; /* well, max integer is a boundary... */
}
} while (!hashkeyisempty(t, j)); /* repeat until an absent t[j] */
}
/* i < j && t[i] present && t[j] absent */
while (j - i > 1u) { /* do a binary search between them */
lua_Unsigned m = (i + j) / 2;
@ -1286,7 +1298,7 @@ static lua_Unsigned newhint (Table *t, unsigned hint) {
** If there is no array part, or its last element is non empty, the
** border may be in the hash part.
*/
lua_Unsigned luaH_getn (Table *t) {
lua_Unsigned luaH_getn (lua_State *L, Table *t) {
unsigned asize = t->asize;
if (asize > 0) { /* is there an array part? */
const unsigned maxvicinity = 4;
@ -1327,7 +1339,7 @@ lua_Unsigned luaH_getn (Table *t) {
if (isdummy(t) || hashkeyisempty(t, asize + 1))
return asize; /* 'asize + 1' is empty */
else /* 'asize + 1' is also non empty */
return hash_search(t, asize);
return hash_search(L, t, asize);
}

View File

@ -173,7 +173,7 @@ LUAI_FUNC void luaH_resizearray (lua_State *L, Table *t, unsigned nasize);
LUAI_FUNC lu_mem luaH_size (Table *t);
LUAI_FUNC void luaH_free (lua_State *L, Table *t);
LUAI_FUNC int luaH_next (lua_State *L, Table *t, StkId key);
LUAI_FUNC lua_Unsigned luaH_getn (Table *t);
LUAI_FUNC lua_Unsigned luaH_getn (lua_State *L, Table *t);
#if defined(LUA_DEBUG)

2
lvm.c
View File

@ -722,7 +722,7 @@ void luaV_objlen (lua_State *L, StkId ra, const TValue *rb) {
Table *h = hvalue(rb);
tm = fasttm(L, h->metatable, TM_LEN);
if (tm) break; /* metamethod? break switch to call it */
setivalue(s2v(ra), l_castU2S(luaH_getn(h))); /* else primitive len */
setivalue(s2v(ra), l_castU2S(luaH_getn(L, h))); /* else primitive len */
return;
}
case LUA_VSHRSTR: {

View File

@ -345,6 +345,18 @@ do
end
end
do print("testing attack on table length")
local t = {}
local lim = math.floor(math.log(math.maxinteger, 2)) - 1
for i = lim, 0, -1 do
t[2^i] = true
end
assert(t[1 << lim])
-- next loop should not take forever
for i = 1, #t do end
end
local nofind = {}