Short strings can be external, too

That slightly complicates object equality (and therefore table access
for long strings), but the old behavior was somewhat weird. (Short
strings, a concept otherwise absent from the manual, could not be
external.)
This commit is contained in:
Roberto Ierusalimschy 2025-07-15 14:40:27 -03:00
parent c612685d4b
commit 60b6599e83
9 changed files with 168 additions and 120 deletions

View File

@ -345,8 +345,8 @@ static void *freelib (void *ud, void *ptr, size_t osize, size_t nsize) {
** Create a library string that, when deallocated, will unload 'plib'
*/
static void createlibstr (lua_State *L, void *plib) {
static const char dummy[] = /* common long body for all library strings */
"01234567890123456789012345678901234567890123456789";
/* common content for all library strings */
static const char dummy[] = "01234567890";
lua_pushexternalstring(L, dummy, sizeof(dummy) - 1, freelib, plib);
}

View File

@ -418,6 +418,7 @@ typedef struct TString {
#define strisshr(ts) ((ts)->shrlen >= 0)
#define isextstr(ts) (ttislngstring(ts) && tsvalue(ts)->shrlen != LSTRREG)
/*

View File

@ -39,14 +39,14 @@
/*
** equality for long strings
** generic equality for strings
*/
int luaS_eqlngstr (TString *a, TString *b) {
size_t len = a->u.lnglen;
lua_assert(a->tt == LUA_VLNGSTR && b->tt == LUA_VLNGSTR);
return (a == b) || /* same instance or... */
((len == b->u.lnglen) && /* equal length and ... */
(memcmp(getlngstr(a), getlngstr(b), len) == 0)); /* equal contents */
int luaS_eqstr (TString *a, TString *b) {
size_t len1, len2;
const char *s1 = getlstr(a, len1);
const char *s2 = getlstr(b, len2);
return ((len1 == len2) && /* equal length and ... */
(memcmp(s1, s2, len1) == 0)); /* equal contents */
}
@ -315,28 +315,9 @@ static void f_newext (lua_State *L, void *ud) {
}
static void f_pintern (lua_State *L, void *ud) {
struct NewExt *ne = cast(struct NewExt *, ud);
ne->ts = internshrstr(L, ne->s, ne->len);
}
TString *luaS_newextlstr (lua_State *L,
const char *s, size_t len, lua_Alloc falloc, void *ud) {
struct NewExt ne;
if (len <= LUAI_MAXSHORTLEN) { /* short string? */
ne.s = s; ne.len = len;
if (!falloc)
f_pintern(L, &ne); /* just internalize string */
else {
TStatus status = luaD_rawrunprotected(L, f_pintern, &ne);
(*falloc)(ud, cast_voidp(s), len + 1, 0); /* free external string */
if (status != LUA_OK) /* memory error? */
luaM_error(L); /* re-raise memory error */
}
return ne.ts;
}
/* "normal" case: long strings */
if (!falloc) {
ne.kind = LSTRFIX;
f_newext(L, &ne); /* just create header */
@ -357,3 +338,16 @@ TString *luaS_newextlstr (lua_State *L,
}
/*
** Normalize an external string. When its length does not exceed
** LUAI_MAXSHORTLEN, the result is the internalized short string with
** the same contents; otherwise the string itself is returned untouched.
** The length and the buffer are read through the long-string fields
** of 'ts'.
*/
TString *luaS_normstr (lua_State *L, TString *ts) {
  size_t l = ts->u.lnglen;
  if (l <= LUAI_MAXSHORTLEN)  /* fits the short-string limit? */
    return internshrstr(L, getlngstr(ts), l);  /* use internalized version */
  return ts;  /* long string; keep the original */
}

View File

@ -56,7 +56,7 @@
LUAI_FUNC unsigned luaS_hash (const char *str, size_t l, unsigned seed);
LUAI_FUNC unsigned luaS_hashlongstr (TString *ts);
LUAI_FUNC int luaS_eqlngstr (TString *a, TString *b);
LUAI_FUNC int luaS_eqstr (TString *a, TString *b);
LUAI_FUNC void luaS_resize (lua_State *L, int newsize);
LUAI_FUNC void luaS_clearcache (global_State *g);
LUAI_FUNC void luaS_init (lua_State *L);
@ -69,5 +69,6 @@ LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l);
LUAI_FUNC TString *luaS_newextlstr (lua_State *L,
const char *s, size_t len, lua_Alloc falloc, void *ud);
LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind);
LUAI_FUNC TString *luaS_normstr (lua_State *L, TString *ts);
#endif

View File

@ -234,41 +234,51 @@ l_sinline Node *mainpositionfromnode (const Table *t, Node *nd) {
** Check whether key 'k1' is equal to the key in node 'n2'. This
** equality is raw, so there are no metamethods. Floats with integer
** values have been normalized, so integers cannot be equal to
** floats. It is assumed that 'eqshrstr' is simply pointer equality, so
** that short strings are handled in the default case.
** A true 'deadok' means to accept dead keys as equal to their original
** values. All dead keys are compared in the default case, by pointer
** identity. (Only collectable objects can produce dead keys.) Note that
** dead long strings are also compared by identity.
** Once a key is dead, its corresponding value may be collected, and
** then another value can be created with the same address. If this
** other value is given to 'next', 'equalkey' will signal a false
** positive. In a regular traversal, this situation should never happen,
** as all keys given to 'next' came from the table itself, and therefore
** could not have been collected. Outside a regular traversal, we
** have garbage in, garbage out. What is relevant is that this false
** positive does not break anything. (In particular, 'next' will return
** some other valid item on the table or nil.)
** floats. It is assumed that 'eqshrstr' is simply pointer equality,
** so that short strings are handled in the default case. The flag
** 'deadok' means to accept dead keys as equal to their original values.
** (Only collectable objects can produce dead keys.) Note that dead
** long strings are also compared by identity. Once a key is dead,
** its corresponding value may be collected, and then another value
** can be created with the same address. If this other value is given
** to 'next', 'equalkey' will signal a false positive. In a regular
** traversal, this situation should never happen, as all keys given to
** 'next' came from the table itself, and therefore could not have been
** collected. Outside a regular traversal, we have garbage in, garbage
** out. What is relevant is that this false positive does not break
** anything. (In particular, 'next' will return some other valid item
** on the table or nil.)
*/
static int equalkey (const TValue *k1, const Node *n2, int deadok) {
if ((rawtt(k1) != keytt(n2)) && /* not the same variants? */
!(deadok && keyisdead(n2) && iscollectable(k1)))
return 0; /* cannot be same key */
switch (keytt(n2)) {
case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
return 1;
case LUA_VNUMINT:
return (ivalue(k1) == keyival(n2));
case LUA_VNUMFLT:
return luai_numeq(fltvalue(k1), fltvalueraw(keyval(n2)));
case LUA_VLIGHTUSERDATA:
return pvalue(k1) == pvalueraw(keyval(n2));
case LUA_VLCF:
return fvalue(k1) == fvalueraw(keyval(n2));
case ctb(LUA_VLNGSTR):
return luaS_eqlngstr(tsvalue(k1), keystrval(n2));
default:
if (rawtt(k1) != keytt(n2)) { /* not the same variants? */
if (keyisshrstr(n2) && ttislngstring(k1)) {
/* an external string can be equal to a short-string key */
return luaS_eqstr(tsvalue(k1), keystrval(n2));
}
else if (deadok && keyisdead(n2) && iscollectable(k1)) {
/* a collectable value can be equal to a dead key */
return gcvalue(k1) == gcvalueraw(keyval(n2));
}
else
return 0; /* otherwise, different variants cannot be equal */
}
else { /* equal variants */
switch (keytt(n2)) {
case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
return 1;
case LUA_VNUMINT:
return (ivalue(k1) == keyival(n2));
case LUA_VNUMFLT:
return luai_numeq(fltvalue(k1), fltvalueraw(keyval(n2)));
case LUA_VLIGHTUSERDATA:
return pvalue(k1) == pvalueraw(keyval(n2));
case LUA_VLCF:
return fvalue(k1) == fvalueraw(keyval(n2));
case ctb(LUA_VLNGSTR):
return luaS_eqstr(tsvalue(k1), keystrval(n2));
default:
return gcvalue(k1) == gcvalueraw(keyval(n2));
}
}
}
@ -1158,6 +1168,14 @@ void luaH_finishset (lua_State *L, Table *t, const TValue *key,
else if (l_unlikely(luai_numisnan(f)))
luaG_runerror(L, "table index is NaN");
}
else if (isextstr(key)) { /* external string? */
/* If string is short, must internalize it to be used as table key */
TString *ts = luaS_normstr(L, tsvalue(key));
setsvalue2s(L, L->top.p++, ts); /* anchor 'ts' (EXTRA_STACK) */
luaH_newkey(L, t, s2v(L->top.p - 1), value);
L->top.p--;
return;
}
luaH_newkey(L, t, key, value);
}
else if (hres > 0) { /* regular Node? */

View File

@ -1066,8 +1066,12 @@ static int tracegc (lua_State *L) {
static int hash_query (lua_State *L) {
if (lua_isnone(L, 2)) {
TString *ts;
luaL_argcheck(L, lua_type(L, 1) == LUA_TSTRING, 1, "string expected");
lua_pushinteger(L, cast_int(tsvalue(obj_at(L, 1))->hash));
ts = tsvalue(obj_at(L, 1));
if (ts->tt == LUA_VLNGSTR)
luaS_hashlongstr(ts); /* make sure long string has a hash */
lua_pushinteger(L, cast_int(ts->hash));
}
else {
TValue *o = obj_at(L, 1);

106
lvm.c
View File

@ -573,52 +573,74 @@ int luaV_lessequal (lua_State *L, const TValue *l, const TValue *r) {
*/
int luaV_equalobj (lua_State *L, const TValue *t1, const TValue *t2) {
const TValue *tm;
if (ttypetag(t1) != ttypetag(t2)) { /* not the same variant? */
if (ttype(t1) != ttype(t2) || ttype(t1) != LUA_TNUMBER)
return 0; /* only numbers can be equal with different variants */
else { /* two numbers with different variants */
/* One of them is an integer. If the other does not have an
integer value, they cannot be equal; otherwise, compare their
integer values. */
lua_Integer i1, i2;
return (luaV_tointegerns(t1, &i1, F2Ieq) &&
luaV_tointegerns(t2, &i2, F2Ieq) &&
i1 == i2);
if (ttype(t1) != ttype(t2)) /* not the same type? */
return 0;
else if (ttypetag(t1) != ttypetag(t2)) {
switch (ttypetag(t1)) {
case LUA_VNUMINT: { /* integer == float? */
/* integer and float can only be equal if float has an integer
value equal to the integer */
lua_Integer i2;
return (luaV_flttointeger(fltvalue(t2), &i2, F2Ieq) &&
ivalue(t1) == i2);
}
case LUA_VNUMFLT: { /* float == integer? */
lua_Integer i1; /* see comment in previous case */
return (luaV_flttointeger(fltvalue(t1), &i1, F2Ieq) &&
i1 == ivalue(t2));
}
case LUA_VSHRSTR: case LUA_VLNGSTR: {
/* compare two strings with different variants: they can be
equal when one string is a short string and the other is
an external string */
return luaS_eqstr(tsvalue(t1), tsvalue(t2));
}
default:
/* only numbers (integer/float) and strings (long/short) can have
equal values with different variants */
return 0;
}
}
/* values have same type and same variant */
switch (ttypetag(t1)) {
case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE: return 1;
case LUA_VNUMINT: return (ivalue(t1) == ivalue(t2));
case LUA_VNUMFLT: return luai_numeq(fltvalue(t1), fltvalue(t2));
case LUA_VLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
case LUA_VLCF: return fvalue(t1) == fvalue(t2);
case LUA_VSHRSTR: return eqshrstr(tsvalue(t1), tsvalue(t2));
case LUA_VLNGSTR: return luaS_eqlngstr(tsvalue(t1), tsvalue(t2));
case LUA_VUSERDATA: {
if (uvalue(t1) == uvalue(t2)) return 1;
else if (L == NULL) return 0;
tm = fasttm(L, uvalue(t1)->metatable, TM_EQ);
if (tm == NULL)
tm = fasttm(L, uvalue(t2)->metatable, TM_EQ);
break; /* will try TM */
else { /* equal variants */
switch (ttypetag(t1)) {
case LUA_VNIL: case LUA_VFALSE: case LUA_VTRUE:
return 1;
case LUA_VNUMINT:
return (ivalue(t1) == ivalue(t2));
case LUA_VNUMFLT:
return (fltvalue(t1) == fltvalue(t2));
case LUA_VLIGHTUSERDATA: return pvalue(t1) == pvalue(t2);
case LUA_VSHRSTR:
return eqshrstr(tsvalue(t1), tsvalue(t2));
case LUA_VLNGSTR:
return luaS_eqstr(tsvalue(t1), tsvalue(t2));
case LUA_VUSERDATA: {
if (uvalue(t1) == uvalue(t2)) return 1;
else if (L == NULL) return 0;
tm = fasttm(L, uvalue(t1)->metatable, TM_EQ);
if (tm == NULL)
tm = fasttm(L, uvalue(t2)->metatable, TM_EQ);
break; /* will try TM */
}
case LUA_VTABLE: {
if (hvalue(t1) == hvalue(t2)) return 1;
else if (L == NULL) return 0;
tm = fasttm(L, hvalue(t1)->metatable, TM_EQ);
if (tm == NULL)
tm = fasttm(L, hvalue(t2)->metatable, TM_EQ);
break; /* will try TM */
}
case LUA_VLCF:
return (fvalue(t1) == fvalue(t2));
default: /* functions and threads */
return (gcvalue(t1) == gcvalue(t2));
}
case LUA_VTABLE: {
if (hvalue(t1) == hvalue(t2)) return 1;
else if (L == NULL) return 0;
tm = fasttm(L, hvalue(t1)->metatable, TM_EQ);
if (tm == NULL)
tm = fasttm(L, hvalue(t2)->metatable, TM_EQ);
break; /* will try TM */
if (tm == NULL) /* no TM? */
return 0; /* objects are different */
else {
int tag = luaT_callTMres(L, tm, t1, t2, L->top.p); /* call TM */
return !tagisfalse(tag);
}
default:
return gcvalue(t1) == gcvalue(t2);
}
if (tm == NULL) /* no TM? */
return 0; /* objects are different */
else {
int tag = luaT_callTMres(L, tm, t1, t2, L->top.p); /* call TM */
return !tagisfalse(tag);
}
}

View File

@ -2419,8 +2419,8 @@ for instance @T{foo(e1, e2, e3)} @see{functioncall}.}
@item{A multiple assignment,
for instance @T{a , b, c = e1, e2, e3} @see{assignment}.}
@item{A local declaration,
for instance @T{local a , b, c = e1, e2, e3} @see{localvar}.}
@item{A local or global declaration,
which is a special case of multiple assignment.}
@item{The initial values in a generic @rw{for} loop,
for instance @T{for k in e1, e2, e3 do ... end} @see{for}.}
@ -2431,8 +2431,7 @@ the list of values from the list of expressions
must be @emph{adjusted} to a specific length:
the number of parameters in a call to a non-variadic function
@see{func-def},
the number of variables in a multiple assignment or
a local declaration,
the number of variables in a multiple assignment or a declaration,
and exactly four values for a generic @rw{for} loop.
The @def{adjustment} follows these rules:
If there are more values than needed,
@ -4075,11 +4074,6 @@ the string @id{s} as the block,
the length plus one (to account for the ending zero) as the old size,
and 0 as the new size.
Lua always @x{internalizes} strings with lengths up to 40 characters.
So, for strings in that range,
this function will immediately internalize the string
and call @id{falloc} to free the buffer.
Even when using an external buffer,
Lua still has to allocate a header for the string.
In case of a memory-allocation error,

View File

@ -300,12 +300,6 @@ else
assert(_ENV.x == "lib2-v2" and _ENV.y == DC"lib2-v2")
assert(lib2.id("x") == true) -- a different "id" implementation
for _, len in ipairs{0, 10, 39, 40, 41, 1000} do
local str = string.rep("a", len)
local str1 = lib2.newstr(str)
assert(str == str1)
end
-- test C submodules
local fs, ext = require"lib1.sub"
assert(_ENV.x == "lib1.sub" and _ENV.y == DC"lib1")
@ -314,11 +308,11 @@ else
_ENV.x, _ENV.y = nil
end
_ENV = _G
-- testing preload
do
local p = package
package = {}
@ -337,6 +331,26 @@ do
assert(type(package.path) == "string")
end
do print("testing external strings")
  -- NOTE(review): 'lib2.newstr' is presumably the C helper that returns
  -- its argument as an external string; confirm against lib2-v2.
  package.cpath = DC"?"
  local lib2 = require"lib2-v2"
  local shared = {}   -- single table reused for every length below
  for _, len in ipairs{0, 10, 39, 40, 41, 1000} do
    local plain = string.rep("a", len)
    local ext = lib2.newstr(plain)
    -- same value and (when the test library 'T' is present) same hash
    assert(plain == ext)
    assert(not T or T.hash(plain) == T.hash(ext))
    -- both strings must reach the same table entry, whichever one
    -- was used for the assignment
    shared[ext] = 20
    assert(shared[plain] == 20 and shared[ext] == 20)
    shared[plain] = 10
    assert(shared[ext] == 10)
    -- string from 'newstr' as the only key (and value) in a constructor
    local single = {[ext] = ext}
    assert(next(single) == ext and next(single, ext) == nil)
    assert(single[plain] == plain)
    -- result of 'newstr' applied to a previous result of 'newstr'
    local ext2 = lib2.newstr(ext)
    assert(plain == ext2 and shared[ext2] == 10 and single[ext2] == plain)
  end
end
print('+')
end --]