OP_SUBSTR_LEFT: GH#22914 - multiple pointers to replacement OP

The recent initial commit for OP_SUBSTR_LEFT failed to account for
there being multiple paths from a non-trivial LENGTH to the ""
replacement CONST OP. This could result in the replacement SV
being erroneously pushed to the stack, causing `pp_substr_left`
to try to operate on the wrong SV.

This commit nulls out the replacement OP, so that even if it
is encountered, no erroneous SV is pushed. Contrary to the
comment in the original commit, this actually does not break
B::Deparse.

Thanks to @mauke for figuring this out and preparing a patch
before I'd even opened my browser.
This commit is contained in:
Richard Leach 2025-01-15 22:41:05 +00:00
parent 7a1c156a29
commit b1397c41ce
3 changed files with 22 additions and 10 deletions

17
peep.c
View File

@ -3869,7 +3869,7 @@ Perl_rpeep(pTHX_ OP *o)
break;
case OP_SUBSTR: {
OP *expr, *offs, *len;
OP *expr, *offs, *len, *repl = NULL;
/* Specialize substr($x, 0, $y) and substr($x,0,$y,"") */
/* Does this substr have 3-4 args and amiable flags? */
if (
@ -3897,7 +3897,7 @@ Perl_rpeep(pTHX_ OP *o)
if (cMAXARG3x(o) == 4) {/* replacement */
/* Is the replacement string CONST ""? */
OP *repl = OpSIBLING(len);
repl = OpSIBLING(len);
if (repl->op_type != OP_CONST)
break;
SV *repl_sv = cSVOPx_sv(repl);
@ -3908,12 +3908,10 @@ Perl_rpeep(pTHX_ OP *o)
break;
}
/* It's on! */
/* Take out the static LENGTH & REPLACMENT OPs */
/* Take out the static LENGTH OP. */
/* (The finalizer does not seem to change op_next here) */
expr->op_next = offs->op_next;
o->op_private = cMAXARG3x(o);
if (cMAXARG3x(o) == 4)
len->op_next = o;
/* We have a problem if padrange pushes the expr OP for us,
* then jumps straight to the offs CONST OP. For example:
@ -3924,7 +3922,14 @@ Perl_rpeep(pTHX_ OP *o)
* B::Deparse. :/ */
op_null(offs);
/* repl status unchanged because it makes Deparsing easier. */
/* There can be multiple pointers to repl, see GH #22914.
* substr $x, 0, $y ? 2 : 3, "";
* So instead of rewriting all of len, null out repl. */
if (repl) {
op_null(repl);
/* We can still rewrite the simple len case though.*/
len->op_next = o;
}
/* Upgrade the SUBSTR to a SUBSTR_LEFT */
OpTYPE_set(o, OP_SUBSTR_LEFT);

View File

@ -104,5 +104,12 @@ $str = "\x00\x01\x02\x03\x04\x05";
$result = substr($str, 0, 3, "");
is($result, "\x00\x01\x02", 'hex EXPR: returns correct characters');
is($str, "\x03\x04\x05", 'hex EXPR: retains correct characters');
# GH #22914. LEN has more than one pointer to REPL.
$str = "perl";
# Hopefully $INC[0] ne '/dev/random' is a reasonable test assumption...
# (We need a condition that no future clever optimiser will strip)
$result = substr($str, 0, $INC[0] eq '/dev/random' ? 2: 3, '');
is($result, 'per', 'GH#22914: non-trivial LEN returns correct characters');
is($str, 'l', 'GH#22914: non-trivial LEN retains correct characters');
done_testing();

View File

@ -1034,7 +1034,7 @@ test_opcount(0, "substr with const zero offset and '' repl (void)",
{
substr => 0,
substr_left => 1,
const => 2,
const => 1,
});
test_opcount(0, "substr with const zero offset and '' repl (lexical)",
@ -1042,7 +1042,7 @@ test_opcount(0, "substr with const zero offset and '' repl (lexical)",
{
substr => 0,
substr_left => 1,
const => 2,
const => 1,
padsv => 3,
sassign => 1
});
@ -1052,7 +1052,7 @@ test_opcount(0, "substr with const zero offset and '' repl (lexical TARGMY)",
{
substr => 0,
substr_left => 1,
const => 2,
const => 1,
padsv => 3,
padsv_store => 0,
sassign => 0
@ -1063,7 +1063,7 @@ test_opcount(0, "substr with const zero offset and '' repl (gv)",
{
substr => 0,
substr_left => 1,
const => 2,
const => 1,
gvsv => 1,
sassign => 1
});