Optimise substr assignment in void context

In void context we can optimise

    substr($foo, $bar, $baz) = $replacement;

to something like

    substr($foo, $bar, $baz, $replacement);

except that the execution order must be preserved.  So what we actu-
ally do is

    substr($replacement, $foo, $bar, $baz);

with a flag to indicate that the replacement comes first.  This means
we can also optimise assignment to two-argument substr the same way.

Although optimisations are not supposed to change behaviour,
this one does.

• It stops substr assignment from calling get-magic twice, which means
  the optimisation makes things less buggy than usual.
• It causes the uninitialized warning (for an undefined first argu-
  ment) to mention the substr operator, as it did before the previous
  commit, rather than the assignment operator.  I think that sort of
  detail is minor enough.

I had to make the warning about clobbering references apply whenever
substr does a replacement, and not only when used as an lvalue.  So
four-argument substr now emits that warning.  I would consider that a
bug fix, too.

Also, if the numeric arguments to four-argument substr and the
replacement string are undefined, the order of the uninitialized warn-
ings is slightly different, but is consistent regardless of whether
the optimisation is in effect.

I believe this will make 95% of substr assignments run faster.  So
there is less incentive to use what I consider the less readable form
(the four-argument form, which is not self-documenting).

Since I like naïve benchmarks, here are Before and After:

$ time ./miniperl -le 'do{$x="hello"; substr ($x,0,0) = 34;0}for 1..1000000'

real	0m2.391s
user	0m2.381s
sys	0m0.005s
$ time ./miniperl -le 'do{$x="hello"; substr ($x,0,0) = 34;0}for 1..1000000'

real	0m0.936s
user	0m0.927s
sys	0m0.005s
This commit is contained in:
Father Chrysostomos 2011-11-25 23:04:22 -08:00
parent a74fb2cdc8
commit 24fcb59fcc
8 changed files with 61 additions and 18 deletions

View File

@ -31,7 +31,7 @@ BEGIN {
# be to fake up a dummy constant that will never actually be true.
foreach (qw(OPpSORT_INPLACE OPpSORT_DESCEND OPpITER_REVERSED OPpCONST_NOVER
OPpPAD_STATE PMf_SKIPWHITE RXf_SKIPWHITE
CVf_LOCKED OPpREVERSE_INPLACE
CVf_LOCKED OPpREVERSE_INPLACE OPpSUBSTR_REPL_FIRST
PMf_NONDESTRUCT OPpCONST_ARYBASE OPpEVAL_BYTES)) {
eval { import B $_ };
no strict 'refs';
@ -2334,10 +2334,10 @@ sub pp_dorassign { logassignop(@_, "//=") }
sub listop {
my $self = shift;
my($op, $cx, $name) = @_;
my($op, $cx, $name, $kid) = @_;
my(@exprs);
my $parens = ($cx >= 5) || $self->{'parens'};
my $kid = $op->first->sibling;
$kid ||= $op->first->sibling;
return $self->keyword($name) if null $kid;
my $first;
$name = "socketpair" if $name eq "sockpair";
@ -2377,7 +2377,16 @@ sub listop {
sub pp_bless { listop(@_, "bless") }
sub pp_atan2 { maybe_targmy(@_, \&listop, "atan2") }
sub pp_substr { maybe_local(@_, listop(@_, "substr")) }
sub pp_substr {
my ($self,$op,$cx) = @_;
if ($op->private & OPpSUBSTR_REPL_FIRST) {
return
listop($self, $op, 7, "substr", $op->first->sibling->sibling)
. " = "
. $self->deparse($op->first->sibling, 7);
}
maybe_local(@_, listop(@_, "substr"))
}
sub pp_vec { maybe_local(@_, listop(@_, "vec")) }
sub pp_index { maybe_targmy(@_, \&listop, "index") }
sub pp_rindex { maybe_targmy(@_, \&listop, "rindex") }

View File

@ -789,3 +789,7 @@ my(@a) = ()[()];
print sort(foo('bar'));
>>>>
print sort(foo('bar'));
####
# substr assignment
substr(my $a, 0, 0) = (foo(), bar());
$a++;

View File

@ -621,6 +621,7 @@ $priv{$_}{16} = "OURINTR" for ("gvsv", "rv2sv", "rv2av", "rv2hv", "r2gv",
"enteriter");
$priv{$_}{8} = 'LVSUB' for qw(rv2av rv2gv rv2hv padav padhv aelem helem
aslice hslice av2arylen keys rkeys substr pos vec);
$priv{substr}{16} = 'REPL1ST';
$priv{$_}{16} = "TARGMY"
for (map(($_,"s$_"),"chop", "chomp"),
map(($_,"i_$_"), "postinc", "postdec", "multiply", "divide", "modulo",

View File

@ -169,7 +169,7 @@ my $testpkgs = {
PMf_MULTILINE PMf_ONCE PMf_SINGLELINE
POSTFIX SVf_FAKE SVf_IOK SVf_NOK SVf_POK SVf_ROK
SVpad_OUR SVs_RMG SVs_SMG SWAP_CHILDREN OPpPAD_STATE
OPpCONST_ARYBASE OPpEVAL_BYTES
OPpCONST_ARYBASE OPpEVAL_BYTES OPpSUBSTR_REPL_FIRST
/, $] > 5.009 ? ('RXf_SKIPWHITE') : ('PMf_SKIPWHITE'),
'CVf_LOCKED', # This ends up as a constant, pre or post 5.10
],

18
op.c
View File

@ -10300,6 +10300,24 @@ Perl_rpeep(pTHX_ register OP *o)
}
break;
case OP_SASSIGN:
if (OP_GIMME(o,0) == G_VOID) {
OP *right = cBINOP->op_first;
if (right) {
OP *left = right->op_sibling;
if (left->op_type == OP_SUBSTR
&& (left->op_private & 7) < 4) {
op_null(o);
cBINOP->op_first = left;
right->op_sibling =
cBINOPx(left)->op_first->op_sibling;
cBINOPx(left)->op_first->op_sibling = right;
left->op_private |= OPpSUBSTR_REPL_FIRST;
}
}
}
break;
case OP_CUSTOM: {
Perl_cpeep_t cpeep =
XopENTRY(Perl_custom_op_xop(aTHX_ o), xop_peep);

4
op.h
View File

@ -229,6 +229,10 @@ Deprecated. Use C<GIMME_V> instead.
/* OP_RV2[AGH]V, OP_PAD[AH]V, OP_[AH]ELEM, OP_[AH]SLICE OP_AV2ARYLEN,
OP_R?KEYS, OP_SUBSTR, OP_POS, OP_VEC */
#define OPpMAYBE_LVSUB 8 /* We might be an lvalue to return */
/* OP_SUBSTR only */
#define OPpSUBSTR_REPL_FIRST 16 /* 1st arg is replacement string */
/* OP_PADSV only */
#define OPpPAD_STATE 16 /* is a "state" pad */
/* for OP_RV2?V, lower bits carry hints (currently only HINT_STRICT_REFS) */

23
pp.c
View File

@ -2968,7 +2968,7 @@ PP(pp_substr)
SV * len_sv;
IV len_iv = 0;
int len_is_uv = 1;
const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
const bool rvalue = (GIMME_V != G_VOID);
const char *tmps;
SV *repl_sv = NULL;
@ -2980,11 +2980,7 @@ PP(pp_substr)
if (num_args > 2) {
if (num_args > 3) {
if((repl_sv = POPs)) {
repl = SvPV_const(repl_sv, repl_len);
repl_is_utf8 = DO_UTF8(repl_sv) && repl_len;
}
else num_args--;
if(!(repl_sv = POPs)) num_args--;
}
if ((len_sv = POPs)) {
len_iv = SvIV(len_sv);
@ -2996,16 +2992,23 @@ PP(pp_substr)
pos1_iv = SvIV(pos_sv);
pos1_is_uv = SvIOK_UV(pos_sv);
sv = POPs;
if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
assert(!repl_sv);
repl_sv = POPs;
}
PUTBACK;
if (repl_sv) {
repl = SvPV_const(repl_sv, repl_len);
repl_is_utf8 = DO_UTF8(repl_sv) && repl_len;
if (repl_is_utf8) {
if (!DO_UTF8(sv))
sv_utf8_upgrade(sv);
}
else if (DO_UTF8(sv))
repl_need_utf8_upgrade = TRUE;
lvalue = 0;
}
if (lvalue && !repl) {
if (lvalue) {
tmps = NULL; /* unused */
SvGETMAGIC(sv);
if (SvOK(sv)) (void)SvPV_nomg_const(sv, curlen);
@ -3075,7 +3078,7 @@ PP(pp_substr)
STRLEN byte_pos = utf8_curlen
? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
if (lvalue && !repl) {
if (lvalue) {
SV * ret;
ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
@ -3111,6 +3114,10 @@ PP(pp_substr)
repl = SvPV_const(repl_sv_copy, repl_len);
repl_is_utf8 = DO_UTF8(repl_sv_copy) && repl_len;
}
if (SvROK(sv))
Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
"Attempt to use reference as lvalue in substr"
);
if (!SvOK(sv))
sv_setpvs(sv, "");
sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);

View File

@ -1019,17 +1019,17 @@ Use of uninitialized value $m1 in substr at - line 5.
Use of uninitialized value $m2 in substr at - line 6.
Use of uninitialized value $g1 in substr at - line 6.
Use of uninitialized value $m1 in substr at - line 6.
Use of uninitialized value $g2 in substr at - line 7.
Use of uninitialized value $m2 in substr at - line 7.
Use of uninitialized value $g1 in substr at - line 7.
Use of uninitialized value $g2 in substr at - line 7.
Use of uninitialized value $m1 in substr at - line 7.
Use of uninitialized value $g1 in substr at - line 8.
Use of uninitialized value in scalar assignment at - line 8.
Use of uninitialized value $m1 in scalar assignment at - line 8.
Use of uninitialized value $g2 in substr at - line 8.
Use of uninitialized value $m1 in substr at - line 8.
Use of uninitialized value $m2 in substr at - line 9.
Use of uninitialized value $g1 in substr at - line 9.
Use of uninitialized value in scalar assignment at - line 9.
Use of uninitialized value $m1 in scalar assignment at - line 9.
Use of uninitialized value $g2 in substr at - line 9.
Use of uninitialized value $m1 in substr at - line 9.
Use of uninitialized value $m2 in vec at - line 11.
Use of uninitialized value $g1 in vec at - line 11.
Use of uninitialized value $m1 in vec at - line 11.