Move all the signature param parsing logic out of perly.y into a helper API

Provide a subsignature_*() API

Added:
 * subsignature_start()
 * subsignature_append_slurpy()
 * subsignature_append_positional()
 * subsignature_finish()

Call these from code blocks in perly.y

Make the actual parser signature struct opaque, hidden in toke.c. This
gives it much more robustness against future modifications.
This commit is contained in:
Paul "LeoNerd" Evans 2025-01-31 14:51:47 +00:00 committed by Paul Evans
parent b41ec6c20e
commit 8d1931e529
12 changed files with 1680 additions and 1594 deletions

View File

@ -3090,6 +3090,16 @@ ATdmp |bool |strict_utf8_to_uv \
CRp |NV |str_to_version |NN SV *sv
: Used in pp_ctl.c
p |void |sub_crush_depth|NN CV *cv
: Used in perly.y
p |void |subsignature_append_positional \
|NULLOK OP *varop \
|OPCODE defmode \
|NULLOK OP *defexpr
p |void |subsignature_append_slurpy \
|I32 sigil \
|NULLOK OP *varop
p |OP * |subsignature_finish
p |void |subsignature_start
Adp |void |suspend_compcv |NN struct suspended_compcv *buffer
ATdip |void |SvAMAGIC_off |NN SV *sv
ATdip |void |SvAMAGIC_on |NN SV *sv

View File

@ -1213,6 +1213,10 @@
# define sighandler1 Perl_sighandler1
# define sighandler3 Perl_sighandler3
# define sub_crush_depth(a) Perl_sub_crush_depth(aTHX_ a)
# define subsignature_append_positional(a,b,c) Perl_subsignature_append_positional(aTHX_ a,b,c)
# define subsignature_append_slurpy(a,b) Perl_subsignature_append_slurpy(aTHX_ a,b)
# define subsignature_finish() Perl_subsignature_finish(aTHX)
# define subsignature_start() Perl_subsignature_start(aTHX)
# define sv_2num(a) Perl_sv_2num(aTHX_ a)
# define sv_clean_all() Perl_sv_clean_all(aTHX)
# define sv_clean_objs() Perl_sv_clean_objs(aTHX)

219
op.c
View File

@ -16190,6 +16190,225 @@ Perl_rcpv_copy(pTHX_ char *pv) {
return pv;
}
/* Subroutine signature parsing */
struct yy_parser_signature {
UV elems; /* number of signature elements seen so far */
UV optelems; /* number of optional signature elems seen */
char slurpy; /* the sigil of the slurpy var (or null) */
OP *elemops; /* NULL, or an OP_LINESEQ of individual element ops */
};
static void
destroy_subsignature_context(pTHX_ void *p)
{
yy_parser_signature *signature = (yy_parser_signature *)p;
if(signature->elemops)
op_free(signature->elemops);
Safefree(signature);
}
/* Called from perly.y on encountering the '(' of a subroutine signature.
* Does not return anything useful, but sets up the memory structure in
* `PL_parser->signature` that the following functions make use of.
*/
void
Perl_subsignature_start(pTHX)
{
PERL_ARGS_ASSERT_SUBSIGNATURE_START;
assert(PL_parser);
yy_parser_signature *signature;
Newx(signature, 1, yy_parser_signature);
SAVEDESTRUCTOR_X(&destroy_subsignature_context, signature);
signature->elems = 0;
signature->optelems = 0;
signature->slurpy = 0;
signature->elemops = NULL;
SAVEVPTR(PL_parser->signature);
PL_parser->signature = signature;
}
/* Appends another positional scalar parameter to the accumulated set of
* subroutine params. `varop` may be NULL, but if not it must be an OP_ARGELEM
* whose op_targ refers to an already-declared pad lexical. That lexical must
* be a scalar. It is not necessary to set the argument index in the op_aux
* field; that will be filled in by this function.
* If `defexpr` is not NULL, it gives a defaulting expression to be evaluated
* if required, according to `defmode` - one of zero, `OP_DORASSIGN` or
* `OP_ORASSIGN`.
*/
void
Perl_subsignature_append_positional(pTHX_ OP *varop, OPCODE defmode, OP *defexpr)
{
PERL_ARGS_ASSERT_SUBSIGNATURE_APPEND_POSITIONAL;
assert(PL_parser);
yy_parser_signature *signature = PL_parser->signature;
assert(signature);
if(signature->slurpy)
yyerror("Slurpy parameter not last");
UV argix = signature->elems;
if(varop) {
assert(varop->op_type == OP_ARGELEM);
assert((varop->op_private & OPpARGELEM_MASK) == OPpARGELEM_SV);
assert(varop->op_targ);
assert(PadnamePV(PadnamelistARRAY(PL_comppad_name)[varop->op_targ])[0] == '$');
/* Now fill in the argix */
cUNOP_AUXx(varop)->op_aux = INT2PTR(UNOP_AUX_item *, argix);
}
signature->elems++;
if(defexpr) {
signature->optelems++;
I32 flags = 0;
if(defmode == OP_DORASSIGN)
flags |= OPpARG_IF_UNDEF << 8;
if(defmode == OP_ORASSIGN)
flags |= OPpARG_IF_FALSE << 8;
if(defexpr->op_type == OP_NULL && !(defexpr->op_flags & OPf_KIDS))
{
/* handle '$=' special case */
if(varop)
yyerror("Optional parameter lacks default expression");
}
else {
/* a normal '=default' expression */
OP *defop = newARGDEFELEMOP(flags, defexpr, argix);
if(varop) {
varop->op_flags |= OPf_STACKED;
(void)op_sibling_splice(varop, NULL, 0, defop);
scalar(defop);
}
else
varop = newUNOP(OP_NULL, 0, defop);
LINKLIST(varop);
/* NB: normally the first child of a logop is executed before the
* logop, and it pushes a boolean result ready for the logop. For
* ARGDEFELEM, the op itself does the boolean calculation, so set
* the first op to it instead.
*/
varop->op_next = defop;
defexpr->op_next = varop;
}
}
else
if(signature->optelems)
yyerror("Mandatory parameter follows optional parameter");
if(varop) {
signature->elemops = op_append_list(OP_LINESEQ, signature->elemops,
newSTATEOP(0, NULL, varop));
}
}
/* Appends a final slurpy parameter to the accumulated set of subroutine
* params. `varop` may be NULL, but if not it must be an OP_ARGELEM whose
* op_targ refers to an already-declared pad lexical. That lexical must match
* the `sigil` parameter. It is not necessary to set the argument index in the
* op_aux field; that will be filled in by this function.
*/
void
Perl_subsignature_append_slurpy(pTHX_ I32 sigil, OP *varop)
{
PERL_ARGS_ASSERT_SUBSIGNATURE_APPEND_SLURPY;
assert(PL_parser);
yy_parser_signature *signature = PL_parser->signature;
assert(signature);
assert(sigil == '@' || sigil == '%');
if(signature->slurpy)
yyerror("Multiple slurpy parameters not allowed");
UV argix = signature->elems;
if(varop) {
assert(varop->op_type == OP_ARGELEM);
assert((varop->op_private & OPpARGELEM_MASK) ==
((sigil == '@') ? OPpARGELEM_AV : OPpARGELEM_HV));
assert(varop->op_targ);
assert(PadnamePV(PadnamelistARRAY(PL_comppad_name)[varop->op_targ])[0] == sigil);
/* Now fill in the argix */
cUNOP_AUXx(varop)->op_aux = INT2PTR(UNOP_AUX_item *, argix);
}
signature->slurpy = (char)sigil;
if(varop) {
/* TODO: assert() the sigil of the pad variable matches */
signature->elemops = op_append_list(OP_LINESEQ, signature->elemops,
newSTATEOP(0, NULL, varop));
}
}
/* Called from perly.y on encountering the closing `)` of a subroutine
* signature. This creates the optree fragment responsible for processing all
* the accumulated subroutine params, to be inserted at the start of the
* subroutine's optree.
*/
OP *
Perl_subsignature_finish(pTHX)
{
PERL_ARGS_ASSERT_SUBSIGNATURE_FINISH;
assert(PL_parser);
yy_parser_signature *signature = PL_parser->signature;
assert(signature);
OP *sigops = signature->elemops;
signature->elemops = NULL;
struct op_argcheck_aux *aux = (struct op_argcheck_aux *)
PerlMemShared_malloc( sizeof(struct op_argcheck_aux));
aux->params = signature->elems;
aux->opt_params = signature->optelems;
aux->slurpy = signature->slurpy;
OP *check = newUNOP_AUX(OP_ARGCHECK, 0, NULL, (UNOP_AUX_item *)aux);
sigops = op_prepend_elem(OP_LINESEQ,
check,
sigops);
/* a nextstate right at the beginning */
sigops = op_prepend_elem(OP_LINESEQ,
newSTATEOP(0, NULL, NULL),
sigops);
/* a nextstate at the end handles context correctly for an empty sub body */
sigops = op_append_elem(OP_LINESEQ, sigops,
newSTATEOP(0, NULL, NULL));
/* wrap the list of arg ops in a NULL aux op.
This serves two purposes. First, it makes the arg list a separate
subtree from the body of the sub, and secondly the null op may in future
be upgraded to an OP_SIGNATURE when implemented. For now leave it as
ex-argcheck */
OP *ret = newUNOP_AUX(OP_ARGCHECK, 0, sigops, NULL);
op_null(ret);
return ret;
}
/*
* ex: set ts=8 sts=4 sw=4 et:
*/

View File

@ -31,6 +31,10 @@ typedef struct yy_lexshared {
SV *re_eval_str; /* "(?{...})" text */
} LEXSHARED;
/* Opaque struct of data relevant during parsing and construction of a
* subroutine signature. Defined and used exclusively by op.c */
typedef struct yy_parser_signature yy_parser_signature;
typedef struct yy_parser {
/* parser state */
@ -112,10 +116,7 @@ typedef struct yy_parser {
line_t herelines; /* number of lines in here-doc */
line_t preambling; /* line # when processing $ENV{PERL5DB} */
/* these are valid while parsing a subroutine signature */
UV sig_elems; /* number of signature elements seen so far */
UV sig_optelems; /* number of optional signature elems seen */
char sig_slurpy; /* the sigil of the slurpy var (or null) */
yy_parser_signature *signature; /* parser state of a subroutine signature */
bool sig_seen; /* the currently parsing sub has a signature */
bool recheck_charset_validity;

889
perly.act generated

File diff suppressed because it is too large Load Diff

2
perly.h generated
View File

@ -241,6 +241,6 @@ int yyparse (void);
/* Generated from:
* cdef9ff44874bfb2f6175adcd920473f8c6601794f20f7977a13e1c2e2be5e26 perly.y
* 59c1b0d8db6eb03422c4aa700803c201837a9c4085a3177364c2eda6cbeb7cc7 perly.y
* f13e9c08cea6302f0c1d1f467405bd0e0880d0ea92d0669901017a7f7e94ab28 regen_perly.pl
* ex: set ro ft=c: */

1923
perly.tab generated

File diff suppressed because it is too large Load Diff

159
perly.y
View File

@ -114,7 +114,7 @@
%type <opval> termbinop termunop anonymous termdo
%type <opval> termrelop relopchain termeqop eqopchain
%type <ival> sigslurpsigil
%type <opval> sigvarname sigdefault sigscalarelem sigslurpelem
%type <opval> sigvarname sigscalarelem sigslurpelem
%type <opval> sigelem siglist optsiglist subsigguts subsignature optsubsignature
%type <opval> subbody optsubbody sigsubbody optsigsubbody
%type <opval> formstmtseq formline formarg
@ -823,99 +823,39 @@ sigslurpsigil:
{ $$ = '%'; }
/* @, %, @foo, %foo */
sigslurpelem: sigslurpsigil sigvarname sigdefault/* def only to catch errors */
sigslurpelem: sigslurpsigil sigvarname
{
I32 sigil = $sigslurpsigil;
OP *var = $sigvarname;
OP *defop = $sigdefault;
if (parser->sig_slurpy)
yyerror("Multiple slurpy parameters not allowed");
parser->sig_slurpy = (char)sigil;
if (defop)
yyerror("A slurpy parameter may not have "
"a default value");
$$ = var ? newSTATEOP(0, NULL, var) : NULL;
subsignature_append_slurpy($sigslurpsigil, $sigvarname);
$$ = NULL;
}
;
/* default part of sub signature scalar element: i.e. '= default_expr' */
sigdefault
: empty
| ASSIGNOP
{ $$ = newARGDEFELEMOP(0, newOP(OP_NULL, 0), parser->sig_elems); }
| ASSIGNOP term
| sigslurpsigil sigvarname ASSIGNOP
{
I32 flags = 0;
if ($ASSIGNOP == OP_DORASSIGN)
flags |= OPpARG_IF_UNDEF << 8;
if ($ASSIGNOP == OP_ORASSIGN)
flags |= OPpARG_IF_FALSE << 8;
$$ = newARGDEFELEMOP(flags, $term, parser->sig_elems);
yyerror("A slurpy parameter may not have a default value");
}
| sigslurpsigil sigvarname ASSIGNOP term
{
yyerror("A slurpy parameter may not have a default value");
}
;
/* subroutine signature scalar element: e.g. '$x', '$=', '$x = $default' */
sigscalarelem:
PERLY_DOLLAR sigvarname sigdefault
PERLY_DOLLAR sigvarname
{
OP *var = $sigvarname;
OP *defop = $sigdefault;
if (parser->sig_slurpy)
yyerror("Slurpy parameter not last");
parser->sig_elems++;
if (defop) {
parser->sig_optelems++;
OP *defexpr = cLOGOPx(defop)->op_first;
if ( defexpr->op_type == OP_NULL
&& !(defexpr->op_flags & OPf_KIDS))
{
/* handle '$=' special case */
if (var)
yyerror("Optional parameter "
"lacks default expression");
op_free(defop);
}
else {
/* a normal '=default' expression */
if (var) {
var->op_flags |= OPf_STACKED;
(void)op_sibling_splice(var,
NULL, 0, defop);
scalar(defop);
}
else
var = newUNOP(OP_NULL, 0, defop);
LINKLIST(var);
/* NB: normally the first child of a
* logop is executed before the logop,
* and it pushes a boolean result
* ready for the logop. For ARGDEFELEM,
* the op itself does the boolean
* calculation, so set the first op to
* it instead.
*/
var->op_next = defop;
defexpr->op_next = var;
}
}
else {
if (parser->sig_optelems)
yyerror("Mandatory parameter "
"follows optional parameter");
}
$$ = var ? newSTATEOP(0, NULL, var) : NULL;
subsignature_append_positional($sigvarname, 0, NULL);
$$ = NULL;
}
;
| PERLY_DOLLAR sigvarname ASSIGNOP
{
subsignature_append_positional($sigvarname, $ASSIGNOP, newOP(OP_NULL, 0));
$$ = NULL;
}
| PERLY_DOLLAR sigvarname ASSIGNOP term[defop]
{
subsignature_append_positional($sigvarname, $ASSIGNOP, $defop);
$$ = NULL;
}
;
/* subroutine signature element: e.g. '$x = $default' or '%h' */
@ -925,16 +865,13 @@ sigelem: sigscalarelem
{ parser->in_my = KEY_sigvar; $$ = $sigslurpelem; }
;
/* list of subroutine signature elements */
/* list of subroutine signature elements
* These parser tokens no longer emit anything; they are combined just for
* their side-effect on the parser structures. */
siglist:
siglist[list] PERLY_COMMA
{ $$ = $list; }
| siglist[list] PERLY_COMMA sigelem[element]
{
$$ = op_append_list(OP_LINESEQ, $list, $element);
}
| sigelem[element] %prec PREC_LOW
{ $$ = $element; }
;
/* () or (....) */
@ -956,51 +893,17 @@ subsignature: PERLY_PAREN_OPEN subsigguts PERLY_PAREN_CLOSE
subsigguts:
{
ENTER;
SAVEIV(parser->sig_elems);
SAVEIV(parser->sig_optelems);
SAVEI8(parser->sig_slurpy);
parser->sig_elems = 0;
parser->sig_optelems = 0;
parser->sig_slurpy = 0;
parser->in_my = KEY_sigvar;
subsignature_start();
parser->in_my = KEY_sigvar;
}
optsiglist
{
OP *sigops = $optsiglist;
struct op_argcheck_aux *aux;
OP *check;
if (!FEATURE_SIGNATURES_IS_ENABLED && !CvIsMETHOD(PL_compcv))
Perl_croak(aTHX_ "Experimental "
"subroutine signatures not enabled");
/* We shouldn't get here otherwise */
aux = (struct op_argcheck_aux*)
PerlMemShared_malloc(
sizeof(struct op_argcheck_aux));
aux->params = parser->sig_elems;
aux->opt_params = parser->sig_optelems;
aux->slurpy = parser->sig_slurpy;
check = newUNOP_AUX(OP_ARGCHECK, 0, NULL,
(UNOP_AUX_item *)aux);
sigops = op_prepend_elem(OP_LINESEQ, check, sigops);
sigops = op_prepend_elem(OP_LINESEQ,
newSTATEOP(0, NULL, NULL),
sigops);
/* a nextstate at the end handles context
* correctly for an empty sub body */
sigops = op_append_elem(OP_LINESEQ,
sigops,
newSTATEOP(0, NULL, NULL));
/* wrap the list of arg ops in a NULL aux op.
This serves two purposes. First, it makes
the arg list a separate subtree from the
body of the sub, and secondly the null op
may in future be upgraded to an OP_SIGNATURE
when implemented. For now leave it as
ex-argcheck */
$$ = newUNOP_AUX(OP_ARGCHECK, 0, sigops, NULL);
op_null($$);
$$ = subsignature_finish();
CvSIGNATURE_on(PL_compcv);

20
proto.h generated
View File

@ -4399,6 +4399,26 @@ Perl_sub_crush_depth(pTHX_ CV *cv)
#define PERL_ARGS_ASSERT_SUB_CRUSH_DEPTH \
assert(cv); assert(SvTYPE(cv) == SVt_PVCV || SvTYPE(cv) == SVt_PVFM)
PERL_CALLCONV void
Perl_subsignature_append_positional(pTHX_ OP *varop, OPCODE defmode, OP *defexpr)
__attribute__visibility__("hidden");
#define PERL_ARGS_ASSERT_SUBSIGNATURE_APPEND_POSITIONAL
PERL_CALLCONV void
Perl_subsignature_append_slurpy(pTHX_ I32 sigil, OP *varop)
__attribute__visibility__("hidden");
#define PERL_ARGS_ASSERT_SUBSIGNATURE_APPEND_SLURPY
PERL_CALLCONV OP *
Perl_subsignature_finish(pTHX)
__attribute__visibility__("hidden");
#define PERL_ARGS_ASSERT_SUBSIGNATURE_FINISH
PERL_CALLCONV void
Perl_subsignature_start(pTHX)
__attribute__visibility__("hidden");
#define PERL_ARGS_ASSERT_SUBSIGNATURE_START
PERL_CALLCONV void
Perl_suspend_compcv(pTHX_ struct suspended_compcv *buffer);
#define PERL_ARGS_ASSERT_SUSPEND_COMPCV \

8
sv.c
View File

@ -13991,11 +13991,13 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param)
parser->in_my = proto->in_my;
parser->in_my_stash = hv_dup(proto->in_my_stash, param);
parser->error_count = proto->error_count;
parser->sig_elems = proto->sig_elems;
parser->sig_optelems= proto->sig_optelems;
parser->sig_slurpy = proto->sig_slurpy;
parser->recheck_charset_validity = proto->recheck_charset_validity;
/* A currently running signature parser really shouldn't be required in
* newly-cloned thread
*/
parser->signature = NULL;
{
char * const ols = SvPVX(proto->linestr);
char * const ls = SvPVX(parser->linestr);

View File

@ -1725,6 +1725,29 @@ SKIP: {
diag("Error was $@");
}
SKIP: {
use Config;
$Config{useithreads} or skip "No threads", 1;
ok(eval <<'EOPERL',
no warnings 'closure';
sub signature_thread_test (
$x = do {
my $thr;
BEGIN {
use threads;
$thr = threads->create( sub { "OK" } );
}
$thr;
}
) {
return $x->join;
}
signature_thread_test() eq "OK"
EOPERL
'thread cloning during signature parse does not crash');
}
done_testing;
1;

8
toke.c
View File

@ -9954,9 +9954,11 @@ S_pending_ident(pTHX)
* index. If we ever need more fields, use a real malloced
* aux strut instead.
*/
o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
INT2PTR(UNOP_AUX_item *,
(PL_parser->sig_elems)));
assert(PL_parser->signature);
/* We don't yet know the argindex but subsignature_append_*()
* will fill it in
*/
o = newUNOP_AUX(OP_ARGELEM, 0, NULL, NULL);
o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
: PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
: OPpARGELEM_HV);