regcomp.c: Need to account for UTF group name

I found this by reading the code.  Prior to this commit, the parse
pointer was advanced by one byte; it should be advanced by one
character.  As long as the character was ASCII, things worked.

I looked through the regcomp.c source for other misuses of the macro
changed by this commit; none were obvious.
This commit is contained in:
Karl Williamson 2025-10-25 17:33:05 -06:00 committed by Karl Williamson
parent 291e398846
commit c1746e5a96
2 changed files with 3 additions and 2 deletions

View File

@ -2533,8 +2533,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
&& (advance = isWORDCHAR_utf8_safe( (U8 *) RExC_parse,
(U8 *) RExC_end)));
} else {
RExC_parse_inc_by(1); /* so the <- from the vFAIL is after the offending
character */
/* so the <- from the vFAIL is after the offending character */
RExC_parse_inc_safe();
vFAIL("Group name must start with a non-digit word character");
}
sv_name = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),

View File

@ -547,6 +547,7 @@ my @death_utf8 = mark_as_utf8(
'/[\cネ]/' => "Character following \"\\c\" must be printable ASCII {#} m/[\\cネ{#}]/",
'/\b{ネ}/' => "'ネ' is an unknown bound type {#} m/\\b{ネ{#}}/",
'/\B{ネ}/' => "'ネ' is an unknown bound type {#} m/\\B{ネ{#}}/",
'/ネ(?<‿name>match)ネ/; #no latin1' => 'Group name must start with a non-digit word character {#} m/ネ(?<‿{#}name>match)ネ/',
);
push @death, @death_utf8;