embed.fnc: Add ptr assertions for apparently non-problematic functions

I went through the declarations in embed.fnc and added PTR constraints
for all the ones that looked to have pointers to the beginning and end
of a string.  I then ran the test suite, and reverted any that had
problems.

Then I looked at the code for each remaining one to see if it was
equipped to handle the case where the end == the beginning, and removed
the constraints from those that were, since an empty string is legal there.

This is the result.  Testing in the field may reveal others that the
test suite missed; we can fix those as they occur.

I removed the now-redundant asserts that were in the functions, as they
are now included in the ARGS_ASSERT macros.
This commit is contained in:
Karl Williamson 2025-10-06 07:34:18 -06:00
parent 752f9267bf
commit e18f23d735
3 changed files with 64 additions and 67 deletions

View File

@ -1346,8 +1346,8 @@ Adpx |void |forbid_outofblock_ops \
|NN const char *blockname
p |void |force_locale_unlock
Cp |void |force_out_malformed_utf8_message_ \
|NN const U8 * const p \
|NN const U8 * const e \
|SPTR const U8 * const p \
|EPTR const U8 * const e \
|U32 flags \
|const bool die_here
Adfp |char * |form |NN const char *pat \
@ -1811,12 +1811,12 @@ ARTdip |Size_t |isUTF8_CHAR_flags \
|NN const U8 * const e \
|const U32 flags
CPRTp |STRLEN |is_utf8_char_helper_ \
|NN const U8 * const s \
|NN const U8 *e \
|SPTR const U8 * const s \
|EPTR const U8 *e \
|const U32 flags
CPRTp |Size_t |is_utf8_FF_helper_ \
|NN const U8 * const s0 \
|NN const U8 * const e \
|SPTR const U8 * const s0 \
|EPTR const U8 * const e \
|const bool require_partial
ATdmp |bool |is_utf8_fixed_width_buf_flags \
|NN const U8 * const s \
@ -1834,18 +1834,18 @@ ATdip |bool |is_utf8_fixed_width_buf_loclen_flags \
|NULLOK STRLEN *el \
|const U32 flags
CRp |Size_t |is_utf8_FOO_ |const U8 classnum \
|NN const U8 *p \
|NN const U8 * const e
|SPTR const U8 *p \
|EPTR const U8 * const e
ARTdip |bool |is_utf8_invariant_string_loc \
|NN const U8 * const s \
|STRLEN len \
|NULLOK const U8 **ep
CRp |Size_t |is_utf8_perl_idcont_ \
|NN const U8 *p \
|NN const U8 * const e
|SPTR const U8 *p \
|EPTR const U8 * const e
CRp |Size_t |is_utf8_perl_idstart_ \
|NN const U8 *p \
|NN const U8 * const e
|SPTR const U8 *p \
|EPTR const U8 * const e
ARTdmp |bool |is_utf8_string |NN const U8 *s \
|STRLEN len
ARTdip |bool |is_utf8_string_flags \
@ -1873,11 +1873,11 @@ ATdip |bool |is_utf8_string_loclen_flags \
|NULLOK STRLEN *el \
|const U32 flags
APTdmp |bool |is_utf8_valid_partial_char \
|NN const U8 * const s0 \
|NN const U8 * const e
|SPTR const U8 * const s0 \
|EPTR const U8 * const e
ARTdip |bool |is_utf8_valid_partial_char_flags \
|NN const U8 * const s0 \
|NN const U8 * const e \
|SPTR const U8 * const s0 \
|EPTR const U8 * const e \
|const U32 flags
: Used in perly.y
@ -3139,12 +3139,12 @@ Adp |const char *|scan_version \
|NN const char *s \
|NN SV *rv \
|bool qv
Adp |char * |scan_vstring |NN const char *s \
|NN const char * const e \
Adp |char * |scan_vstring |SPTR const char *s \
|EPTR const char * const e \
|NN SV *sv
EXpx |char * |scan_word |NN char *s \
|NN char *dest \
|NN char *dest_end \
|SPTR char *dest \
|EPTR char *dest_end \
|int allow_package \
|NN STRLEN *slp
Cp |U32 |seed
@ -3758,27 +3758,27 @@ Cp |UV |to_uni_upper |UV c \
|NN U8 *p \
|NN STRLEN *lenp
Cp |UV |to_utf8_fold_flags_ \
|NN const U8 *p \
|NN const U8 *e \
|SPTR const U8 *p \
|EPTR const U8 *e \
|NN U8 *ustrp \
|NULLOK STRLEN *lenp \
|U8 flags
Cp |UV |to_utf8_lower_flags_ \
|NN const U8 *p \
|NN const U8 *e \
|SPTR const U8 *p \
|EPTR const U8 *e \
|NN U8 *ustrp \
|NULLOK STRLEN *lenp \
|bool flags
Cp |UV |to_utf8_title_flags_ \
|NN const U8 *p \
|NN const U8 *e \
|SPTR const U8 *p \
|EPTR const U8 *e \
|NN U8 *ustrp \
|NULLOK STRLEN *lenp \
|bool flags
Cp |UV |to_utf8_upper_flags_ \
|NN const U8 *p \
|NN const U8 *e \
|SPTR const U8 *p \
|EPTR const U8 *e \
|NN U8 *ustrp \
|NULLOK STRLEN *lenp \
|bool flags
@ -5847,8 +5847,8 @@ Ei |I32 |foldEQ_latin1_s2_folded \
ERS |bool |isFOO_lc |const U8 classnum \
|const U8 character
ERS |bool |isFOO_utf8_lc |const U8 classnum \
|NN const U8 *character \
|NN const U8 *e
|SPTR const U8 *character \
|EPTR const U8 *e
ERS |bool |isGCB |const GCB_enum before \
|const GCB_enum after \
|NN const U8 * const strbeg \
@ -5892,8 +5892,8 @@ ERST |U8 * |reghopmaybe3 |NN U8 *s \
|NN const U8 * const lim
ERS |bool |reginclass |NULLOK regexp * const prog \
|NN const regnode * const n \
|NN const U8 * const p \
|NN const U8 * const p_end \
|SPTR const U8 * const p \
|EPTR const U8 * const p_end \
|bool const utf8_target
ERS |SSize_t|regmatch |NN regmatch_info *reginfo \
|NN char *startpos \
@ -6181,8 +6181,8 @@ RS |char * |scan_const |NN char *start
RS |char * |scan_formline |NN char *s
RS |char * |scan_heredoc |NN char *s
S |char * |scan_ident |NN char *s \
|NN char *dest \
|NN char *dest_end \
|SPTR char *dest \
|EPTR char *dest_end \
|bool chk_unary
RS |char * |scan_inputsymbol \
|NN char *start
@ -6247,8 +6247,8 @@ RS |UV |check_locale_boundary_crossing \
|NN U8 * const ustrp \
|NN STRLEN *lenp
RTi |int |does_utf8_overflow \
|NN const U8 * const s \
|NN const U8 *e
|SPTR const U8 * const s \
|EPTR const U8 *e
RTi |int |isFF_overlong |NN const U8 * const s \
|const STRLEN len
RTi |SSize_t|is_utf8_overlong \
@ -6278,16 +6278,16 @@ S |UV |to_utf8_case_ |const UV original \
|NULLOK const U32 * const * const aux_tables \
|NULLOK const U8 * const aux_table_lengths \
|NN const char * const normal
S |UV |turkic_fc |NN const U8 * const p \
|NN const U8 * const e \
S |UV |turkic_fc |SPTR const U8 * const p \
|EPTR const U8 * const e \
|NN U8 *ustrp \
|NN STRLEN *lenp
S |UV |turkic_lc |NN const U8 * const p0 \
|NN const U8 * const e \
S |UV |turkic_lc |SPTR const U8 * const p0 \
|EPTR const U8 * const e \
|NN U8 *ustrp \
|NN STRLEN *lenp
S |UV |turkic_uc |NN const U8 * const p \
|NN const U8 * const e \
S |UV |turkic_uc |SPTR const U8 * const p \
|EPTR const U8 * const e \
|NN U8 *ustrp \
|NN STRLEN *lenp
RS |char * |unexpected_non_continuation_text \

41
proto.h generated
View File

@ -1131,7 +1131,7 @@ Perl_force_locale_unlock(pTHX)
PERL_CALLCONV void
Perl_force_out_malformed_utf8_message_(pTHX_ const U8 * const p, const U8 * const e, U32 flags, const bool die_here);
#define PERL_ARGS_ASSERT_FORCE_OUT_MALFORMED_UTF8_MESSAGE_ \
assert(p); assert(e)
assert(p); assert(e); assert(p < e)
PERL_CALLCONV char *
Perl_form(pTHX_ const char *pat, ...)
@ -1858,13 +1858,13 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, const bool requ
__attribute__warn_unused_result__
__attribute__pure__;
#define PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_ \
assert(s0); assert(e)
assert(s0); assert(e); assert(s0 < e)
PERL_CALLCONV Size_t
Perl_is_utf8_FOO_(pTHX_ const U8 classnum, const U8 *p, const U8 * const e)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_IS_UTF8_FOO_ \
assert(p); assert(e)
assert(p); assert(e); assert(p < e)
/* PERL_CALLCONV STRLEN
Perl_is_utf8_char_buf(const U8 *buf, const U8 *buf_end); */
@ -1874,7 +1874,7 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 *e, const U32 flags)
__attribute__warn_unused_result__
__attribute__pure__;
#define PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_ \
assert(s); assert(e)
assert(s); assert(e); assert(s < e)
/* PERL_CALLCONV bool
Perl_is_utf8_fixed_width_buf_flags(const U8 * const s, STRLEN len, const U32 flags); */
@ -1886,13 +1886,13 @@ PERL_CALLCONV Size_t
Perl_is_utf8_perl_idcont_(pTHX_ const U8 *p, const U8 * const e)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_IS_UTF8_PERL_IDCONT_ \
assert(p); assert(e)
assert(p); assert(e); assert(p < e)
PERL_CALLCONV Size_t
Perl_is_utf8_perl_idstart_(pTHX_ const U8 *p, const U8 * const e)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_IS_UTF8_PERL_IDSTART_ \
assert(p); assert(e)
assert(p); assert(e); assert(p < e)
/* PERL_CALLCONV bool
Perl_is_utf8_string(const U8 *s, STRLEN len)
@ -4239,10 +4239,11 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, bool qv);
PERL_CALLCONV char *
Perl_scan_vstring(pTHX_ const char *s, const char * const e, SV *sv);
#define PERL_ARGS_ASSERT_SCAN_VSTRING \
assert(s); assert(e); assert(sv)
assert(s); assert(e); assert(sv); assert(s < e)
#define PERL_ARGS_ASSERT_SCAN_WORD \
assert(s); assert(dest); assert(dest_end); assert(slp)
assert(s); assert(dest); assert(dest_end); assert(slp); \
assert(dest < dest_end)
PERL_CALLCONV U32
Perl_seed(pTHX);
@ -5296,22 +5297,22 @@ Perl_to_uni_upper(pTHX_ UV c, U8 *p, STRLEN *lenp);
PERL_CALLCONV UV
Perl_to_utf8_fold_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, U8 flags);
#define PERL_ARGS_ASSERT_TO_UTF8_FOLD_FLAGS_ \
assert(p); assert(e); assert(ustrp)
assert(p); assert(e); assert(ustrp); assert(p < e)
PERL_CALLCONV UV
Perl_to_utf8_lower_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags);
#define PERL_ARGS_ASSERT_TO_UTF8_LOWER_FLAGS_ \
assert(p); assert(e); assert(ustrp)
assert(p); assert(e); assert(ustrp); assert(p < e)
PERL_CALLCONV UV
Perl_to_utf8_title_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags);
#define PERL_ARGS_ASSERT_TO_UTF8_TITLE_FLAGS_ \
assert(p); assert(e); assert(ustrp)
assert(p); assert(e); assert(ustrp); assert(p < e)
PERL_CALLCONV UV
Perl_to_utf8_upper_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags);
#define PERL_ARGS_ASSERT_TO_UTF8_UPPER_FLAGS_ \
assert(p); assert(e); assert(ustrp)
assert(p); assert(e); assert(ustrp); assert(p < e)
PERL_CALLCONV bool
Perl_try_amagic_bin(pTHX_ int method, int flags);
@ -8879,7 +8880,7 @@ S_unwind_scan_frames(pTHX_ void *p);
# define PERL_ARGS_ASSERT_ISFOO_LC
# define PERL_ARGS_ASSERT_ISFOO_UTF8_LC \
assert(character); assert(e)
assert(character); assert(e); assert(character < e)
# define PERL_ARGS_ASSERT_ISGCB \
assert(strbeg); assert(curpos)
@ -8912,7 +8913,7 @@ S_unwind_scan_frames(pTHX_ void *p);
assert(s); assert(lim)
# define PERL_ARGS_ASSERT_REGINCLASS \
assert(n); assert(p); assert(p_end)
assert(n); assert(p); assert(p_end); assert(p < p_end)
# define PERL_ARGS_ASSERT_REGMATCH \
assert(reginfo); assert(startpos); assert(prog)
@ -9471,7 +9472,7 @@ S_scan_heredoc(pTHX_ char *s)
STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary);
# define PERL_ARGS_ASSERT_SCAN_IDENT \
assert(s); assert(dest); assert(dest_end)
assert(s); assert(dest); assert(dest_end); assert(dest < dest_end)
STATIC char *
S_scan_inputsymbol(pTHX_ char *start)
@ -9618,17 +9619,17 @@ S_to_utf8_case_(pTHX_ const UV original, const U8 *p, U8 *ustrp, STRLEN *lenp, S
STATIC UV
S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, U8 *ustrp, STRLEN *lenp);
# define PERL_ARGS_ASSERT_TURKIC_FC \
assert(p); assert(e); assert(ustrp); assert(lenp)
assert(p); assert(e); assert(ustrp); assert(lenp); assert(p < e)
STATIC UV
S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e, U8 *ustrp, STRLEN *lenp);
# define PERL_ARGS_ASSERT_TURKIC_LC \
assert(p0); assert(e); assert(ustrp); assert(lenp)
assert(p0); assert(e); assert(ustrp); assert(lenp); assert(p0 < e)
STATIC UV
S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, U8 *ustrp, STRLEN *lenp);
# define PERL_ARGS_ASSERT_TURKIC_UC \
assert(p); assert(e); assert(ustrp); assert(lenp)
assert(p); assert(e); assert(ustrp); assert(lenp); assert(p < e)
STATIC char *
S_unexpected_non_continuation_text(pTHX_ const U8 * const s, STRLEN print_len, const STRLEN non_cont_byte_pos, const STRLEN expect_len)
@ -9648,7 +9649,7 @@ PERL_STATIC_INLINE int
S_does_utf8_overflow(const U8 * const s, const U8 *e)
__attribute__warn_unused_result__;
# define PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW \
assert(s); assert(e)
assert(s); assert(e); assert(s < e)
PERL_STATIC_INLINE int
S_isFF_overlong(const U8 * const s, const STRLEN len)
@ -10008,7 +10009,7 @@ PERL_STATIC_INLINE bool
Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags)
__attribute__warn_unused_result__;
# define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS \
assert(s0); assert(e)
assert(s0); assert(e); assert(s0 < e)
PERL_STATIC_INLINE unsigned
Perl_lsbit_pos32(U32 word)

6
utf8.c
View File

@ -725,7 +725,6 @@ STRLEN
Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags)
{
PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_;
assert(e > s);
assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
SSize_t len, full_len;
@ -755,6 +754,7 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags)
* determined with just the first one or two bytes.
*
*/
full_len = UTF8SKIP(s);
len = e - s;
@ -840,7 +840,6 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e,
const bool require_partial)
{
PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_;
assert(s0 < e);
assert(*s0 == I8_TO_NATIVE_UTF8(0xFF));
/* This is called to determine if the UTF-8 sequence starting at s0 and
@ -4245,7 +4244,6 @@ S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e,
U8 * ustrp, STRLEN *lenp)
{
PERL_ARGS_ASSERT_TURKIC_FC;
assert(e > p);
/* Returns 0 if the foldcase of the input UTF-8 encoded sequence from
* p0..e-1 according to Turkic rules is the same as for non-Turkic.
@ -4280,7 +4278,6 @@ S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e,
U8 * ustrp, STRLEN *lenp)
{
PERL_ARGS_ASSERT_TURKIC_LC;
assert(e > p0);
/* Returns 0 if the lowercase of the input UTF-8 encoded sequence from
* p0..e-1 according to Turkic rules is the same as for non-Turkic.
@ -4326,7 +4323,6 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
U8 * ustrp, STRLEN *lenp)
{
PERL_ARGS_ASSERT_TURKIC_UC;
assert(e > p);
/* Returns 0 if the upper or title-case of the input UTF-8 encoded sequence
* from p0..e-1 according to Turkic rules is the same as for non-Turkic.