From e18f23d7358c3abc5d74b13f43209a163ae82869 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 6 Oct 2025 07:34:18 -0600 Subject: [PATCH] embed.fnc: Add ptr assertions for apparently non-problematic I went through the declarations in embed.fnc and added PTR constraints for all the ones that appeared to take pointers to the beginning and end of a string. I then ran the test suite, and reverted any that had problems. Then I looked at the code for each one remaining to see if it was equipped to handle the case where the end == the beginning, and removed the constraints from those that were. This is the result. Testing in the field may reveal others that the test suite missed; we can fix those as they occur. I also removed asserts from the function bodies that are now redundant because the same checks are included in the ARGS_ASSERT macros. --- embed.fnc | 84 +++++++++++++++++++++++++++---------------------------- proto.h | 41 ++++++++++++++------------- utf8.c | 6 +--- 3 files changed, 64 insertions(+), 67 deletions(-) diff --git a/embed.fnc b/embed.fnc index afc5b5d088..e0114deb54 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1346,8 +1346,8 @@ Adpx |void |forbid_outofblock_ops \ |NN const char *blockname p |void |force_locale_unlock Cp |void |force_out_malformed_utf8_message_ \ - |NN const U8 * const p \ - |NN const U8 * const e \ + |SPTR const U8 * const p \ + |EPTR const U8 * const e \ |U32 flags \ |const bool die_here Adfp |char * |form |NN const char *pat \ @@ -1811,12 +1811,12 @@ ARTdip |Size_t |isUTF8_CHAR_flags \ |NN const U8 * const e \ |const U32 flags CPRTp |STRLEN |is_utf8_char_helper_ \ - |NN const U8 * const s \ - |NN const U8 *e \ + |SPTR const U8 * const s \ + |EPTR const U8 *e \ |const U32 flags CPRTp |Size_t |is_utf8_FF_helper_ \ - |NN const U8 * const s0 \ - |NN const U8 * const e \ + |SPTR const U8 * const s0 \ + |EPTR const U8 * const e \ |const bool require_partial ATdmp |bool |is_utf8_fixed_width_buf_flags \ |NN const U8 * const s \ @@ -1834,18 +1834,18 @@ ATdip |bool 
|is_utf8_fixed_width_buf_loclen_flags \ |NULLOK STRLEN *el \ |const U32 flags CRp |Size_t |is_utf8_FOO_ |const U8 classnum \ - |NN const U8 *p \ - |NN const U8 * const e + |SPTR const U8 *p \ + |EPTR const U8 * const e ARTdip |bool |is_utf8_invariant_string_loc \ |NN const U8 * const s \ |STRLEN len \ |NULLOK const U8 **ep CRp |Size_t |is_utf8_perl_idcont_ \ - |NN const U8 *p \ - |NN const U8 * const e + |SPTR const U8 *p \ + |EPTR const U8 * const e CRp |Size_t |is_utf8_perl_idstart_ \ - |NN const U8 *p \ - |NN const U8 * const e + |SPTR const U8 *p \ + |EPTR const U8 * const e ARTdmp |bool |is_utf8_string |NN const U8 *s \ |STRLEN len ARTdip |bool |is_utf8_string_flags \ @@ -1873,11 +1873,11 @@ ATdip |bool |is_utf8_string_loclen_flags \ |NULLOK STRLEN *el \ |const U32 flags APTdmp |bool |is_utf8_valid_partial_char \ - |NN const U8 * const s0 \ - |NN const U8 * const e + |SPTR const U8 * const s0 \ + |EPTR const U8 * const e ARTdip |bool |is_utf8_valid_partial_char_flags \ - |NN const U8 * const s0 \ - |NN const U8 * const e \ + |SPTR const U8 * const s0 \ + |EPTR const U8 * const e \ |const U32 flags : Used in perly.y @@ -3139,12 +3139,12 @@ Adp |const char *|scan_version \ |NN const char *s \ |NN SV *rv \ |bool qv -Adp |char * |scan_vstring |NN const char *s \ - |NN const char * const e \ +Adp |char * |scan_vstring |SPTR const char *s \ + |EPTR const char * const e \ |NN SV *sv EXpx |char * |scan_word |NN char *s \ - |NN char *dest \ - |NN char *dest_end \ + |SPTR char *dest \ + |EPTR char *dest_end \ |int allow_package \ |NN STRLEN *slp Cp |U32 |seed @@ -3758,27 +3758,27 @@ Cp |UV |to_uni_upper |UV c \ |NN U8 *p \ |NN STRLEN *lenp Cp |UV |to_utf8_fold_flags_ \ - |NN const U8 *p \ - |NN const U8 *e \ + |SPTR const U8 *p \ + |EPTR const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |U8 flags Cp |UV |to_utf8_lower_flags_ \ - |NN const U8 *p \ - |NN const U8 *e \ + |SPTR const U8 *p \ + |EPTR const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |bool flags Cp |UV 
|to_utf8_title_flags_ \ - |NN const U8 *p \ - |NN const U8 *e \ + |SPTR const U8 *p \ + |EPTR const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |bool flags Cp |UV |to_utf8_upper_flags_ \ - |NN const U8 *p \ - |NN const U8 *e \ + |SPTR const U8 *p \ + |EPTR const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |bool flags @@ -5847,8 +5847,8 @@ Ei |I32 |foldEQ_latin1_s2_folded \ ERS |bool |isFOO_lc |const U8 classnum \ |const U8 character ERS |bool |isFOO_utf8_lc |const U8 classnum \ - |NN const U8 *character \ - |NN const U8 *e + |SPTR const U8 *character \ + |EPTR const U8 *e ERS |bool |isGCB |const GCB_enum before \ |const GCB_enum after \ |NN const U8 * const strbeg \ @@ -5892,8 +5892,8 @@ ERST |U8 * |reghopmaybe3 |NN U8 *s \ |NN const U8 * const lim ERS |bool |reginclass |NULLOK regexp * const prog \ |NN const regnode * const n \ - |NN const U8 * const p \ - |NN const U8 * const p_end \ + |SPTR const U8 * const p \ + |EPTR const U8 * const p_end \ |bool const utf8_target ERS |SSize_t|regmatch |NN regmatch_info *reginfo \ |NN char *startpos \ @@ -6181,8 +6181,8 @@ RS |char * |scan_const |NN char *start RS |char * |scan_formline |NN char *s RS |char * |scan_heredoc |NN char *s S |char * |scan_ident |NN char *s \ - |NN char *dest \ - |NN char *dest_end \ + |SPTR char *dest \ + |EPTR char *dest_end \ |bool chk_unary RS |char * |scan_inputsymbol \ |NN char *start @@ -6247,8 +6247,8 @@ RS |UV |check_locale_boundary_crossing \ |NN U8 * const ustrp \ |NN STRLEN *lenp RTi |int |does_utf8_overflow \ - |NN const U8 * const s \ - |NN const U8 *e + |SPTR const U8 * const s \ + |EPTR const U8 *e RTi |int |isFF_overlong |NN const U8 * const s \ |const STRLEN len RTi |SSize_t|is_utf8_overlong \ @@ -6278,16 +6278,16 @@ S |UV |to_utf8_case_ |const UV original \ |NULLOK const U32 * const * const aux_tables \ |NULLOK const U8 * const aux_table_lengths \ |NN const char * const normal -S |UV |turkic_fc |NN const U8 * const p \ - |NN const U8 * const e \ +S |UV |turkic_fc |SPTR 
const U8 * const p \ + |EPTR const U8 * const e \ |NN U8 *ustrp \ |NN STRLEN *lenp -S |UV |turkic_lc |NN const U8 * const p0 \ - |NN const U8 * const e \ +S |UV |turkic_lc |SPTR const U8 * const p0 \ + |EPTR const U8 * const e \ |NN U8 *ustrp \ |NN STRLEN *lenp -S |UV |turkic_uc |NN const U8 * const p \ - |NN const U8 * const e \ +S |UV |turkic_uc |SPTR const U8 * const p \ + |EPTR const U8 * const e \ |NN U8 *ustrp \ |NN STRLEN *lenp RS |char * |unexpected_non_continuation_text \ diff --git a/proto.h b/proto.h index 9b69510232..4a081b9821 100644 --- a/proto.h +++ b/proto.h @@ -1131,7 +1131,7 @@ Perl_force_locale_unlock(pTHX) PERL_CALLCONV void Perl_force_out_malformed_utf8_message_(pTHX_ const U8 * const p, const U8 * const e, U32 flags, const bool die_here); #define PERL_ARGS_ASSERT_FORCE_OUT_MALFORMED_UTF8_MESSAGE_ \ - assert(p); assert(e) + assert(p); assert(e); assert(p < e) PERL_CALLCONV char * Perl_form(pTHX_ const char *pat, ...) @@ -1858,13 +1858,13 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, const bool requ __attribute__warn_unused_result__ __attribute__pure__; #define PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_ \ - assert(s0); assert(e) + assert(s0); assert(e); assert(s0 < e) PERL_CALLCONV Size_t Perl_is_utf8_FOO_(pTHX_ const U8 classnum, const U8 *p, const U8 * const e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_FOO_ \ - assert(p); assert(e) + assert(p); assert(e); assert(p < e) /* PERL_CALLCONV STRLEN Perl_is_utf8_char_buf(const U8 *buf, const U8 *buf_end); */ @@ -1874,7 +1874,7 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 *e, const U32 flags) __attribute__warn_unused_result__ __attribute__pure__; #define PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_ \ - assert(s); assert(e) + assert(s); assert(e); assert(s < e) /* PERL_CALLCONV bool Perl_is_utf8_fixed_width_buf_flags(const U8 * const s, STRLEN len, const U32 flags); */ @@ -1886,13 +1886,13 @@ PERL_CALLCONV Size_t Perl_is_utf8_perl_idcont_(pTHX_ const U8 
*p, const U8 * const e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_PERL_IDCONT_ \ - assert(p); assert(e) + assert(p); assert(e); assert(p < e) PERL_CALLCONV Size_t Perl_is_utf8_perl_idstart_(pTHX_ const U8 *p, const U8 * const e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_PERL_IDSTART_ \ - assert(p); assert(e) + assert(p); assert(e); assert(p < e) /* PERL_CALLCONV bool Perl_is_utf8_string(const U8 *s, STRLEN len) @@ -4239,10 +4239,11 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, bool qv); PERL_CALLCONV char * Perl_scan_vstring(pTHX_ const char *s, const char * const e, SV *sv); #define PERL_ARGS_ASSERT_SCAN_VSTRING \ - assert(s); assert(e); assert(sv) + assert(s); assert(e); assert(sv); assert(s < e) #define PERL_ARGS_ASSERT_SCAN_WORD \ - assert(s); assert(dest); assert(dest_end); assert(slp) + assert(s); assert(dest); assert(dest_end); assert(slp); \ + assert(dest < dest_end) PERL_CALLCONV U32 Perl_seed(pTHX); @@ -5296,22 +5297,22 @@ Perl_to_uni_upper(pTHX_ UV c, U8 *p, STRLEN *lenp); PERL_CALLCONV UV Perl_to_utf8_fold_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, U8 flags); #define PERL_ARGS_ASSERT_TO_UTF8_FOLD_FLAGS_ \ - assert(p); assert(e); assert(ustrp) + assert(p); assert(e); assert(ustrp); assert(p < e) PERL_CALLCONV UV Perl_to_utf8_lower_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT_TO_UTF8_LOWER_FLAGS_ \ - assert(p); assert(e); assert(ustrp) + assert(p); assert(e); assert(ustrp); assert(p < e) PERL_CALLCONV UV Perl_to_utf8_title_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT_TO_UTF8_TITLE_FLAGS_ \ - assert(p); assert(e); assert(ustrp) + assert(p); assert(e); assert(ustrp); assert(p < e) PERL_CALLCONV UV Perl_to_utf8_upper_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT_TO_UTF8_UPPER_FLAGS_ \ - assert(p); assert(e); 
assert(ustrp) + assert(p); assert(e); assert(ustrp); assert(p < e) PERL_CALLCONV bool Perl_try_amagic_bin(pTHX_ int method, int flags); @@ -8879,7 +8880,7 @@ S_unwind_scan_frames(pTHX_ void *p); # define PERL_ARGS_ASSERT_ISFOO_LC # define PERL_ARGS_ASSERT_ISFOO_UTF8_LC \ - assert(character); assert(e) + assert(character); assert(e); assert(character < e) # define PERL_ARGS_ASSERT_ISGCB \ assert(strbeg); assert(curpos) @@ -8912,7 +8913,7 @@ S_unwind_scan_frames(pTHX_ void *p); assert(s); assert(lim) # define PERL_ARGS_ASSERT_REGINCLASS \ - assert(n); assert(p); assert(p_end) + assert(n); assert(p); assert(p_end); assert(p < p_end) # define PERL_ARGS_ASSERT_REGMATCH \ assert(reginfo); assert(startpos); assert(prog) @@ -9471,7 +9472,7 @@ S_scan_heredoc(pTHX_ char *s) STATIC char * S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary); # define PERL_ARGS_ASSERT_SCAN_IDENT \ - assert(s); assert(dest); assert(dest_end) + assert(s); assert(dest); assert(dest_end); assert(dest < dest_end) STATIC char * S_scan_inputsymbol(pTHX_ char *start) @@ -9618,17 +9619,17 @@ S_to_utf8_case_(pTHX_ const UV original, const U8 *p, U8 *ustrp, STRLEN *lenp, S STATIC UV S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, U8 *ustrp, STRLEN *lenp); # define PERL_ARGS_ASSERT_TURKIC_FC \ - assert(p); assert(e); assert(ustrp); assert(lenp) + assert(p); assert(e); assert(ustrp); assert(lenp); assert(p < e) STATIC UV S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e, U8 *ustrp, STRLEN *lenp); # define PERL_ARGS_ASSERT_TURKIC_LC \ - assert(p0); assert(e); assert(ustrp); assert(lenp) + assert(p0); assert(e); assert(ustrp); assert(lenp); assert(p0 < e) STATIC UV S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, U8 *ustrp, STRLEN *lenp); # define PERL_ARGS_ASSERT_TURKIC_UC \ - assert(p); assert(e); assert(ustrp); assert(lenp) + assert(p); assert(e); assert(ustrp); assert(lenp); assert(p < e) STATIC char * S_unexpected_non_continuation_text(pTHX_ const U8 * const 
s, STRLEN print_len, const STRLEN non_cont_byte_pos, const STRLEN expect_len) @@ -9648,7 +9649,7 @@ PERL_STATIC_INLINE int S_does_utf8_overflow(const U8 * const s, const U8 *e) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW \ - assert(s); assert(e) + assert(s); assert(e); assert(s < e) PERL_STATIC_INLINE int S_isFF_overlong(const U8 * const s, const STRLEN len) @@ -10008,7 +10009,7 @@ PERL_STATIC_INLINE bool Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS \ - assert(s0); assert(e) + assert(s0); assert(e); assert(s0 < e) PERL_STATIC_INLINE unsigned Perl_lsbit_pos32(U32 word) diff --git a/utf8.c b/utf8.c index 9aa8c0ece5..d42afdb597 100644 --- a/utf8.c +++ b/utf8.c @@ -725,7 +725,6 @@ STRLEN Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags) { PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_; - assert(e > s); assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE)); SSize_t len, full_len; @@ -755,6 +754,7 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags) * determined with just the first one or two bytes. * */ + full_len = UTF8SKIP(s); len = e - s; @@ -840,7 +840,6 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, const bool require_partial) { PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_; - assert(s0 < e); assert(*s0 == I8_TO_NATIVE_UTF8(0xFF)); /* This is called to determine if the UTF-8 sequence starting at s0 and @@ -4245,7 +4244,6 @@ S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, U8 * ustrp, STRLEN *lenp) { PERL_ARGS_ASSERT_TURKIC_FC; - assert(e > p); /* Returns 0 if the foldcase of the input UTF-8 encoded sequence from * p0..e-1 according to Turkic rules is the same as for non-Turkic. 
@@ -4280,7 +4278,6 @@ S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e, U8 * ustrp, STRLEN *lenp) { PERL_ARGS_ASSERT_TURKIC_LC; - assert(e > p0); /* Returns 0 if the lowercase of the input UTF-8 encoded sequence from * p0..e-1 according to Turkic rules is the same as for non-Turkic. @@ -4326,7 +4323,6 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, U8 * ustrp, STRLEN *lenp) { PERL_ARGS_ASSERT_TURKIC_UC; - assert(e > p); /* Returns 0 if the upper or title-case of the input UTF-8 encoded sequence * from p0..e-1 according to Turkic rules is the same as for non-Turkic.