From e18f23d7358c3abc5d74b13f43209a163ae82869 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Mon, 6 Oct 2025 07:34:18 -0600
Subject: [PATCH] embed.fnc: Add ptr assertions for apparently non-problematic
 functions

I went through the declarations in embed.fnc and added SPTR/EPTR
constraints to every function that appeared to take pointers to both the
beginning and the end of a string.  I then ran the test suite, and
reverted any changes that caused failures.

Then I looked at the code of each remaining function to see whether it
was equipped to handle the case where the end == the beginning, and
removed the constraint from those that were.

This is the result.  Testing in the field may reveal others that the
test suite missed; we can fix those as they occur.

I also removed the asserts in the function bodies that are now redundant
because the same checks are included in the PERL_ARGS_ASSERT macros.
---
 embed.fnc | 84 +++++++++++++++++++++++++++----------------------------
 proto.h   | 41 ++++++++++++++-------------
 utf8.c    |  6 +---
 3 files changed, 64 insertions(+), 67 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index afc5b5d088..e0114deb54 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1346,8 +1346,8 @@ Adpx	|void	|forbid_outofblock_ops					\
 				|NN const char *blockname
 p	|void	|force_locale_unlock
 Cp	|void	|force_out_malformed_utf8_message_			\
-				|NN const U8 * const p			\
-				|NN const U8 * const e			\
+				|SPTR const U8 * const p		\
+				|EPTR const U8 * const e		\
 				|U32 flags				\
 				|const bool die_here
 Adfp	|char * |form		|NN const char *pat			\
@@ -1811,12 +1811,12 @@ ARTdip	|Size_t |isUTF8_CHAR_flags					\
 				|NN const U8 * const e			\
 				|const U32 flags
 CPRTp	|STRLEN |is_utf8_char_helper_					\
-				|NN const U8 * const s			\
-				|NN const U8 *e 			\
+				|SPTR const U8 * const s		\
+				|EPTR const U8 *e			\
 				|const U32 flags
 CPRTp	|Size_t |is_utf8_FF_helper_					\
-				|NN const U8 * const s0 		\
-				|NN const U8 * const e			\
+				|SPTR const U8 * const s0		\
+				|EPTR const U8 * const e		\
 				|const bool require_partial
 ATdmp	|bool	|is_utf8_fixed_width_buf_flags				\
 				|NN const U8 * const s			\
@@ -1834,18 +1834,18 @@ ATdip	|bool	|is_utf8_fixed_width_buf_loclen_flags			\
 				|NULLOK STRLEN *el			\
 				|const U32 flags
 CRp	|Size_t |is_utf8_FOO_	|const U8 classnum			\
-				|NN const U8 *p 			\
-				|NN const U8 * const e
+				|SPTR const U8 *p			\
+				|EPTR const U8 * const e
 ARTdip	|bool	|is_utf8_invariant_string_loc				\
 				|NN const U8 * const s			\
 				|STRLEN len				\
 				|NULLOK const U8 **ep
 CRp	|Size_t |is_utf8_perl_idcont_					\
-				|NN const U8 *p 			\
-				|NN const U8 * const e
+				|SPTR const U8 *p			\
+				|EPTR const U8 * const e
 CRp	|Size_t |is_utf8_perl_idstart_					\
-				|NN const U8 *p 			\
-				|NN const U8 * const e
+				|SPTR const U8 *p			\
+				|EPTR const U8 * const e
 ARTdmp	|bool	|is_utf8_string |NN const U8 *s 			\
 				|STRLEN len
 ARTdip	|bool	|is_utf8_string_flags					\
@@ -1873,11 +1873,11 @@ ATdip	|bool	|is_utf8_string_loclen_flags				\
 				|NULLOK STRLEN *el			\
 				|const U32 flags
 APTdmp	|bool	|is_utf8_valid_partial_char				\
-				|NN const U8 * const s0 		\
-				|NN const U8 * const e
+				|SPTR const U8 * const s0		\
+				|EPTR const U8 * const e
 ARTdip	|bool	|is_utf8_valid_partial_char_flags			\
-				|NN const U8 * const s0 		\
-				|NN const U8 * const e			\
+				|SPTR const U8 * const s0		\
+				|EPTR const U8 * const e		\
 				|const U32 flags
 
 : Used in perly.y
@@ -3139,12 +3139,12 @@ Adp	|const char *|scan_version					\
 				|NN const char *s			\
 				|NN SV *rv				\
 				|bool qv
-Adp	|char * |scan_vstring	|NN const char *s			\
-				|NN const char * const e		\
+Adp	|char * |scan_vstring	|SPTR const char *s			\
+				|EPTR const char * const e		\
 				|NN SV *sv
 EXpx	|char * |scan_word	|NN char *s				\
-				|NN char *dest				\
-				|NN char *dest_end			\
+				|SPTR char *dest			\
+				|EPTR char *dest_end			\
 				|int allow_package			\
 				|NN STRLEN *slp
 Cp	|U32	|seed
@@ -3758,27 +3758,27 @@ Cp	|UV	|to_uni_upper	|UV c					\
 				|NN U8 *p				\
 				|NN STRLEN *lenp
 Cp	|UV	|to_utf8_fold_flags_					\
-				|NN const U8 *p 			\
-				|NN const U8 *e 			\
+				|SPTR const U8 *p			\
+				|EPTR const U8 *e			\
 				|NN U8 *ustrp				\
 				|NULLOK STRLEN *lenp			\
 				|U8 flags
 
 Cp	|UV	|to_utf8_lower_flags_					\
-				|NN const U8 *p 			\
-				|NN const U8 *e 			\
+				|SPTR const U8 *p			\
+				|EPTR const U8 *e			\
 				|NN U8 *ustrp				\
 				|NULLOK STRLEN *lenp			\
 				|bool flags
 Cp	|UV	|to_utf8_title_flags_					\
-				|NN const U8 *p 			\
-				|NN const U8 *e 			\
+				|SPTR const U8 *p			\
+				|EPTR const U8 *e			\
 				|NN U8 *ustrp				\
 				|NULLOK STRLEN *lenp			\
 				|bool flags
 Cp	|UV	|to_utf8_upper_flags_					\
-				|NN const U8 *p 			\
-				|NN const U8 *e 			\
+				|SPTR const U8 *p			\
+				|EPTR const U8 *e			\
 				|NN U8 *ustrp				\
 				|NULLOK STRLEN *lenp			\
 				|bool flags
@@ -5847,8 +5847,8 @@ Ei	|I32	|foldEQ_latin1_s2_folded				\
 ERS	|bool	|isFOO_lc	|const U8 classnum			\
 				|const U8 character
 ERS	|bool	|isFOO_utf8_lc	|const U8 classnum			\
-				|NN const U8 *character 		\
-				|NN const U8 *e
+				|SPTR const U8 *character		\
+				|EPTR const U8 *e
 ERS	|bool	|isGCB		|const GCB_enum before			\
 				|const GCB_enum after			\
 				|NN const U8 * const strbeg		\
@@ -5892,8 +5892,8 @@ ERST	|U8 *	|reghopmaybe3	|NN U8 *s				\
 				|NN const U8 * const lim
 ERS	|bool	|reginclass	|NULLOK regexp * const prog		\
 				|NN const regnode * const n		\
-				|NN const U8 * const p			\
-				|NN const U8 * const p_end		\
+				|SPTR const U8 * const p		\
+				|EPTR const U8 * const p_end		\
 				|bool const utf8_target
 ERS	|SSize_t|regmatch	|NN regmatch_info *reginfo		\
 				|NN char *startpos			\
@@ -6181,8 +6181,8 @@ RS	|char * |scan_const	|NN char *start
 RS	|char * |scan_formline	|NN char *s
 RS	|char * |scan_heredoc	|NN char *s
 S	|char * |scan_ident	|NN char *s				\
-				|NN char *dest				\
-				|NN char *dest_end			\
+				|SPTR char *dest			\
+				|EPTR char *dest_end			\
 				|bool chk_unary
 RS	|char * |scan_inputsymbol					\
 				|NN char *start
@@ -6247,8 +6247,8 @@ RS	|UV	|check_locale_boundary_crossing 			\
 				|NN U8 * const ustrp			\
 				|NN STRLEN *lenp
 RTi	|int	|does_utf8_overflow					\
-				|NN const U8 * const s			\
-				|NN const U8 *e
+				|SPTR const U8 * const s		\
+				|EPTR const U8 *e
 RTi	|int	|isFF_overlong	|NN const U8 * const s			\
 				|const STRLEN len
 RTi	|SSize_t|is_utf8_overlong					\
@@ -6278,16 +6278,16 @@ S	|UV	|to_utf8_case_	|const UV original				\
 				|NULLOK const U32 * const * const aux_tables	\
 				|NULLOK const U8 * const aux_table_lengths	\
 				|NN const char * const normal
-S	|UV	|turkic_fc	|NN const U8 * const p			\
-				|NN const U8 * const e			\
+S	|UV	|turkic_fc	|SPTR const U8 * const p		\
+				|EPTR const U8 * const e		\
 				|NN U8 *ustrp				\
 				|NN STRLEN *lenp
-S	|UV	|turkic_lc	|NN const U8 * const p0 		\
-				|NN const U8 * const e			\
+S	|UV	|turkic_lc	|SPTR const U8 * const p0		\
+				|EPTR const U8 * const e		\
 				|NN U8 *ustrp				\
 				|NN STRLEN *lenp
-S	|UV	|turkic_uc	|NN const U8 * const p			\
-				|NN const U8 * const e			\
+S	|UV	|turkic_uc	|SPTR const U8 * const p		\
+				|EPTR const U8 * const e		\
 				|NN U8 *ustrp				\
 				|NN STRLEN *lenp
 RS	|char * |unexpected_non_continuation_text			\
diff --git a/proto.h b/proto.h
index 9b69510232..4a081b9821 100644
--- a/proto.h
+++ b/proto.h
@@ -1131,7 +1131,7 @@ Perl_force_locale_unlock(pTHX)
 PERL_CALLCONV void
 Perl_force_out_malformed_utf8_message_(pTHX_ const U8 * const p, const U8 * const e, U32 flags, const bool die_here);
 #define PERL_ARGS_ASSERT_FORCE_OUT_MALFORMED_UTF8_MESSAGE_ \
-        assert(p); assert(e)
+        assert(p); assert(e); assert(p < e)
 
 PERL_CALLCONV char *
 Perl_form(pTHX_ const char *pat, ...)
@@ -1858,13 +1858,13 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, const bool requ
         __attribute__warn_unused_result__
         __attribute__pure__;
 #define PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_     \
-        assert(s0); assert(e)
+        assert(s0); assert(e); assert(s0 < e)
 
 PERL_CALLCONV Size_t
 Perl_is_utf8_FOO_(pTHX_ const U8 classnum, const U8 *p, const U8 * const e)
         __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_IS_UTF8_FOO_           \
-        assert(p); assert(e)
+        assert(p); assert(e); assert(p < e)
 
 /* PERL_CALLCONV STRLEN
 Perl_is_utf8_char_buf(const U8 *buf, const U8 *buf_end); */
@@ -1874,7 +1874,7 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 *e, const U32 flags)
         __attribute__warn_unused_result__
         __attribute__pure__;
 #define PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_   \
-        assert(s); assert(e)
+        assert(s); assert(e); assert(s < e)
 
 /* PERL_CALLCONV bool
 Perl_is_utf8_fixed_width_buf_flags(const U8 * const s, STRLEN len, const U32 flags); */
@@ -1886,13 +1886,13 @@ PERL_CALLCONV Size_t
 Perl_is_utf8_perl_idcont_(pTHX_ const U8 *p, const U8 * const e)
         __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_IS_UTF8_PERL_IDCONT_   \
-        assert(p); assert(e)
+        assert(p); assert(e); assert(p < e)
 
 PERL_CALLCONV Size_t
 Perl_is_utf8_perl_idstart_(pTHX_ const U8 *p, const U8 * const e)
         __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_IS_UTF8_PERL_IDSTART_  \
-        assert(p); assert(e)
+        assert(p); assert(e); assert(p < e)
 
 /* PERL_CALLCONV bool
 Perl_is_utf8_string(const U8 *s, STRLEN len)
@@ -4239,10 +4239,11 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, bool qv);
 PERL_CALLCONV char *
 Perl_scan_vstring(pTHX_ const char *s, const char * const e, SV *sv);
 #define PERL_ARGS_ASSERT_SCAN_VSTRING           \
-        assert(s); assert(e); assert(sv)
+        assert(s); assert(e); assert(sv); assert(s < e)
 
 #define PERL_ARGS_ASSERT_SCAN_WORD              \
-        assert(s); assert(dest); assert(dest_end); assert(slp)
+        assert(s); assert(dest); assert(dest_end); assert(slp); \
+        assert(dest < dest_end)
 
 PERL_CALLCONV U32
 Perl_seed(pTHX);
@@ -5296,22 +5297,22 @@ Perl_to_uni_upper(pTHX_ UV c, U8 *p, STRLEN *lenp);
 PERL_CALLCONV UV
 Perl_to_utf8_fold_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, U8 flags);
 #define PERL_ARGS_ASSERT_TO_UTF8_FOLD_FLAGS_    \
-        assert(p); assert(e); assert(ustrp)
+        assert(p); assert(e); assert(ustrp); assert(p < e)
 
 PERL_CALLCONV UV
 Perl_to_utf8_lower_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags);
 #define PERL_ARGS_ASSERT_TO_UTF8_LOWER_FLAGS_   \
-        assert(p); assert(e); assert(ustrp)
+        assert(p); assert(e); assert(ustrp); assert(p < e)
 
 PERL_CALLCONV UV
 Perl_to_utf8_title_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags);
 #define PERL_ARGS_ASSERT_TO_UTF8_TITLE_FLAGS_   \
-        assert(p); assert(e); assert(ustrp)
+        assert(p); assert(e); assert(ustrp); assert(p < e)
 
 PERL_CALLCONV UV
 Perl_to_utf8_upper_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags);
 #define PERL_ARGS_ASSERT_TO_UTF8_UPPER_FLAGS_   \
-        assert(p); assert(e); assert(ustrp)
+        assert(p); assert(e); assert(ustrp); assert(p < e)
 
 PERL_CALLCONV bool
 Perl_try_amagic_bin(pTHX_ int method, int flags);
@@ -8879,7 +8880,7 @@ S_unwind_scan_frames(pTHX_ void *p);
 # define PERL_ARGS_ASSERT_ISFOO_LC
 
 # define PERL_ARGS_ASSERT_ISFOO_UTF8_LC         \
-        assert(character); assert(e)
+        assert(character); assert(e); assert(character < e)
 
 # define PERL_ARGS_ASSERT_ISGCB                 \
         assert(strbeg); assert(curpos)
@@ -8912,7 +8913,7 @@ S_unwind_scan_frames(pTHX_ void *p);
         assert(s); assert(lim)
 
 # define PERL_ARGS_ASSERT_REGINCLASS            \
-        assert(n); assert(p); assert(p_end)
+        assert(n); assert(p); assert(p_end); assert(p < p_end)
 
 # define PERL_ARGS_ASSERT_REGMATCH              \
         assert(reginfo); assert(startpos); assert(prog)
@@ -9471,7 +9472,7 @@ S_scan_heredoc(pTHX_ char *s)
 STATIC char *
 S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary);
 # define PERL_ARGS_ASSERT_SCAN_IDENT            \
-        assert(s); assert(dest); assert(dest_end)
+        assert(s); assert(dest); assert(dest_end); assert(dest < dest_end)
 
 STATIC char *
 S_scan_inputsymbol(pTHX_ char *start)
@@ -9618,17 +9619,17 @@ S_to_utf8_case_(pTHX_ const UV original, const U8 *p, U8 *ustrp, STRLEN *lenp, S
 STATIC UV
 S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, U8 *ustrp, STRLEN *lenp);
 # define PERL_ARGS_ASSERT_TURKIC_FC             \
-        assert(p); assert(e); assert(ustrp); assert(lenp)
+        assert(p); assert(e); assert(ustrp); assert(lenp); assert(p < e)
 
 STATIC UV
 S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e, U8 *ustrp, STRLEN *lenp);
 # define PERL_ARGS_ASSERT_TURKIC_LC             \
-        assert(p0); assert(e); assert(ustrp); assert(lenp)
+        assert(p0); assert(e); assert(ustrp); assert(lenp); assert(p0 < e)
 
 STATIC UV
 S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, U8 *ustrp, STRLEN *lenp);
 # define PERL_ARGS_ASSERT_TURKIC_UC             \
-        assert(p); assert(e); assert(ustrp); assert(lenp)
+        assert(p); assert(e); assert(ustrp); assert(lenp); assert(p < e)
 
 STATIC char *
 S_unexpected_non_continuation_text(pTHX_ const U8 * const s, STRLEN print_len, const STRLEN non_cont_byte_pos, const STRLEN expect_len)
@@ -9648,7 +9649,7 @@ PERL_STATIC_INLINE int
 S_does_utf8_overflow(const U8 * const s, const U8 *e)
         __attribute__warn_unused_result__;
 #   define PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW  \
-        assert(s); assert(e)
+        assert(s); assert(e); assert(s < e)
 
 PERL_STATIC_INLINE int
 S_isFF_overlong(const U8 * const s, const STRLEN len)
@@ -10008,7 +10009,7 @@ PERL_STATIC_INLINE bool
 Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags)
         __attribute__warn_unused_result__;
 # define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS \
-        assert(s0); assert(e)
+        assert(s0); assert(e); assert(s0 < e)
 
 PERL_STATIC_INLINE unsigned
 Perl_lsbit_pos32(U32 word)
diff --git a/utf8.c b/utf8.c
index 9aa8c0ece5..d42afdb597 100644
--- a/utf8.c
+++ b/utf8.c
@@ -725,7 +725,6 @@ STRLEN
 Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags)
 {
     PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_;
-    assert(e > s);
     assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));
 
     SSize_t len, full_len;
@@ -755,6 +754,7 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags)
      *  determined with just the first one or two bytes.
      *
      */
+
     full_len = UTF8SKIP(s);
 
     len = e - s;
@@ -840,7 +840,6 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e,
                         const bool require_partial)
 {
     PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_;
-    assert(s0 < e);
     assert(*s0 == I8_TO_NATIVE_UTF8(0xFF));
 
     /* This is called to determine if the UTF-8 sequence starting at s0 and
@@ -4245,7 +4244,6 @@ S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e,
                         U8 * ustrp, STRLEN *lenp)
 {
     PERL_ARGS_ASSERT_TURKIC_FC;
-    assert(e > p);
 
     /* Returns 0 if the foldcase of the input UTF-8 encoded sequence from
      * p0..e-1 according to Turkic rules is the same as for non-Turkic.
@@ -4280,7 +4278,6 @@ S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e,
                         U8 * ustrp, STRLEN *lenp)
 {
     PERL_ARGS_ASSERT_TURKIC_LC;
-    assert(e > p0);
 
     /* Returns 0 if the lowercase of the input UTF-8 encoded sequence from
      * p0..e-1 according to Turkic rules is the same as for non-Turkic.
@@ -4326,7 +4323,6 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
                         U8 * ustrp, STRLEN *lenp)
 {
     PERL_ARGS_ASSERT_TURKIC_UC;
-    assert(e > p);
 
     /* Returns 0 if the upper or title-case of the input UTF-8 encoded sequence
      * from p0..e-1 according to Turkic rules is the same as for non-Turkic.