Add valid_utf8_to_uv()

This is identical to valid_utf8_to_uvchr(). Both are internal functions designed for when you are certain that the UTF-8 string to be translated is well formed, generally because you created it yourself earlier. The only reason for this new synonym is to lessen the cognitive load on programmers, who should be using the "_uv" suffix functions rather than the "_uvchr" suffix ones for these sorts of tasks. With this synonym available, one doesn't have to learn that there are two spellings.
2026-01-26 16:39:36 +00:00 · 2025-08-21 13:47:39 -06:00 · 2025-08-21 13:47:39 -06:00 · 8543a7ac33
commit 8543a7ac33
parent 738383d65e
6 changed files with 36 additions and 9 deletions
--- a/embed.fnc
+++ b/embed.fnc
@ -3906,7 +3906,10 @@ Adp	|bool	|valid_identifier_pvn					\
 				|U32 flags
 Adp	|bool	|valid_identifier_sv					\
 				|NULLOK SV *sv
-CRTdip	|UV	|valid_utf8_to_uvchr					\
+CRTdip	|UV	|valid_utf8_to_uv					\
+				|NN const U8 *s 			\
+				|NULLOK STRLEN *retlen
+CRTdmp	|UV	|valid_utf8_to_uvchr					\
 				|NN const U8 *s 			\
 				|NULLOK STRLEN *retlen
 Adp	|int	|vcmp		|NN SV *lhv				\
--- a/embed.h
+++ b/embed.h
@ -841,7 +841,8 @@
 # define valid_identifier_pve(a,b,c)            Perl_valid_identifier_pve(aTHX_ a,b,c)
 # define valid_identifier_pvn(a,b,c)            Perl_valid_identifier_pvn(aTHX_ a,b,c)
 # define valid_identifier_sv(a)                 Perl_valid_identifier_sv(aTHX_ a)
-# define valid_utf8_to_uvchr                    Perl_valid_utf8_to_uvchr
+# define valid_utf8_to_uv                       Perl_valid_utf8_to_uv
+# define Perl_valid_utf8_to_uvchr               valid_utf8_to_uvchr
 # define vcmp(a,b)                              Perl_vcmp(aTHX_ a,b)
 # define vcroak(a,b)                            Perl_vcroak(aTHX_ a,b)
 # define vdeb(a,b)                              Perl_vdeb(aTHX_ a,b)
--- a/inline.h
+++ b/inline.h
@ -1306,25 +1306,36 @@ Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp)
 }

 /*
-=for apidoc valid_utf8_to_uvchr
-Like C<L<perlapi/utf8_to_uv>>, but should only be called when it is
+=for apidoc      valid_utf8_to_uv
+=for apidoc_item valid_utf8_to_uvchr
+
+These are synonymous.
+
+These are like C<L<perlapi/utf8_to_uv>>, but should only be called when it is
 known that the next character in the input UTF-8 string C<s> is well-formed
 (I<e.g.>, it passes C<L<perlapi/isUTF8_CHAR>>).  Surrogates, non-character code
 points, and non-Unicode code points are allowed.

+The only use for these is that they should run slightly faster than
+C<utf8_to_uv> because no error checking is done.
+
+The C<_uv> form is slightly preferred, so as to have a consistent spelling
+with the other C<_uv> forms, which are definitely preferred over the older
+and problematic C<_uvchr> forms.
+
 =cut

 */

 PERL_STATIC_INLINE UV
-Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
+Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
 {
+    PERL_ARGS_ASSERT_VALID_UTF8_TO_UV;
+
    const UV expectlen = UTF8SKIP(s);
    const U8* send = s + expectlen;
    UV uv = *s;

-    PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
-
    if (retlen) {
        *retlen = expectlen;
    }
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@ -350,6 +350,13 @@ well.

 XXX

+=item *
+
+A new function C<valid_utf8_to_uv> has been added.  This is synonymous
+with C<valid_utf8_to_uvchr>; its reason for existence is to have
+consistent spelling with the names of the other functions that translate
+from UTF-8, so you don't have to remember a different spelling.
+
 =back

 =head1 Selected Bug Fixes
--- a/proto.h
+++ b/proto.h
@ -5428,6 +5428,10 @@ PERL_CALLCONV bool
 Perl_valid_identifier_sv(pTHX_ SV *sv);
 #define PERL_ARGS_ASSERT_VALID_IDENTIFIER_SV

+/* PERL_CALLCONV UV
+Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
+        __attribute__warn_unused_result__; */
+
 #define PERL_ARGS_ASSERT_VALIDATE_PROTO         \
        assert(name)

@ -10298,9 +10302,9 @@ Perl_uv_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags);
        assert(d)

 PERL_STATIC_INLINE UV
-Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
+Perl_valid_utf8_to_uv(const U8 *s, STRLEN *retlen)
        __attribute__warn_unused_result__;
-# define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR   \
+# define PERL_ARGS_ASSERT_VALID_UTF8_TO_UV      \
        assert(s)

 PERL_STATIC_INLINE void
--- a/utf8.h
+++ b/utf8.h
@ -191,6 +191,7 @@ For details, see the description for L<perlapi/uv_to_utf8_flags>.
 #define c9strict_utf8_to_uv(s, e, cp_p, advance_p)                      \
        utf8_to_uv_flags(   s, e, cp_p, advance_p,                      \
                                 UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
+#define valid_utf8_to_uvchr(s, advance_p)  valid_utf8_to_uv(s, advance_p)

 #define utf16_to_utf8(p, d, bytelen, newlen)                                \
                            utf16_to_utf8_base(p, d, bytelen, newlen, 0, 1)