Convert all core uvchr_to_utf8 calls to uv_to_utf8

This switches to the newer, more modern synonym. In a few cases the code was calling the flags version with no flags (a flags argument of 0), so I changed those calls to use the no-flags version.
2026-01-26 08:38:23 +00:00 · 2024-12-03 10:12:43 -07:00 · 2024-12-03 10:12:43 -07:00 · a015052e89
commit a015052e89
parent fcc9d7ed4b
10 changed files with 29 additions and 30 deletions
--- a/doop.c
+++ b/doop.c
@ -85,7 +85,7 @@ S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl)
            const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
            if (c < 0x100 && (ch = tbl->map[c]) >= 0) {
                matches++;
-                d = uvchr_to_utf8(d, (UV)ch);
+                d = uv_to_utf8(d, (UV)ch);
                s += ulen;
            }
            else { /* No match -> copy */
@ -282,7 +282,7 @@ S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl)
              replace:
                matches++;
                if (LIKELY(!squash || ch != pch)) {
-                    d = uvchr_to_utf8(d, ch);
+                    d = uv_to_utf8(d, ch);
                    pch = ch;
                }
                s += len;
@ -541,7 +541,7 @@ S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap)
         * to the output */
        if (! squash || to != previous_map) {
            if (out_is_utf8) {
-                d = uvchr_to_utf8(d, to);
+                d = uv_to_utf8(d, to);
            }
            else {
                if (to >= 256) {    /* If need to convert to UTF-8, restart */
--- a/op.c
+++ b/op.c
@ -6574,12 +6574,12 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
            start = MIN(IV_MAX, start);
            end   = MIN(IV_MAX, end);

-            temp_end_pos = uvchr_to_utf8(temp, start);
+            temp_end_pos = uv_to_utf8(temp, start);
            sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp);

            if (start != end) {
                Perl_sv_catpvf(aTHX_ inverted_tstr, "%c", RANGE_INDICATOR);
-                temp_end_pos = uvchr_to_utf8(temp, end);
+                temp_end_pos = uv_to_utf8(temp, end);
                sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp);
            }
        }
--- a/perl.c
+++ b/perl.c
@ -311,9 +311,9 @@ perl_construct(pTHXx)
    STATUS_ALL_SUCCESS;

    init_uniprops();
-    (void) uvchr_to_utf8_flags((U8 *) PL_TR_SPECIAL_HANDLING_UTF8,
-                               TR_SPECIAL_HANDLING,
-                               UNICODE_ALLOW_ABOVE_IV_MAX);
+    (void) uv_to_utf8_flags((U8 *) PL_TR_SPECIAL_HANDLING_UTF8,
+                            TR_SPECIAL_HANDLING,
+                            UNICODE_ALLOW_ABOVE_IV_MAX);

 #if defined(LOCAL_PATCH_COUNT)
    PL_localpatches = local_patches;	/* For possible -v */
--- a/pp.c
+++ b/pp.c
@ -3990,7 +3990,7 @@ PP(pp_chr)

    if (value > 255 && !IN_BYTES) {
        SvGROW(TARG, (STRLEN)UVCHR_SKIP(value)+1);
-        tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
+        tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value);
        SvCUR_set(TARG, tmps - SvPVX_const(TARG));
        *tmps = '\0';
        (void)SvPOK_only(TARG);
--- a/pp_pack.c
+++ b/pp_pack.c
@ -2020,7 +2020,7 @@ marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {

    for (;from_ptr < from_end; from_ptr++) {
        while (*m == from_ptr) *m++ = to_ptr;
-        to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
+        to_ptr = (char *) uv_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
    }
    *to_ptr = 0;

@ -2391,7 +2391,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
                len -= fromlen;
                while (fromlen > 0) {
-                    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
+                    cur = (char *) uv_to_utf8((U8 *) cur, * (U8 *) aptr);
                    aptr++;
                    fromlen--;
                }
@ -2605,7 +2605,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
-                    cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
+                    cur = (char *) uv_to_utf8((U8 *) cur, auv);
                } else {
                    if (auv >= 0x100) {
                        if (!SvUTF8(cat)) {
@ -2656,7 +2656,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                auv = SvUV_no_inf(fromstr, datumtype);
                if (utf8) {
                    U8 buffer[UTF8_MAXLEN+1], *endb;
-                    endb = uvchr_to_utf8_flags(buffer, auv, 0);
+                    endb = uv_to_utf8(buffer, auv);
                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
                        *cur = '\0';
                        SvCUR_set(cat, cur - start);
@ -2672,7 +2672,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
-                    cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
+                    cur = (char *) uv_to_utf8((U8 *) cur, auv);
                }
            }
            break;
--- a/regcomp.c
+++ b/regcomp.c
@ -6725,7 +6725,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        *(s++) = (char) ender;
                    }
                    else {
-                        U8 * new_s = uvchr_to_utf8((U8*)s, ender);
+                        U8 * new_s = uv_to_utf8((U8*)s, ender);
                        added_len = (char *) new_s - s;
                        s = (char *) new_s;

@ -11805,7 +11805,7 @@ S_optimize_regclass(pTHX_
                *STRINGs(REGNODE_p(*ret)) = (U8) value;
            }
            else {
-                uvchr_to_utf8((U8 *) STRINGs(REGNODE_p(*ret)), value);
+                uv_to_utf8((U8 *) STRINGs(REGNODE_p(*ret)), value);
            }

            return op;
@ -12088,8 +12088,8 @@ S_optimize_regclass(pTHX_

        /* Place the lowest UTF-8 start byte in the flags field, so as to allow
         * efficient ruling out at run time of many possible inputs.  */
-        (void) uvchr_to_utf8(low_utf8, start[0]);
-        (void) uvchr_to_utf8(high_utf8, end[0]);
+        (void) uv_to_utf8(low_utf8, start[0]);
+        (void) uv_to_utf8(high_utf8, end[0]);

        /* If all code points share the same first byte, this can be an
         * ANYOFRb.  Otherwise store the lowest UTF-8 start byte which can
@ -12123,7 +12123,7 @@ S_optimize_regclass(pTHX_
         * regnode can be used for higher ones, but we can't calculate the code
         * point of those.  IV_MAX suffices though, as it will be a large first
         * byte */
-        Size_t low_len = uvchr_to_utf8(low_utf8, MIN(lowest_cp, IV_MAX))
+        Size_t low_len = uv_to_utf8(low_utf8, MIN(lowest_cp, IV_MAX))
                       - low_utf8;

        /* We store the lowest possible first byte of the UTF-8 representation,
@ -12140,7 +12140,7 @@ S_optimize_regclass(pTHX_
         * well */
        if (highest_cp <= IV_MAX) {
            U8 high_utf8[UTF8_MAXBYTES+1];
-            Size_t high_len = uvchr_to_utf8(high_utf8, highest_cp) - high_utf8;
+            Size_t high_len = uv_to_utf8(high_utf8, highest_cp) - high_utf8;

            /* If the lowest and highest are the same, we can get an exact
             * first byte instead of a just minimum or even a sequence of exact
@ -14064,8 +14064,7 @@ S_get_extended_utf8_msg(pTHX_ const UV cp)
    HV *msgs;
    SV **msg;

-    uvchr_to_utf8_flags_msgs(dummy, cp, UNICODE_WARN_PERL_EXTENDED,
-                             &msgs);
+    uv_to_utf8_msgs(dummy, cp, UNICODE_WARN_PERL_EXTENDED, &msgs);

    msg = hv_fetchs(msgs, "text", 0);
    assert(msg);
--- a/regcomp_trie.c
+++ b/regcomp_trie.c
@ -381,7 +381,7 @@ and would end up looking like:
   8: EXACT <baz>(10)
  10: END(0)

-    d = uvchr_to_utf8_flags(d, uv, 0);
+    d = uv_to_utf8(d, uv);

 is the recommended Unicode-aware way of saying

@ -393,7 +393,7 @@ is the recommended Unicode-aware way of saying
        if (UTF) {                                                         \
            SV *zlopp = newSV(UTF8_MAXBYTES);                              \
            unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp);      \
-            unsigned char *const kapow = uvchr_to_utf8(flrbbbbb, val);     \
+            unsigned char *const kapow = uv_to_utf8(flrbbbbb, val);     \
            *kapow = '\0';                                                 \
            SvCUR_set(zlopp, kapow - flrbbbbb);                            \
            SvPOK_on(zlopp);                                               \
--- a/regexec.c
+++ b/regexec.c
@ -5015,7 +5015,7 @@ S_setup_EXACTISH_ST(pTHX_ const regnode * const text_node,

            /* Add this character to the list of possible matches */
            if (utf8_target) {
-                uvchr_to_utf8(matches[(U8) m->count], fold_from);
+                uv_to_utf8(matches[(U8) m->count], fold_from);
                lengths[m->count] = UVCHR_SKIP(fold_from);
                m->count++;
            }
--- a/sv.c
+++ b/sv.c
@ -13251,7 +13251,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                    {
                        STATIC_ASSERT_STMT(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
                        eptr = ebuf;
-                        elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
+                        elen = uv_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
                        is_utf8 = TRUE;
                    }
                    else {
--- a/toke.c
+++ b/toke.c
@ -775,7 +775,7 @@ S_missingterm(pTHX_ char *s, STRLEN len)
            len = 1;
        }
        else {
-            char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
+            char *end = (char *) uv_to_utf8((U8 *)tmpbuf, PL_multi_close);
            *end = '\0';
            len = end - tmpbuf;
            uni = TRUE;
@ -3620,7 +3620,7 @@ S_scan_const(pTHX_ char *start)
                        if (real_range_max > 0x101) {
                            *d++ = (char) RANGE_INDICATOR;
                        }
-                        d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
+                        d = (char*) uv_to_utf8((U8*)d, real_range_max);
                    }
                }
 #endif
@ -3907,7 +3907,7 @@ S_scan_const(pTHX_ char *start)
                            d = SvCUR(sv) + SvGROW(sv, needed);
                        }

-                        d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
+                        d = (char*) uv_to_utf8_flags((U8*)d, uv,
                                                   (ckWARN(WARN_PORTABLE))
                                                   ? UNICODE_WARN_PERL_EXTENDED
                                                   : 0);
@ -13294,7 +13294,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
            }

            /* Append native character for the rev point */
-            tmpend = uvchr_to_utf8(tmpbuf, rev);
+            tmpend = uv_to_utf8(tmpbuf, rev);
            sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
            if (!UVCHR_IS_INVARIANT(rev))
                 SvUTF8_on(sv);