From a015052e89ef7649c61e746e10f3384e4ab35f2d Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 3 Dec 2024 10:12:43 -0700 Subject: [PATCH] Convert all core uvchr_to_utf8 calls to uv_to_utf8 This uses the newer, more modern synonym. In a few cases, the code was calling the flags version with no flags, so I changed those calls to use the no-flags version. --- doop.c | 6 +++--- op.c | 4 ++-- perl.c | 6 +++--- pp.c | 2 +- pp_pack.c | 10 +++++----- regcomp.c | 15 +++++++-------- regcomp_trie.c | 4 ++-- regexec.c | 2 +- sv.c | 2 +- toke.c | 8 ++++---- 10 files changed, 29 insertions(+), 30 deletions(-) diff --git a/doop.c b/doop.c index ba2b553dc2..62101b258a 100644 --- a/doop.c +++ b/doop.c @@ -85,7 +85,7 @@ S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl) const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); if (c < 0x100 && (ch = tbl->map[c]) >= 0) { matches++; - d = uvchr_to_utf8(d, (UV)ch); + d = uv_to_utf8(d, (UV)ch); s += ulen; } else { /* No match -> copy */ @@ -282,7 +282,7 @@ S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl) replace: matches++; if (LIKELY(!squash || ch != pch)) { - d = uvchr_to_utf8(d, ch); + d = uv_to_utf8(d, ch); pch = ch; } s += len; @@ -541,7 +541,7 @@ S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap) * to the output */ if (! 
squash || to != previous_map) { if (out_is_utf8) { - d = uvchr_to_utf8(d, to); + d = uv_to_utf8(d, to); } else { if (to >= 256) { /* If need to convert to UTF-8, restart */ diff --git a/op.c b/op.c index b19c100ab4..1b5c11c58b 100644 --- a/op.c +++ b/op.c @@ -6574,12 +6574,12 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) start = MIN(IV_MAX, start); end = MIN(IV_MAX, end); - temp_end_pos = uvchr_to_utf8(temp, start); + temp_end_pos = uv_to_utf8(temp, start); sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp); if (start != end) { Perl_sv_catpvf(aTHX_ inverted_tstr, "%c", RANGE_INDICATOR); - temp_end_pos = uvchr_to_utf8(temp, end); + temp_end_pos = uv_to_utf8(temp, end); sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp); } } diff --git a/perl.c b/perl.c index 6637506c62..15113c6d55 100644 --- a/perl.c +++ b/perl.c @@ -311,9 +311,9 @@ perl_construct(pTHXx) STATUS_ALL_SUCCESS; init_uniprops(); - (void) uvchr_to_utf8_flags((U8 *) PL_TR_SPECIAL_HANDLING_UTF8, - TR_SPECIAL_HANDLING, - UNICODE_ALLOW_ABOVE_IV_MAX); + (void) uv_to_utf8_flags((U8 *) PL_TR_SPECIAL_HANDLING_UTF8, + TR_SPECIAL_HANDLING, + UNICODE_ALLOW_ABOVE_IV_MAX); #if defined(LOCAL_PATCH_COUNT) PL_localpatches = local_patches; /* For possible -v */ diff --git a/pp.c b/pp.c index 4993ac4677..bd4611a4cc 100644 --- a/pp.c +++ b/pp.c @@ -3990,7 +3990,7 @@ PP(pp_chr) if (value > 255 && !IN_BYTES) { SvGROW(TARG, (STRLEN)UVCHR_SKIP(value)+1); - tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0); + tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value); SvCUR_set(TARG, tmps - SvPVX_const(TARG)); *tmps = '\0'; (void)SvPOK_only(TARG); diff --git a/pp_pack.c b/pp_pack.c index f341b77dc3..0e8dd9d5f4 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -2020,7 +2020,7 @@ marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) { for (;from_ptr < from_end; from_ptr++) { while (*m == from_ptr) *m++ = to_ptr; - to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr); + to_ptr = (char *) uv_to_utf8((U8 *) 
to_ptr, *(U8 *) from_ptr); } *to_ptr = 0; @@ -2391,7 +2391,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len); len -= fromlen; while (fromlen > 0) { - cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr); + cur = (char *) uv_to_utf8((U8 *) cur, * (U8 *) aptr); aptr++; fromlen--; } @@ -2605,7 +2605,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) GROWING(0, cat, start, cur, len+UTF8_MAXLEN); end = start+SvLEN(cat)-UTF8_MAXLEN; } - cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0); + cur = (char *) uv_to_utf8((U8 *) cur, auv); } else { if (auv >= 0x100) { if (!SvUTF8(cat)) { @@ -2656,7 +2656,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) auv = SvUV_no_inf(fromstr, datumtype); if (utf8) { U8 buffer[UTF8_MAXLEN+1], *endb; - endb = uvchr_to_utf8_flags(buffer, auv, 0); + endb = uv_to_utf8(buffer, auv); if (cur+(endb-buffer)*UTF8_EXPAND >= end) { *cur = '\0'; SvCUR_set(cat, cur - start); @@ -2672,7 +2672,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) GROWING(0, cat, start, cur, len+UTF8_MAXLEN); end = start+SvLEN(cat)-UTF8_MAXLEN; } - cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0); + cur = (char *) uv_to_utf8((U8 *) cur, auv); } } break; diff --git a/regcomp.c b/regcomp.c index e5f899aa4c..5e3224ee22 100644 --- a/regcomp.c +++ b/regcomp.c @@ -6725,7 +6725,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) *(s++) = (char) ender; } else { - U8 * new_s = uvchr_to_utf8((U8*)s, ender); + U8 * new_s = uv_to_utf8((U8*)s, ender); added_len = (char *) new_s - s; s = (char *) new_s; @@ -11805,7 +11805,7 @@ S_optimize_regclass(pTHX_ *STRINGs(REGNODE_p(*ret)) = (U8) value; } else { - uvchr_to_utf8((U8 *) STRINGs(REGNODE_p(*ret)), value); + uv_to_utf8((U8 *) STRINGs(REGNODE_p(*ret)), value); } return op; @@ -12088,8 +12088,8 @@ S_optimize_regclass(pTHX_ /* Place the lowest 
UTF-8 start byte in the flags field, so as to allow * efficient ruling out at run time of many possible inputs. */ - (void) uvchr_to_utf8(low_utf8, start[0]); - (void) uvchr_to_utf8(high_utf8, end[0]); + (void) uv_to_utf8(low_utf8, start[0]); + (void) uv_to_utf8(high_utf8, end[0]); /* If all code points share the same first byte, this can be an * ANYOFRb. Otherwise store the lowest UTF-8 start byte which can @@ -12123,7 +12123,7 @@ S_optimize_regclass(pTHX_ * regnode can be used for higher ones, but we can't calculate the code * point of those. IV_MAX suffices though, as it will be a large first * byte */ - Size_t low_len = uvchr_to_utf8(low_utf8, MIN(lowest_cp, IV_MAX)) + Size_t low_len = uv_to_utf8(low_utf8, MIN(lowest_cp, IV_MAX)) - low_utf8; /* We store the lowest possible first byte of the UTF-8 representation, @@ -12140,7 +12140,7 @@ S_optimize_regclass(pTHX_ * well */ if (highest_cp <= IV_MAX) { U8 high_utf8[UTF8_MAXBYTES+1]; - Size_t high_len = uvchr_to_utf8(high_utf8, highest_cp) - high_utf8; + Size_t high_len = uv_to_utf8(high_utf8, highest_cp) - high_utf8; /* If the lowest and highest are the same, we can get an exact * first byte instead of a just minimum or even a sequence of exact @@ -14064,8 +14064,7 @@ S_get_extended_utf8_msg(pTHX_ const UV cp) HV *msgs; SV **msg; - uvchr_to_utf8_flags_msgs(dummy, cp, UNICODE_WARN_PERL_EXTENDED, - &msgs); + uv_to_utf8_msgs(dummy, cp, UNICODE_WARN_PERL_EXTENDED, &msgs); msg = hv_fetchs(msgs, "text", 0); assert(msg); diff --git a/regcomp_trie.c b/regcomp_trie.c index 0512fcda24..df8271c14a 100644 --- a/regcomp_trie.c +++ b/regcomp_trie.c @@ -381,7 +381,7 @@ and would end up looking like: 8: EXACT (10) 10: END(0) - d = uvchr_to_utf8_flags(d, uv, 0); + d = uv_to_utf8(d, uv); is the recommended Unicode-aware way of saying @@ -393,7 +393,7 @@ is the recommended Unicode-aware way of saying if (UTF) { \ SV *zlopp = newSV(UTF8_MAXBYTES); \ unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp); \ - unsigned char *const 
kapow = uvchr_to_utf8(flrbbbbb, val); \ + unsigned char *const kapow = uv_to_utf8(flrbbbbb, val); \ *kapow = '\0'; \ SvCUR_set(zlopp, kapow - flrbbbbb); \ SvPOK_on(zlopp); \ diff --git a/regexec.c b/regexec.c index d89225b0e3..47073dc720 100644 --- a/regexec.c +++ b/regexec.c @@ -5015,7 +5015,7 @@ S_setup_EXACTISH_ST(pTHX_ const regnode * const text_node, /* Add this character to the list of possible matches */ if (utf8_target) { - uvchr_to_utf8(matches[(U8) m->count], fold_from); + uv_to_utf8(matches[(U8) m->count], fold_from); lengths[m->count] = UVCHR_SKIP(fold_from); m->count++; } diff --git a/sv.c b/sv.c index 5d221c2ba4..fe7f857bac 100644 --- a/sv.c +++ b/sv.c @@ -13251,7 +13251,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p { STATIC_ASSERT_STMT(sizeof(ebuf) >= UTF8_MAXBYTES + 1); eptr = ebuf; - elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf; + elen = uv_to_utf8((U8*)eptr, uv) - (U8*)ebuf; is_utf8 = TRUE; } else { diff --git a/toke.c b/toke.c index 31566001c0..c60d301ebf 100644 --- a/toke.c +++ b/toke.c @@ -775,7 +775,7 @@ S_missingterm(pTHX_ char *s, STRLEN len) len = 1; } else { - char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close); + char *end = (char *) uv_to_utf8((U8 *)tmpbuf, PL_multi_close); *end = '\0'; len = end - tmpbuf; uni = TRUE; @@ -3620,7 +3620,7 @@ S_scan_const(pTHX_ char *start) if (real_range_max > 0x101) { *d++ = (char) RANGE_INDICATOR; } - d = (char*)uvchr_to_utf8((U8*)d, real_range_max); + d = (char*) uv_to_utf8((U8*)d, real_range_max); } } #endif @@ -3907,7 +3907,7 @@ S_scan_const(pTHX_ char *start) d = SvCUR(sv) + SvGROW(sv, needed); } - d = (char*) uvchr_to_utf8_flags((U8*)d, uv, + d = (char*) uv_to_utf8_flags((U8*)d, uv, (ckWARN(WARN_PORTABLE)) ? 
UNICODE_WARN_PERL_EXTENDED : 0); @@ -13294,7 +13294,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) } /* Append native character for the rev point */ - tmpend = uvchr_to_utf8(tmpbuf, rev); + tmpend = uv_to_utf8(tmpbuf, rev); sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); if (!UVCHR_IS_INVARIANT(rev)) SvUTF8_on(sv);