Fixes to utf8_to_bytes derivative functions

This commit turns them into inline functions instead of macros and
changes the type of a parameter to void*, which is a more accurate type
for it.
This commit is contained in:
Karl Williamson 2025-01-19 05:18:59 -07:00 committed by Karl Williamson
parent 9de1a49435
commit 3ed37686f3
8 changed files with 75 additions and 46 deletions

View File

@ -3711,16 +3711,16 @@ Adpx |U8 * |utf8_to_bytes |NN U8 *s \
|NN STRLEN *lenp
Cp |bool |utf8_to_bytes_ |NN U8 **s_ptr \
|NN STRLEN *lenp \
|NN U8 **free_me \
|NN void **free_me \
|Perl_utf8_to_bytes_arg result_as
Admp |bool |utf8_to_bytes_new_pv \
Adip |bool |utf8_to_bytes_new_pv \
|NN U8 const **s_ptr \
|NN STRLEN *lenp \
|NN U8 *free_me
Admp |bool |utf8_to_bytes_overwrite \
|NN void **free_me
Adip |bool |utf8_to_bytes_overwrite \
|NN U8 **s_ptr \
|NN STRLEN *lenp
Admp |bool |utf8_to_bytes_temp_pv \
Adip |bool |utf8_to_bytes_temp_pv \
|NN U8 const **s_ptr \
|NN STRLEN *lenp
EMXp |U8 * |utf16_to_utf8 |NN U8 *p \

View File

@ -863,9 +863,9 @@
# define utf8_length(a,b) Perl_utf8_length(aTHX_ a,b)
# define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b)
# define utf8_to_bytes_(a,b,c,d) Perl_utf8_to_bytes_(aTHX_ a,b,c,d)
# define utf8_to_bytes_new_pv(a,b,c) Perl_utf8_to_bytes_new_pv(aTHX,a,b,c)
# define utf8_to_bytes_overwrite(a,b) Perl_utf8_to_bytes_overwrite(aTHX,a,b)
# define utf8_to_bytes_temp_pv(a,b) Perl_utf8_to_bytes_temp_pv(aTHX,a,b)
# define utf8_to_bytes_new_pv(a,b,c) Perl_utf8_to_bytes_new_pv(aTHX_ a,b,c)
# define utf8_to_bytes_overwrite(a,b) Perl_utf8_to_bytes_overwrite(aTHX_ a,b)
# define utf8_to_bytes_temp_pv(a,b) Perl_utf8_to_bytes_temp_pv(aTHX_ a,b)
# define utf8_to_uv Perl_utf8_to_uv
# define utf8_to_uv_errors Perl_utf8_to_uv_errors
# define utf8_to_uv_flags Perl_utf8_to_uv_flags

16
hv.c
View File

@ -1338,9 +1338,9 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
if (is_utf8 && !(k_flags & HVhek_KEYCANONICAL)) {
const char * const keysave = key;
U8 * free_me = NULL;
void * free_me = NULL;
if (! utf8_to_bytes_new_pv(&key, &klen, &free_me)) {
if (! utf8_to_bytes_new_pv((const U8 **) &key, &klen, &free_me)) {
k_flags |= HVhek_UTF8;
}
else {
@ -3270,8 +3270,8 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash)
} else if (len < 0) {
STRLEN tmplen = -len;
/* See the note in hv_fetch(). --jhi */
U8 * free_str = NULL;
if (! utf8_to_bytes_new_pv(&str, &tmplen, &free_str)) {
void * free_str = NULL;
if (! utf8_to_bytes_new_pv((const U8 **) &str, &tmplen, &free_str)) {
k_flags = HVhek_UTF8;
}
else {
@ -3687,7 +3687,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PVN;
U8 utf8_flag;
U8 * free_me = NULL;
void * free_me = NULL;
if (flags & ~(REFCOUNTED_HE_KEY_UTF8|REFCOUNTED_HE_EXISTS))
Perl_croak(aTHX_ "panic: refcounted_he_fetch_pvn bad flags %" UVxf,
@ -3696,7 +3696,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
goto ret;
/* For searching purposes, canonicalise to Latin-1 where possible. */
if ( flags & REFCOUNTED_HE_KEY_UTF8
&& utf8_to_bytes_new_pv(&keypv, &keylen, &free_me))
&& utf8_to_bytes_new_pv((const U8 **) &keypv, &keylen, &free_me))
{
flags &= ~REFCOUNTED_HE_KEY_UTF8;
}
@ -3821,7 +3821,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
char hekflags;
STRLEN key_offset = 1;
struct refcounted_he *he;
U8 * free_me = NULL;
void * free_me = NULL;
if (!value || value == &PL_sv_placeholder) {
value_type = HVrhek_delete;
@ -3847,7 +3847,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
/* Canonicalise to Latin-1 where possible. */
if ( (flags & REFCOUNTED_HE_KEY_UTF8)
&& utf8_to_bytes_new_pv(&keypv, &keylen, &free_me))
&& utf8_to_bytes_new_pv((const U8 **) &keypv, &keylen, &free_me))
{
flags &= ~REFCOUNTED_HE_KEY_UTF8;
}

View File

@ -1,4 +1,4 @@
/* inline.h
/*> inline.h
*
* Copyright (C) 2012 by Larry Wall and others
*
@ -1236,6 +1236,40 @@ Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp)
return bytes_to_utf8_free_me(s, lenp, NULL);
}
/* Thin inline wrapper around utf8_to_bytes_() that selects the
 * PL_utf8_to_bytes_new_memory mode.
 *
 *  s_ptr    address of the pointer to the input string; forwarded with its
 *           const qualifier cast away (see below)
 *  lenp     address of the string's length; forwarded unchanged
 *  free_me  forwarded unchanged to utf8_to_bytes_()
 *
 * Returns whatever utf8_to_bytes_() returns. */
PERL_STATIC_INLINE bool
Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, void ** free_me)
{
    /* All three parameters are declared NN; check them here so that a bad
     * caller is reported against this function rather than the callee */
    PERL_ARGS_ASSERT_UTF8_TO_BYTES_NEW_PV;

    /* utf8_to_bytes_() is declared to take a non-const s_ptr because it may
     * change it, but NOT when called with PL_utf8_to_bytes_new_memory, so it
     * is ok to cast away const */
    return utf8_to_bytes_((U8 **) s_ptr, lenp, free_me,
                          PL_utf8_to_bytes_new_memory);
}
/* Thin inline wrapper around utf8_to_bytes_() that selects the
 * PL_utf8_to_bytes_use_temporary mode.
 *
 *  s_ptr  address of the pointer to the input string; forwarded with its
 *         const qualifier cast away (see below)
 *  lenp   address of the string's length; forwarded unchanged
 *
 * Returns whatever utf8_to_bytes_() returns. */
PERL_STATIC_INLINE bool
Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp)
{
    /* Both parameters are declared NN; check them here so that a bad caller
     * is reported against this function rather than the callee */
    PERL_ARGS_ASSERT_UTF8_TO_BYTES_TEMP_PV;

    /* utf8_to_bytes_() requires a non-NULL pointer, but doesn't use it when
     * called with PL_utf8_to_bytes_use_temporary */
    void* dummy = NULL;

    /* utf8_to_bytes_() is declared to take a non-const s_ptr because it may
     * change it, but NOT when called with PL_utf8_to_bytes_use_temporary, so
     * it is ok to cast away const */
    return utf8_to_bytes_((U8 **) s_ptr, lenp, &dummy,
                          PL_utf8_to_bytes_use_temporary);
}
/* Thin inline wrapper around utf8_to_bytes_() that selects the
 * PL_utf8_to_bytes_overwrite mode.
 *
 *  s_ptr  address of the pointer to the (non-const) input string; forwarded
 *         unchanged
 *  lenp   address of the string's length; forwarded unchanged
 *
 * Returns whatever utf8_to_bytes_() returns. */
PERL_STATIC_INLINE bool
Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp)
{
    /* Both parameters are declared NN; check them here so that a bad caller
     * is reported against this function rather than the callee */
    PERL_ARGS_ASSERT_UTF8_TO_BYTES_OVERWRITE;

    /* utf8_to_bytes_() requires a non-NULL pointer, but doesn't use it when
     * called with PL_utf8_to_bytes_overwrite */
    void* dummy = NULL;

    return utf8_to_bytes_(s_ptr, lenp, &dummy, PL_utf8_to_bytes_overwrite);
}
/*
=for apidoc valid_utf8_to_uvchr
Like C<L<perlapi/utf8_to_uvchr_buf>>, but should only be called when it is

10
pp.c
View File

@ -793,7 +793,7 @@ S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
s = SvPV(sv, len);
if (chomping) {
if (s && len) {
U8 *temp_buffer = NULL;
void *temp_buffer = NULL;
s += --len;
if (RsPARA(PL_rs)) {
if (*s != '\n')
@ -817,7 +817,7 @@ S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
/* Assumption is that rs is shorter than the scalar. */
if (SvUTF8(PL_rs)) {
/* RS is utf8, scalar is 8 bit. */
if (! utf8_to_bytes_new_pv(&rsptr, &rslen,
if (! utf8_to_bytes_new_pv((const U8 **) &rsptr, &rslen,
&temp_buffer))
{
/* Cannot downgrade, therefore cannot possibly
@ -3911,8 +3911,10 @@ PP(pp_index)
if (little_utf8) {
/* Well, maybe instead we might be able to downgrade the small
string? */
U8 * free_little_p = NULL;
if (utf8_to_bytes_new_pv(&little_p, &llen, &free_little_p)) {
void * free_little_p = NULL;
if (utf8_to_bytes_new_pv((const U8 **) &little_p, &llen,
&free_little_p))
{
little_utf8 = false;
/* Here 'little_p' is in byte form, and 'free_little_p' is

26
proto.h generated
View File

@ -5350,19 +5350,10 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp);
assert(s); assert(lenp)
PERL_CALLCONV bool
Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, U8 **free_me, Perl_utf8_to_bytes_arg result_as);
Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void **free_me, Perl_utf8_to_bytes_arg result_as);
#define PERL_ARGS_ASSERT_UTF8_TO_BYTES_ \
assert(s_ptr); assert(lenp); assert(free_me)
/* PERL_CALLCONV bool
Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, U8 *free_me); */
/* PERL_CALLCONV bool
Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp); */
/* PERL_CALLCONV bool
Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp); */
PERL_CALLCONV U8 *
Perl_utf8_to_utf16_base(pTHX_ U8 *s, U8 *d, Size_t bytelen, Size_t *newlen, const bool high, const bool low);
#define PERL_ARGS_ASSERT_UTF8_TO_UTF16_BASE \
@ -10039,6 +10030,21 @@ Perl_utf8_hop_overshoot(const U8 *s, SSize_t off, const U8 * const start, const
# define PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT \
assert(s); assert(start); assert(end)
PERL_STATIC_INLINE bool
Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, void **free_me);
# define PERL_ARGS_ASSERT_UTF8_TO_BYTES_NEW_PV \
assert(s_ptr); assert(lenp); assert(free_me)
PERL_STATIC_INLINE bool
Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp);
# define PERL_ARGS_ASSERT_UTF8_TO_BYTES_OVERWRITE \
assert(s_ptr); assert(lenp)
PERL_STATIC_INLINE bool
Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp);
# define PERL_ARGS_ASSERT_UTF8_TO_BYTES_TEMP_PV \
assert(s_ptr); assert(lenp)
PERL_STATIC_INLINE bool
Perl_utf8_to_uv_msgs(const U8 * const s0, const U8 *e, UV *cp_p, Size_t *advance_p, U32 flags, U32 *errors, AV **msgs);
# define PERL_ARGS_ASSERT_UTF8_TO_UV_MSGS \

4
utf8.c
View File

@ -2940,7 +2940,7 @@ New code should use the first three functions listed above.
*/
bool
Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, U8 ** free_me,
Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, void ** free_me,
Perl_utf8_to_bytes_arg result_as)
{
PERL_ARGS_ASSERT_UTF8_TO_BYTES_;
@ -3219,7 +3219,7 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *lenp, bool *is_utf8p)
PERL_ARGS_ASSERT_BYTES_FROM_UTF8;
if (*is_utf8p) {
U8 * new_memory = NULL;
void * new_memory = NULL;
if (utf8_to_bytes_new_pv(&s, lenp, &new_memory)) {
*is_utf8p = false;

13
utf8.h
View File

@ -1336,19 +1336,6 @@ typedef enum {
PL_utf8_to_bytes_use_temporary,
} Perl_utf8_to_bytes_arg;
/* INT2PTR() is because this parameter should not be used in this case, but
* there is a NN assertion for it. It causes that to pass but to still
* segfault if wrongly gets used */
#define Perl_utf8_to_bytes_overwrite(mTHX, s, l) \
Perl_utf8_to_bytes_(aTHX_ s, l, INT2PTR(U8 **, 1), \
PL_utf8_to_bytes_overwrite)
#define Perl_utf8_to_bytes_new_pv(mTHX, s, l, f) \
Perl_utf8_to_bytes_(aTHX_ (U8 **) s, l, f, \
PL_utf8_to_bytes_new_memory)
#define Perl_utf8_to_bytes_temp_pv(mTHX, s, l) \
Perl_utf8_to_bytes_(aTHX_ (U8 **) s, l, INT2PTR(U8 **, 1), \
PL_utf8_to_bytes_use_temporary)
/* Do not use; should be deprecated. Use isUTF8_CHAR() instead; this is
* retained solely for backwards compatibility */
#define IS_UTF8_CHAR(p, n) (isUTF8_CHAR(p, (p) + (n)) == n)