malformed utf8 message: use symbol instead of magic number with comment

Symbols that express meaning are easier to understand and easier to grep,
and they prevent the value from being changed without its comment being updated.
This commit is contained in:
Branislav Zahradník 2023-11-04 06:08:32 +01:00 committed by Karl Williamson
parent 6c2ae79c9f
commit 319f5a3f97
7 changed files with 24 additions and 21 deletions

4
doop.c
View File

@ -373,7 +373,7 @@ S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap)
else {
from = utf8_to_uvchr_buf(s, send, &s_len);
if (from == 0 && *s != '\0') {
_force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
_force_out_malformed_utf8_message(s, send, 0, MALFORMED_UTF8_DIE);
}
}
@ -492,7 +492,7 @@ S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap)
else {
from = utf8_to_uvchr_buf(s, send, &s_len);
if (from == 0 && *s != '\0') {
_force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE);
_force_out_malformed_utf8_message(s, send, 0, MALFORMED_UTF8_DIE);
}
}

14
handy.h
View File

@ -1436,7 +1436,7 @@ or casts
/* Likewise, this is effectively a static assert to be used to guarantee the
* parameter is a pointer
*
* NOT suitable for void*
* NOT suitable for void*
*/
#define ASSERT_IS_PTR(x) (__ASSERT_(sizeof(*(x))) (x))
@ -2276,7 +2276,7 @@ END_EXTERN_C
#define generic_utf8_safe_(classnum, p, e, above_latin1) \
((! _utf8_safe_assert(p, e)) \
? (_force_out_malformed_utf8_message((U8 *) (p), (U8 *) (e), 0, 1), 0)\
? (_force_out_malformed_utf8_message((U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)\
: (UTF8_IS_INVARIANT(*(p))) \
? generic_isCC_(*(p), classnum) \
: (UTF8_IS_DOWNGRADEABLE_START(*(p)) \
@ -2284,7 +2284,7 @@ END_EXTERN_C
? generic_isCC_(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1 )), \
classnum) \
: (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0)) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)) \
: above_latin1))
/* Like the above, but calls 'above_latin1(p)' to get the utf8 value.
* 'above_latin1' can be a macro */
@ -2294,7 +2294,7 @@ END_EXTERN_C
generic_utf8_safe_(classnum, p, e, \
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
? (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
: above_latin1(p)))
/* Like the above, but passes classnum to _isFOO_utf8(), instead of having an
* 'above_latin1' parameter */
@ -2384,7 +2384,7 @@ END_EXTERN_C
generic_utf8_safe_no_upper_latin1_(CC_XDIGIT_, p, e, \
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
? (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
: is_XDIGIT_high(p)))
#define toFOLD_utf8(p,e,s,l) toFOLD_utf8_safe(p,e,s,l)
@ -2433,7 +2433,7 @@ END_EXTERN_C
? ((LIKELY((e) - (p) > 1 && UTF8_IS_CONTINUATION(*((p)+1)))) \
? macro(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1))) \
: (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0)) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0)) \
: above_latin1))
#define generic_LC_invlist_utf8_safe_(macro, classnum, p, e) \
@ -2447,7 +2447,7 @@ END_EXTERN_C
generic_LC_utf8_safe_(classnum, p, e, \
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
? (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
(U8 *) (p), (U8 *) (e), 0, MALFORMED_UTF8_DIE), 0) \
: above_latin1(p)))
#define isALPHANUMERIC_LC_utf8_safe(p, e) \

View File

@ -32,7 +32,7 @@
#define PERL_IN_PP_PACK_C
#include "perl.h"
/* Types used by pack/unpack */
/* Types used by pack/unpack */
typedef enum {
e_no_len, /* no length */
e_number, /* number, [] */
@ -48,7 +48,7 @@ typedef struct tempsym {
U32 flags; /* /=4, comma=2, pack=1 */
/* and group modifiers */
SSize_t length; /* length/repeat count */
howlen_t howlen; /* how length is given */
howlen_t howlen; /* how length is given */
int level; /* () nesting level */
STRLEN strbeg; /* offset of group start */
struct tempsym *previous; /* previous group */
@ -3167,7 +3167,7 @@ PP_wrapped(pp_pack, 0, 1)
_force_out_malformed_utf8_message(error_pos,
(U8 *) result + result_len,
0, /* no flags */
1 /* Die */
MALFORMED_UTF8_DIE
);
NOT_REACHED; /* NOTREACHED */
}

View File

@ -8041,7 +8041,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
break;
}
} while(n);
if (!n) /* this means there is nothing that matched */
sayNO;
}
@ -10921,7 +10921,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
if (c_len == (STRLEN)-1) {
_force_out_malformed_utf8_message(p, p_end,
utf8n_flags,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
if ( c > 255

10
toke.c
View File

@ -972,7 +972,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
_force_out_malformed_utf8_message(first_bad_char_loc,
(U8 *) s + SvCUR(line),
0,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
else if (ONLY_ASCII && UNLIKELY(! is_ascii_string_loc(
@ -1589,7 +1589,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags)
_force_out_malformed_utf8_message(first_bad_char_loc,
(U8 *) PL_parser->bufend,
0,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
}
@ -1679,7 +1679,7 @@ Perl_lex_peek_unichar(pTHX_ U32 flags)
_force_out_malformed_utf8_message((U8 *) s,
(U8 *) bufend,
0,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
return unichar;
@ -3058,7 +3058,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
_force_out_malformed_utf8_message(first_bad_char_loc,
(U8 *) PL_parser->bufend,
0,
0 /* 0 means don't die */ );
MALFORMED_UTF8_WARN);
/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
immediately after '%s' */
*error_msg = Perl_form(aTHX_
@ -9656,7 +9656,7 @@ Perl_yylex(pTHX)
_force_out_malformed_utf8_message(first_bad_char_loc,
(U8 *) PL_bufend,
0,
1 /* 1 means die */ );
MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
else if (ONLY_ASCII && UNLIKELY(! is_ascii_string_loc(

4
utf8.c
View File

@ -3289,7 +3289,7 @@ S_is_utf8_common(pTHX_ const U8 *const p, const U8 * const e,
PERL_ARGS_ASSERT_IS_UTF8_COMMON;
if (cp == 0 && (p >= e || *p != '\0')) {
_force_out_malformed_utf8_message(p, e, 0, 1);
_force_out_malformed_utf8_message(p, e, 0, MALFORMED_UTF8_DIE);
NOT_REACHED; /* NOTREACHED */
}
@ -3834,7 +3834,7 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
STRLEN len_result; \
result = utf8n_to_uvchr(p, e - p, &len_result, UTF8_CHECK_ONLY); \
if (len_result == (STRLEN) -1) { \
_force_out_malformed_utf8_message(p, e, 0, 1 /* Die */ ); \
_force_out_malformed_utf8_message(p, e, 0, MALFORMED_UTF8_DIE ); \
}
#define CASE_CHANGE_BODY_END(locale_flags, change_macro) \

3
utf8.h
View File

@ -1304,6 +1304,9 @@ point's representation.
* retained solely for backwards compatibility */
#define IS_UTF8_CHAR(p, n) (isUTF8_CHAR(p, (p) + (n)) == n)
#define MALFORMED_UTF8_DIE TRUE
#define MALFORMED_UTF8_WARN FALSE
#endif /* PERL_UTF8_H_ */
/*