mirror of
https://github.com/Perl/perl5.git
synced 2026-01-27 01:44:43 +00:00
Avoid mojibake in "$!"
In stress testing, I discovered that the LC_CTYPE and LC_MESSAGES locales need to be the same locale, or strerror() can return question marks or mojibake instead of the proper message. This commit refactors the handling of stringifying "$!" to make the locales of both categories the same during the stringification. Actually, I suspect it isn't the locale, but the codeset of the locale that needs to be the same. I suspect that if the categories were both in different UTF-8 locales, or both in single-byte locales, that things would work fine. But it's cheaper to find the locale rather than the locale's codeset, so that is what is done.
This commit is contained in:
parent
091b5ed710
commit
cb5c690da6
245
locale.c
245
locale.c
@ -6707,8 +6707,27 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
|
||||
* LC_MESSAGES, it uses whatever strerror() returns. Otherwise the text is
|
||||
* derived from the locale, LC_MESSAGES if we have that; LC_CTYPE if not.
|
||||
*
|
||||
* The function just calls strerror(), but temporarily switches, if needed, to
|
||||
* the C locale */
|
||||
* The function just calls strerror(), but temporarily switches locales, if
|
||||
* needed. Many platforms require LC_CTYPE and LC_MESSAGES to be in the same
|
||||
* CODESET in order for the return from strerror() to not contain '?' symbols,
|
||||
* or worse, mojibake. It's cheaper to just use the stricter criterion of
|
||||
* being in the same locale. So the code below uses a common locale for both
|
||||
* categories. Again, that is C if not within 'use locale' scope; or the
|
||||
* LC_MESSAGES locale if in scope and we have that category; and LC_CTYPE if we
|
||||
* don't have LC_MESSAGES; and whatever strerror returns if we don't have
|
||||
* either category.
|
||||
*
|
||||
* There are two sets of implementations. The first below is if we have
|
||||
* strerror_l(). This is the simpler. We just use the already-built C locale
|
||||
* object if not in locale scope, or build up a custom one otherwise.
|
||||
*
|
||||
* When strerror_l() is not available, we may have to swap locales temporarily
|
||||
* to bring the two categories into sync with each other, and possibly to the C
|
||||
* locale.
|
||||
*
|
||||
* Because the preprocessing directives to conditionally compile this function
|
||||
* would greatly obscure the logic of the various implementations, the whole
|
||||
* function is repeated for each configuration, with some common macros. */
|
||||
|
||||
/* Used to shorten the definitions of the following implementations of
|
||||
* my_strerror() */
|
||||
@ -6723,161 +6742,184 @@ Perl__is_in_locale_category(pTHX_ const bool compiling, const int category)
|
||||
print_bytes_for_locale(errstr, errstr + strlen(errstr), 0); \
|
||||
PerlIO_printf(Perl_debug_log, "'\n");)
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifndef USE_LOCALE_MESSAGES
|
||||
/* On platforms that have precisely one of these categories (Windows
|
||||
* qualifies), these yield the correct one */
|
||||
#if defined(USE_LOCALE_CTYPE)
|
||||
# define WHICH_LC_INDEX LC_CTYPE_INDEX_
|
||||
#elif defined(USE_LOCALE_MESSAGES)
|
||||
# define WHICH_LC_INDEX LC_MESSAGES_INDEX_
|
||||
#endif
|
||||
|
||||
/*==========================================================================*/
|
||||
/* First set of implementations, when have strerror_l() */
|
||||
|
||||
#if defined(USE_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L)
|
||||
|
||||
# if ! defined(USE_LOCALE_CTYPE) && ! defined(USE_LOCALE_MESSAGES)
|
||||
|
||||
/* Here, neither category is defined: use the C locale */
|
||||
char *
|
||||
Perl_my_strerror(pTHX_ const int errnum)
|
||||
{
|
||||
char *errstr;
|
||||
PERL_ARGS_ASSERT_MY_STRERROR;
|
||||
|
||||
DEBUG_STRERROR_ENTER(errnum, -1); /* Within locale scope is immaterial */
|
||||
DEBUG_STRERROR_ENTER(errnum, 0);
|
||||
|
||||
errstr = savepv(Strerror(errnum));
|
||||
char *errstr = savepv(strerror_l(errnum, PL_C_locale_obj));
|
||||
|
||||
DEBUG_STRERROR_RETURN(errstr);
|
||||
|
||||
SAVEFREEPV(errstr);
|
||||
return errstr;
|
||||
}
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#else
|
||||
|
||||
/* The rest of the invocations all share the same beginning, so show that: */
|
||||
# elif ! defined(USE_LOCALE_CTYPE) || ! defined(USE_LOCALE_MESSAGES)
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
/* Here one or the other of CTYPE or MESSAGES is defined, but not both. If we
|
||||
* are not within 'use locale' scope of the only one defined, we use the C
|
||||
* locale; otherwise use the current locale object */
|
||||
|
||||
char *
|
||||
Perl_my_strerror(pTHX_ const int errnum)
|
||||
{
|
||||
char *errstr;
|
||||
const bool within_locale_scope = IN_LC(LC_MESSAGES);
|
||||
PERL_ARGS_ASSERT_MY_STRERROR;
|
||||
|
||||
DEBUG_STRERROR_ENTER(errnum, within_locale_scope);
|
||||
/*--------------------------------------------------------------------------*/
|
||||
# if ! defined(USE_LOCALE_THREADS)
|
||||
DEBUG_STRERROR_ENTER(errnum, IN_LC(categories[WHICH_LC_INDEX]));
|
||||
|
||||
/* This function is also pretty trivial without threads. */
|
||||
if (within_locale_scope) {
|
||||
errstr = savepv(Strerror(errnum));
|
||||
}
|
||||
else {
|
||||
const char * save_locale = querylocale_c(LC_MESSAGES);
|
||||
/* Use C if not within locale scope; Otherwise, use current locale */
|
||||
const locale_t which_obj = (IN_LC(categories[WHICH_LC_INDEX]))
|
||||
? PL_C_locale_obj
|
||||
: use_curlocale_scratch();
|
||||
|
||||
void_setlocale_c(LC_MESSAGES, "C");
|
||||
errstr = savepv(Strerror(errnum));
|
||||
void_setlocale_c(LC_MESSAGES, save_locale);
|
||||
}
|
||||
char *errstr = savepv(strerror_l(errnum, which_obj));
|
||||
|
||||
DEBUG_STRERROR_RETURN(errstr);
|
||||
|
||||
SAVEFREEPV(errstr);
|
||||
return errstr;
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
# elif defined(USE_POSIX_2008_LOCALE) \
|
||||
&& defined(HAS_STRERROR_L) \
|
||||
&& defined(HAS_STRERROR_R)
|
||||
|
||||
/* This function is also trivial if we don't have to worry about thread
|
||||
* safety and have strerror_l(), as it handles the switch of locales so we
|
||||
* don't have to deal with that. We don't have to worry about thread
|
||||
* safety if strerror_r() is also available. Both it and strerror_l() are
|
||||
* thread-safe. Plain strerror() isn't thread safe. But on threaded
|
||||
* builds when strerror_r() is available, the apparent call to strerror()
|
||||
* below is actually a macro that behind-the-scenes calls strerror_r(). */
|
||||
# else /* Are using both categories. Place them in the same CODESET,
|
||||
* either C or the LC_MESSAGES locale */
|
||||
|
||||
if (within_locale_scope) {
|
||||
errstr = savepv(Strerror(errnum));
|
||||
}
|
||||
else {
|
||||
char *
|
||||
Perl_my_strerror(pTHX_ const int errnum)
|
||||
{
|
||||
PERL_ARGS_ASSERT_MY_STRERROR;
|
||||
|
||||
DEBUG_STRERROR_ENTER(errnum, IN_LC(LC_MESSAGES));
|
||||
|
||||
char *errstr;
|
||||
if (! IN_LC(LC_MESSAGES)) { /* Use C if not within locale scope */
|
||||
errstr = savepv(strerror_l(errnum, PL_C_locale_obj));
|
||||
}
|
||||
else { /* Otherwise, use the LC_MESSAGES locale, making sure LC_CTYPE
|
||||
matches */
|
||||
locale_t cur = duplocale(use_curlocale_scratch());
|
||||
|
||||
cur = newlocale(LC_CTYPE_MASK, querylocale_c(LC_MESSAGES), cur);
|
||||
errstr = savepv(strerror_l(errnum, cur));
|
||||
freelocale(cur);
|
||||
}
|
||||
|
||||
DEBUG_STRERROR_RETURN(errstr);
|
||||
|
||||
SAVEFREEPV(errstr);
|
||||
return errstr;
|
||||
}
|
||||
# endif /* Above is using strerror_l */
|
||||
|
||||
/*==========================================================================*/
|
||||
#else /* Below is not using strerror_l */
|
||||
# if ! defined(USE_LOCALE_CTYPE) && ! defined(USE_LOCALE_MESSAGES)
|
||||
|
||||
/* If not using either of the categories, return plain, unadorned
|
||||
* strerror */
|
||||
|
||||
char *
|
||||
Perl_my_strerror(pTHX_ const int errnum)
|
||||
{
|
||||
PERL_ARGS_ASSERT_MY_STRERROR;
|
||||
|
||||
DEBUG_STRERROR_ENTER(errnum, 0);
|
||||
|
||||
char *errstr = savepv(Strerror(errnum));
|
||||
|
||||
DEBUG_STRERROR_RETURN(errstr);
|
||||
|
||||
SAVEFREEPV(errstr);
|
||||
return errstr;
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
# elif defined(USE_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L)
|
||||
# elif ! defined(USE_LOCALE_CTYPE) || ! defined(USE_LOCALE_MESSAGES)
|
||||
|
||||
/* It's a little more complicated with strerror_l() but strerror_r() is not
|
||||
* available. We use strerror_l() for everything, constructing a locale to
|
||||
* pass to it if necessary */
|
||||
/* Here one or the other of CTYPE or MESSAGES is defined, but not both. If we
|
||||
* are not within 'use locale' scope of the only one defined, we use the C
|
||||
* locale; otherwise use the current locale */
|
||||
|
||||
locale_t locale_to_use;
|
||||
char *
|
||||
Perl_my_strerror(pTHX_ const int errnum)
|
||||
{
|
||||
PERL_ARGS_ASSERT_MY_STRERROR;
|
||||
|
||||
if (within_locale_scope) {
|
||||
locale_to_use = use_curlocale_scratch();
|
||||
}
|
||||
else { /* Use C locale if not within 'use locale' scope */
|
||||
locale_to_use = PL_C_locale_obj;
|
||||
DEBUG_STRERROR_ENTER(errnum, IN_LC(categories[WHICH_LC_INDEX]));
|
||||
|
||||
char *errstr;
|
||||
|
||||
if (IN_LC(categories[WHICH_LC_INDEX])) {
|
||||
errstr = savepv(Strerror(errnum));
|
||||
}
|
||||
else {
|
||||
|
||||
errstr = savepv(strerror_l(errnum, locale_to_use));
|
||||
SETLOCALE_LOCK;
|
||||
|
||||
const char * orig_locale = toggle_locale_i(WHICH_LC_INDEX, "C");
|
||||
|
||||
errstr = savepv(Strerror(errnum));
|
||||
|
||||
restore_toggled_locale_i(WHICH_LC_INDEX, orig_locale);
|
||||
|
||||
SETLOCALE_UNLOCK;
|
||||
}
|
||||
|
||||
DEBUG_STRERROR_RETURN(errstr);
|
||||
|
||||
SAVEFREEPV(errstr);
|
||||
return errstr;
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
# else
|
||||
|
||||
/* And most complicated of all is without strerror_l(). */
|
||||
/* Below, have both LC_CTYPE and LC_MESSAGES. Place them in the same CODESET,
|
||||
* either C or the LC_MESSAGES locale */
|
||||
|
||||
const char * save_locale = NULL;
|
||||
bool locale_is_C = FALSE;
|
||||
char *
|
||||
Perl_my_strerror(pTHX_ const int errnum)
|
||||
{
|
||||
PERL_ARGS_ASSERT_MY_STRERROR;
|
||||
|
||||
DEBUG_STRERROR_ENTER(errnum, IN_LC(LC_MESSAGES));
|
||||
|
||||
const char * desired_locale = (IN_LC(LC_MESSAGES))
|
||||
? querylocale_c(LC_MESSAGES)
|
||||
: "C";
|
||||
/* XXX Can fail on z/OS */
|
||||
|
||||
/* We have a critical section to prevent another thread from executing this
|
||||
* same code at the same time which could cause LC_MESSAGES to be changed
|
||||
* to something else while we need it to be constant. (On thread-safe
|
||||
* perls, the LOCK is a no-op.) Since this is the only place in core that
|
||||
* changes LC_MESSAGES (unless the user has called setlocale()), this works
|
||||
* to prevent races. */
|
||||
SETLOCALE_LOCK;
|
||||
|
||||
DEBUG_STRERROR_ENTER(errnum, within_locale_scope);
|
||||
const char* orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, desired_locale);
|
||||
const char* orig_MESSAGES_locale = toggle_locale_c(LC_MESSAGES,
|
||||
desired_locale);
|
||||
char *errstr = savepv(Strerror(errnum));
|
||||
|
||||
/* If not within locale scope, need to return messages in the C locale */
|
||||
if (! within_locale_scope) {
|
||||
save_locale = querylocale_c(LC_MESSAGES);
|
||||
if (! save_locale) {
|
||||
SETLOCALE_UNLOCK;
|
||||
locale_panic_("Could not find current LC_MESSAGES locale");
|
||||
NOT_REACHED; /* NOTREACHED */ \
|
||||
}
|
||||
|
||||
locale_is_C = isNAME_C_OR_POSIX(save_locale);
|
||||
|
||||
/* Switch to the C locale if not already in it */
|
||||
if (! locale_is_C && ! bool_setlocale_c(LC_MESSAGES, "C")) {
|
||||
|
||||
/* If, for some reason, the locale change failed, we soldier on as
|
||||
* best as possible under the circumstances, using the current
|
||||
* locale, and clear save_locale, so we don't try to change back.
|
||||
* On z/OS, all setlocale() calls fail after you've created a
|
||||
* thread. This is their way of making sure the entire process is
|
||||
* always a single locale. This means that 'use locale' is always
|
||||
* in place for messages under these circumstances. */
|
||||
save_locale = NULL;
|
||||
}
|
||||
} /* end of ! within_locale_scope */
|
||||
|
||||
DEBUG_Lv(PerlIO_printf(Perl_debug_log,
|
||||
"Any locale change has been done; about to call Strerror\n"));
|
||||
errstr = savepv(Strerror(errnum));
|
||||
|
||||
/* Switch back if we successfully switched */
|
||||
if ( save_locale
|
||||
&& ! locale_is_C
|
||||
&& ! bool_setlocale_c(LC_MESSAGES, save_locale))
|
||||
{
|
||||
SETLOCALE_UNLOCK;
|
||||
locale_panic_(Perl_form(aTHX_
|
||||
"setlocale restore to '%s' failed",
|
||||
save_locale));
|
||||
NOT_REACHED; /* NOTREACHED */ \
|
||||
}
|
||||
restore_toggled_locale_c(LC_MESSAGES, orig_MESSAGES_locale);
|
||||
restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale);
|
||||
|
||||
SETLOCALE_UNLOCK;
|
||||
|
||||
@ -6887,7 +6929,8 @@ Perl_my_strerror(pTHX_ const int errnum)
|
||||
return errstr;
|
||||
}
|
||||
|
||||
# endif
|
||||
/*--------------------------------------------------------------------------*/
|
||||
# endif /* end of not using strerror_l() */
|
||||
#endif /* end of all the my_strerror() implementations */
|
||||
|
||||
/*
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user