intl: Support the AIX 7 locale names.

* gettext-runtime/intl/explodename.c (_nl_find_language): Remove function.
(_nl_explode_name): Inline it here. On AIX, lowercase the language and map a
script identifier to a modifier.
* NEWS: Mention it.
This commit is contained in:
Bruno Haible 2020-12-10 17:01:43 +01:00
parent a9a2a75f46
commit 68fce70127
2 changed files with 87 additions and 34 deletions

6
NEWS
View File

@ -5,6 +5,12 @@ Version 0.21.1 - December 2020
(of type 'const wchar_t *', 'const char16_t *', or 'const char32_t *') as
arguments.
* Runtime behaviour:
- On AIX, locale names with a script or with an uppercase language are now
supported.
For example, sr_Cyrl_RS.UTF-8 is treated like sr_RS.UTF-8@cyrillic, and
EN_US.UTF-8 is treated like en_US.UTF-8.
Version 0.21 - July 2020
* Programming languages support:

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1995-2016 Free Software Foundation, Inc.
/* Copyright (C) 1995-2016, 2020 Free Software Foundation, Inc.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
This program is free software: you can redistribute it and/or modify
@ -35,20 +35,6 @@
/* @@ end of prolog @@ */
/* Split a locale name NAME into a leading language part and all the
rest. Return a pointer to the first character after the language,
i.e. to the first byte of the rest. */
static char *_nl_find_language (const char *name);
static char *
_nl_find_language (const char *name)
{
while (name[0] != '\0' && name[0] != '_' && name[0] != '@' && name[0] != '.')
++name;
return (char *) name;
}
int
_nl_explode_name (char *name,
@ -64,23 +50,84 @@ _nl_explode_name (char *name,
*codeset = NULL;
*normalized_codeset = NULL;
/* Now we determine the single parts of the locale name. First
look for the language. Termination symbols are `_', '.', and `@'. */
mask = 0;
*language = cp = name;
cp = _nl_find_language (*language);
/* Determine the individual parts of the locale name.
Accept the XPG syntax
if (*language == cp)
language[_territory][.codeset][@modifier]
On AIX systems, also accept the same syntax with an uppercased language,
and a syntax similar to RFC 5646:
language[_script]_territory[.codeset]
where script is a four-letter code for a script, per ISO 15924.
*/
mask = 0;
/* First look for the language. Termination symbols are `_', '.', and `@'. */
*language = name;
cp = name;
while (cp[0] != '\0' && cp[0] != '_' && cp[0] != '@' && cp[0] != '.')
++cp;
if (cp == name)
/* This does not make sense: language has to be specified. Use
this entry as it is without exploding. Perhaps it is an alias. */
cp = strchr (*language, '\0');
cp = strchr (name, '\0');
else
{
if (cp[0] == '_')
{
*cp++ = '\0';
#if defined _AIX
/* Lowercase the language. */
{
char *lcp;
for (lcp = name; lcp < cp; lcp++)
if (*lcp >= 'A' && *lcp <= 'Z')
*lcp += 'a' - 'A';
}
/* Next is the script or the territory. It depends on whether
there is another '_'. */
char *next = cp;
while (cp[0] != '\0' && cp[0] != '_' && cp[0] != '@' && cp[0] != '.')
++cp;
if (cp[0] == '_')
{
*cp++ = '\0';
/* Next is the script. Translate the script to a modifier.
We don't need to support all of ISO 15924 here, only those
scripts that actually occur:
Latn -> latin
Cyrl -> cyrillic
Guru -> gurmukhi
Hans -> (omitted, redundant with the territory CN or SG)
Hant -> (omitted, redundant with the territory TW or HK) */
if (strcmp (next, "Latn") == 0)
*modifier = "latin";
else if (strcmp (next, "Cyrl") == 0)
*modifier = "cyrillic";
else if (strcmp (next, "Guru") == 0)
*modifier = "gurmukhi";
else if (!(strcmp (next, "Hans") == 0
|| strcmp (next, "Hant") == 0))
*modifier = next;
if (*modifier != NULL && (*modifier)[0] != '\0')
mask |= XPG_MODIFIER;
}
else
cp = next;
#endif
/* Next is the territory. */
cp[0] = '\0';
*territory = ++cp;
*territory = cp;
while (cp[0] != '\0' && cp[0] != '.' && cp[0] != '@')
++cp;
@ -91,8 +138,8 @@ _nl_explode_name (char *name,
if (cp[0] == '.')
{
/* Next is the codeset. */
cp[0] = '\0';
*codeset = ++cp;
*cp++ = '\0';
*codeset = cp;
while (cp[0] != '\0' && cp[0] != '@')
++cp;
@ -111,16 +158,16 @@ _nl_explode_name (char *name,
mask |= XPG_NORM_CODESET;
}
}
}
if (cp[0] == '@')
{
/* Next is the modifier. */
cp[0] = '\0';
*modifier = ++cp;
if (cp[0] == '@')
{
/* Next is the modifier. */
*cp++ = '\0';
*modifier = cp;
if (cp[0] != '\0')
mask |= XPG_MODIFIER;
if (cp[0] != '\0')
mask |= XPG_MODIFIER;
}
}
if (*territory != NULL && (*territory)[0] == '\0')