mirror of
https://https.git.savannah.gnu.org/git/gettext.git
synced 2026-01-25 23:27:58 +00:00
cldr-plurals: Fix parsing of plurals.xml from CLDR 38 or newer.
Reported by Michele Locati at <https://savannah.gnu.org/bugs/?66378>. * gettext-tools/src/cldr-plural-exp.c (eval_relation): Accept operands 'c', 'e'. * gettext-tools/src/cldr-plural.y (yylex): Likewise. * gettext-tools/src/cldr-plurals.c (extract_rules): Test for type='cardinal'. (get_XcY_end, force_spaces, remove_XcY, remove_empty_examples): New functions. (main): Invoke force_spaces, remove_XcY, remove_empty_examples. * gettext-tools/tests/cldr-plurals-1: Add more tests. * AUTHORS: Add Michele Locati.
This commit is contained in:
parent
c558619bb3
commit
77441e0283
3
AUTHORS
3
AUTHORS
@ -91,3 +91,6 @@ Assigns past and future changes.
|
||||
GETTEXT John Darrington
|
||||
Assigns past and future changes.
|
||||
(No contributions so far.)
|
||||
|
||||
GETTEXT Michele Locati
|
||||
Assigns past and future changes.
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Unicode CLDR plural rule parser and converter.
|
||||
Copyright (C) 2015-2025 Free Software Foundation, Inc.
|
||||
Copyright (C) 2015-2026 Free Software Foundation, Inc.
|
||||
|
||||
This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
|
||||
|
||||
@ -133,10 +133,11 @@ eval_relation (struct cldr_plural_relation_ty *relation)
|
||||
break;
|
||||
case 'f': case 't':
|
||||
case 'v': case 'w':
|
||||
case 'c': case 'e':
|
||||
{
|
||||
/* Since plural expression in gettext only supports unsigned
|
||||
integer, turn relations whose operand is either 'f', 't',
|
||||
'v', or 'w' into a constant truth value. */
|
||||
'v', 'w', 'c', or 'e' into a constant truth value. */
|
||||
/* FIXME: check mod? */
|
||||
for (size_t i = 0; i < relation->ranges->nitems; i++)
|
||||
{
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Unicode CLDR plural rule parser and converter.
|
||||
Copyright (C) 2015-2025 Free Software Foundation, Inc.
|
||||
Copyright (C) 2015-2026 Free Software Foundation, Inc.
|
||||
|
||||
This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
|
||||
|
||||
@ -428,7 +428,8 @@ yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg)
|
||||
{
|
||||
switch (ident[0])
|
||||
{
|
||||
case 'n': case 'i': case 'f': case 't': case 'v': case 'w':
|
||||
// See https://unicode.org/reports/tr35/tr35-numbers.html#table-plural-operand-meanings
|
||||
case 'n': case 'i': case 'f': case 't': case 'v': case 'w': case 'c': case 'e':
|
||||
arg->cp = exp;
|
||||
lval->ival = ident[0];
|
||||
sb_free (&buffer);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Unicode CLDR plural rule parser and converter
|
||||
Copyright (C) 2015-2025 Free Software Foundation, Inc.
|
||||
Copyright (C) 2015-2026 Free Software Foundation, Inc.
|
||||
|
||||
This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
|
||||
|
||||
@ -39,6 +39,11 @@
|
||||
#define _(s) gettext(s)
|
||||
|
||||
|
||||
/**
|
||||
* Extract the rules from a CLDR plurals.xml file
|
||||
* @return NULL in case of errors, the CLDR rules otherwise
|
||||
* @example "one: i = 1 and v = 0 @integer 1; other: @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, \u2026 @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, \u2026"
|
||||
*/
|
||||
static char *
|
||||
extract_rules (FILE *fp,
|
||||
const char *real_filename, const char *logical_filename,
|
||||
@ -71,8 +76,15 @@ extract_rules (FILE *fp,
|
||||
for (n = node->children; n; n = n->next)
|
||||
{
|
||||
if (n->type == XML_ELEMENT_NODE
|
||||
&& xmlStrEqual (n->name, BAD_CAST "plurals"))
|
||||
break;
|
||||
&& xmlStrEqual (n->name, BAD_CAST "plurals")
|
||||
&& xmlHasProp (n, BAD_CAST "type"))
|
||||
{
|
||||
xmlChar *type = xmlGetProp (n, BAD_CAST "type");
|
||||
bool is_cardinal = xmlStrEqual (type, BAD_CAST "cardinal") != 0;
|
||||
xmlFree (type);
|
||||
if (is_cardinal)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!n)
|
||||
{
|
||||
@ -156,6 +168,157 @@ extract_rules (FILE *fp,
|
||||
return sb_xdupfree_c (&buffer);
|
||||
}
|
||||
|
||||
/**
 * Find the position after a sample value written in exponent notation,
 * that is, a string in format XcY or XeY (eg "1c9", "1.1c6", "2e6").
 * The Unicode TR35 sample syntax allows both 'c' and 'e' as exponent
 * marker, so both are recognized here.
 * @param str the possible starting position of the XcY/XeY string
 * @return NULL if str does not start with a XcY/XeY string,
 *         the position of str after the XcY/XeY string (and after the
 *         spaces, one comma, and further spaces that may follow it)
 *         otherwise
 */
static const char *
get_XcY_end (const char *str)
{
  bool found_exponent = false;

  /* The value must start with a digit.  */
  if (!(str[0] >= '0' && str[0] <= '9'))
    return NULL;

  for (str++; str[0] != '\0'; str++)
    {
      if (str[0] == 'c' || str[0] == 'e')
        {
          /* Only one exponent marker is allowed, and it must be
             immediately followed by a digit.  */
          if (found_exponent || !(str[1] >= '0' && str[1] <= '9'))
            return NULL;
          found_exponent = true;
        }
      else if (!((str[0] >= '0' && str[0] <= '9') || str[0] == '.'))
        /* End of the numeric token.  */
        break;
    }

  /* A plain number (no exponent marker) is not what we look for.  */
  if (!found_exponent)
    return NULL;

  /* Swallow the separator that follows the value: optional spaces,
     then optionally one comma and the spaces after it.  */
  while (str[0] == ' ')
    str++;
  if (str[0] == ',')
    {
      str++;
      while (str[0] == ' ')
        str++;
    }
  return str;
}
|
||||
|
||||
/**
 * Replace, in place, every character of input for which c_isspace()
 * is true with a plain space character.  The length of the string
 * does not change.
 * @param input the NUL-terminated string to normalize
 */
static void
force_spaces (char *input)
{
  for (char *cursor = input; *cursor != '\0'; cursor++)
    if (c_isspace (*cursor))
      *cursor = ' ';
}
|
||||
|
||||
static char *
|
||||
remove_XcY (const char *input)
|
||||
{
|
||||
const char *p = (char *) input;
|
||||
const char *p_next;
|
||||
struct string_buffer buffer;
|
||||
sb_init (&buffer);
|
||||
for (;;)
|
||||
{
|
||||
int comma_and_spaces = -1;
|
||||
const char *p_next1 = strstr (p, "@integer ");
|
||||
const char *p_next2 = strstr (p, "@decimal ");
|
||||
if (p_next1 == NULL && p_next2 == NULL)
|
||||
{
|
||||
sb_append_c (&buffer, p);
|
||||
break;
|
||||
}
|
||||
if (p_next1 != NULL && (p_next2 == NULL || p_next1 < p_next2))
|
||||
p_next = p_next1 + /* strlen ("@integer ") */ 9;
|
||||
else
|
||||
p_next = p_next2 + /* strlen ("@decimal ") */ 9;
|
||||
while (p < p_next)
|
||||
sb_append1 (&buffer, *p++);
|
||||
while (p[0] == ' ')
|
||||
sb_append1 (&buffer, *p++);
|
||||
for (;;)
|
||||
{
|
||||
const char *XcY_end;
|
||||
if (p[0] < '0' || p[0] > '9')
|
||||
break;
|
||||
XcY_end = get_XcY_end (p);
|
||||
if (XcY_end != NULL)
|
||||
{
|
||||
p = XcY_end;
|
||||
continue;
|
||||
}
|
||||
if (comma_and_spaces >= 0)
|
||||
{
|
||||
sb_append1 (&buffer, ',');
|
||||
while (comma_and_spaces > 0)
|
||||
{
|
||||
sb_append1 (&buffer, ' ');
|
||||
comma_and_spaces--;
|
||||
}
|
||||
}
|
||||
while ((p[0] >= '0' && p[0] <= '9') || p[0] == '.' || p[0] == '~')
|
||||
{
|
||||
sb_append1 (&buffer, p[0]);
|
||||
p++;
|
||||
}
|
||||
if (p[0] != ',')
|
||||
break;
|
||||
comma_and_spaces = 0;
|
||||
p++;
|
||||
while (p[0] == ' ')
|
||||
{
|
||||
comma_and_spaces++;
|
||||
p++;
|
||||
}
|
||||
}
|
||||
if (comma_and_spaces > 0 && (
|
||||
(p[0] == '\xE2' && p[1] == '\x80' && p[2] == '\xA6')
|
||||
||
|
||||
(p[0] == '.' && p[1] == '.' && p[2] == '.')
|
||||
))
|
||||
{
|
||||
sb_append1 (&buffer, ',');
|
||||
while (comma_and_spaces > 0)
|
||||
{
|
||||
sb_append1 (&buffer, ' ');
|
||||
comma_and_spaces--;
|
||||
}
|
||||
}
|
||||
}
|
||||
return sb_dupfree_c (&buffer);
|
||||
}
|
||||
|
||||
static void
|
||||
remove_empty_examples (char *input)
|
||||
{
|
||||
const char *prefixes[] =
|
||||
{
|
||||
" @integer \xE2\x80\xA6", " @integer ...",
|
||||
" @decimal \xE2\x80\xA6", " @decimal ..."
|
||||
};
|
||||
int num_prefixes = sizeof (prefixes) / sizeof (prefixes[0]);
|
||||
int i;
|
||||
for (i = 0; i < num_prefixes; i++)
|
||||
{
|
||||
const char *prefix = prefixes[i];
|
||||
size_t prefix_length = strlen (prefix);
|
||||
char *p = input;
|
||||
while ((p = strstr (p, prefix)) != NULL)
|
||||
{
|
||||
memmove (p, p + prefix_length, strlen (p + prefix_length) + 1);
|
||||
while (p[0] == ' ')
|
||||
memmove (p, p + 1, strlen (p + 1) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Display usage information and exit. */
|
||||
static void
|
||||
usage (int status)
|
||||
@ -306,6 +469,16 @@ There is NO WARRANTY, to the extent permitted by law.\n\
|
||||
printf ("%s\n", extracted_rules);
|
||||
else
|
||||
{
|
||||
force_spaces (extracted_rules);
|
||||
{
|
||||
char *tmp = remove_XcY (extracted_rules);
|
||||
if (tmp != NULL)
|
||||
{
|
||||
free (extracted_rules);
|
||||
extracted_rules = tmp;
|
||||
remove_empty_examples (extracted_rules);
|
||||
}
|
||||
}
|
||||
struct cldr_plural_rule_list_ty *result =
|
||||
cldr_plural_parse (extracted_rules);
|
||||
if (result == NULL)
|
||||
|
||||
@ -3,7 +3,8 @@
|
||||
|
||||
: ${DIFF=diff}
|
||||
|
||||
# Test conversion from CLDR to gettext, for Arabic and Russian
|
||||
|
||||
# Test if we have XML support
|
||||
|
||||
LC_ALL=C "$top_builddir/src/cldr-plurals" ru /dev/null 2>&1 | grep 'extraction is not supported' > /dev/null 2>&1
|
||||
test $? = 0 && {
|
||||
@ -11,6 +12,9 @@ test $? = 0 && {
|
||||
Exit 77
|
||||
}
|
||||
|
||||
|
||||
# Test conversion from CLDR to gettext, for Arabic
|
||||
|
||||
cat > ar.ok <<\EOF
|
||||
nplurals=6; plural=(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5);
|
||||
EOF
|
||||
@ -22,6 +26,9 @@ LC_ALL=C tr -d '\r' < ar.tmp > ar.out || Exit 1
|
||||
|
||||
${DIFF} ar.ok ar.out || Exit 1
|
||||
|
||||
|
||||
# Test conversion from CLDR to gettext, for Russian
|
||||
|
||||
cat > ru.ok <<\EOF
|
||||
nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : 2);
|
||||
EOF
|
||||
@ -33,31 +40,33 @@ LC_ALL=C tr -d '\r' < ru.tmp > ru.out || Exit 1
|
||||
|
||||
${DIFF} ru.ok ru.out || Exit 1
|
||||
|
||||
|
||||
# Test extraction from CLDR
|
||||
|
||||
cat > foo.in <<\EOF
|
||||
<supplementalData>
|
||||
<plurals type="cardinal">
|
||||
<pluralRules locales="foo">
|
||||
<pluralRule count="one">i = 1 and v = 0 @integer 1</pluralRule>
|
||||
<pluralRule count="other"> @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …</pluralRule>
|
||||
<pluralRule count="one">i = 0,1 @integer 0, 1 @decimal 0.0~1.5</pluralRule>
|
||||
<pluralRule count="many">e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 @integer 1000000, 1c6, 2c6, 3c6, 4c6, 5c6, 6c6, … @decimal 1.0000001c6, 1.1c6, 2.0000001c6, 2.1c6, 3.0000001c6, 3.1c6, …</pluralRule>
|
||||
<pluralRule count="other"> @integer 2~17, 100, 1000, 10000, 100000, 1c3, 2c3, 3c3, 4c3, 5c3, 6c3, … @decimal 2.0~3.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 1.0001c3, 1.1c3, 2.0001c3, 2.1c3, 3.0001c3, 3.1c3, …</pluralRule>
|
||||
</pluralRules>
|
||||
</plurals>
|
||||
</supplementalData>
|
||||
EOF
|
||||
|
||||
"$top_builddir/src/cldr-plurals" foo foo.in > foo.tmp
|
||||
cat > foo.ok <<\EOF
|
||||
nplurals=2; plural=(n != 1);
|
||||
EOF
|
||||
LC_ALL=C tr -d '\r' < foo.tmp > foo.out || Exit 1
|
||||
${DIFF} foo.ok foo.out || Exit 1
|
||||
|
||||
"$top_builddir/src/cldr-plurals" -c foo foo.in > foo.cldr.tmp
|
||||
cat > foo.cldr.ok <<\EOF
|
||||
one: i = 1 and v = 0 @integer 1; other: @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …
|
||||
one: i = 0,1 @integer 0, 1 @decimal 0.0~1.5; many: e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 @integer 1000000, 1c6, 2c6, 3c6, 4c6, 5c6, 6c6, … @decimal 1.0000001c6, 1.1c6, 2.0000001c6, 2.1c6, 3.0000001c6, 3.1c6, …; other: @integer 2~17, 100, 1000, 10000, 100000, 1c3, 2c3, 3c3, 4c3, 5c3, 6c3, … @decimal 2.0~3.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 1.0001c3, 1.1c3, 2.0001c3, 2.1c3, 3.0001c3, 3.1c3, …
|
||||
EOF
|
||||
LC_ALL=C tr -d '\r' < foo.cldr.tmp > foo.cldr.out || Exit 1
|
||||
${DIFF} foo.cldr.ok foo.cldr.out || Exit 1
|
||||
|
||||
"$top_builddir/src/cldr-plurals" foo foo.in > foo.tmp
|
||||
cat > foo.ok <<\EOF
|
||||
nplurals=3; plural=(n==0 || n==1 ? 0 : n!=0 && n%1000000==0 ? 1 : 2);
|
||||
EOF
|
||||
LC_ALL=C tr -d '\r' < foo.tmp > foo.out || Exit 1
|
||||
${DIFF} foo.ok foo.out || Exit 1
|
||||
|
||||
Exit 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user