Support the CLDR plural operands 'c' and 'e' and the compact "XcY" samples.

* The lexer accepts 'c' and 'e' as operands; eval_relation turns relations on
  them into constant truth values, as it already does for 'f', 't', 'v', 'w'.
* extract_rules only uses the "plurals" element whose type is "cardinal".
* Before parsing, whitespace is normalized to plain spaces, XcY samples
  (eg "1c6") are dropped, and sample lists left with just an ellipsis are
  removed.

NOTE(review): in this copy of the patch the markup lines of the foo.in test
fixture and the e-mail address in the file headers are missing; they are
left as found, so the last hunk of the test script is incomplete.

diff --git a/AUTHORS b/AUTHORS
index 76cf7add2..8004c7c78 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -91,3 +91,6 @@ Assigns past and future changes.
 GETTEXT John Darrington
 Assigns past and future changes.
 (No contributions so far.)
+
+GETTEXT Michele Locati
+Assigns past and future changes.
diff --git a/gettext-tools/src/cldr-plural-exp.c b/gettext-tools/src/cldr-plural-exp.c
index 5a58abbd3..84e78254c 100644
--- a/gettext-tools/src/cldr-plural-exp.c
+++ b/gettext-tools/src/cldr-plural-exp.c
@@ -1,5 +1,5 @@
 /* Unicode CLDR plural rule parser and converter.
-   Copyright (C) 2015-2025 Free Software Foundation, Inc.
+   Copyright (C) 2015-2026 Free Software Foundation, Inc.
 
    This file was written by Daiki Ueno , 2015.
 
@@ -133,10 +133,11 @@ eval_relation (struct cldr_plural_relation_ty *relation)
       break;
 
     case 'f': case 't': case 'v': case 'w':
+    case 'c': case 'e':
       {
         /* Since plural expression in gettext only supports unsigned
            integer, turn relations whose operand is either 'f', 't',
-           'v', or 'w' into a constant truth value.  */
+           'v', 'w', 'c', or 'e' into a constant truth value.  */
         /* FIXME: check mod?  */
         for (size_t i = 0; i < relation->ranges->nitems; i++)
           {
diff --git a/gettext-tools/src/cldr-plural.y b/gettext-tools/src/cldr-plural.y
index 2c14e4ed9..dffe80bbd 100644
--- a/gettext-tools/src/cldr-plural.y
+++ b/gettext-tools/src/cldr-plural.y
@@ -1,5 +1,5 @@
 /* Unicode CLDR plural rule parser and converter.
-   Copyright (C) 2015-2025 Free Software Foundation, Inc.
+   Copyright (C) 2015-2026 Free Software Foundation, Inc.
 
    This file was written by Daiki Ueno , 2015.
 
@@ -428,7 +428,8 @@ yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg)
         {
           switch (ident[0])
             {
-            case 'n': case 'i': case 'f': case 't': case 'v': case 'w':
+            // See https://unicode.org/reports/tr35/tr35-numbers.html#table-plural-operand-meanings
+            case 'n': case 'i': case 'f': case 't': case 'v': case 'w': case 'c': case 'e':
               arg->cp = exp;
               lval->ival = ident[0];
               sb_free (&buffer);
diff --git a/gettext-tools/src/cldr-plurals.c b/gettext-tools/src/cldr-plurals.c
index e8afb35c9..108149364 100644
--- a/gettext-tools/src/cldr-plurals.c
+++ b/gettext-tools/src/cldr-plurals.c
@@ -1,5 +1,5 @@
 /* Unicode CLDR plural rule parser and converter
-   Copyright (C) 2015-2025 Free Software Foundation, Inc.
+   Copyright (C) 2015-2026 Free Software Foundation, Inc.
 
    This file was written by Daiki Ueno , 2015.
 
@@ -39,6 +39,11 @@
 
 #define _(s) gettext(s)
 
+/**
+ * Extract the rules from a CLDR plurals.xml file
+ * @return NULL in case of errors, the CLDR rules otherwise
+ * @example "one: i = 1 and v = 0 @integer 1; other: @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, \u2026 @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, \u2026"
+ */
 static char *
 extract_rules (FILE *fp,
                const char *real_filename, const char *logical_filename,
@@ -71,8 +76,15 @@ extract_rules (FILE *fp,
   for (n = node->children; n; n = n->next)
     {
       if (n->type == XML_ELEMENT_NODE
-          && xmlStrEqual (n->name, BAD_CAST "plurals"))
-        break;
+          && xmlStrEqual (n->name, BAD_CAST "plurals")
+          && xmlHasProp (n, BAD_CAST "type"))
+        {
+          xmlChar *type = xmlGetProp (n, BAD_CAST "type");
+          bool is_cardinal = xmlStrEqual (type, BAD_CAST "cardinal") != 0;
+          xmlFree (type);
+          if (is_cardinal)
+            break;
+        }
     }
   if (!n)
     {
@@ -156,6 +168,179 @@ extract_rules (FILE *fp,
   return sb_xdupfree_c (&buffer);
 }
 
+/**
+ * Find the position after the string in format XcY (eg "1c9")
+ * @param str the possible starting position of the string XcY
+ * @return NULL if str does not start with a XcY string,
+ *         the position of str after the XcY string (and after a comma/spaces
+ *         after it) otherwise
+ */
+static const char *
+get_XcY_end (const char *str)
+{
+  bool found_c = false;
+  if (str[0] < '0' || str[0] > '9')
+    return NULL;
+  str++;
+  while (str[0] != '\0')
+    {
+      if (str[0] == 'c')
+        {
+          if (found_c || str[1] < '0' || str[1] > '9')
+            return NULL;
+          found_c = true;
+        }
+      else if ((str[0] < '0' || str[0] > '9') && str[0] != '.')
+        break;
+      str++;
+    }
+  if (!found_c)
+    return NULL;
+  while (str[0] == ' ')
+    str++;
+  if (str[0] == ',')
+    {
+      str++;
+      while (str[0] == ' ')
+        str++;
+    }
+  return str;
+}
+
+/**
+ * Replace every character for which c_isspace is true with a plain space
+ * @param input the string to be modified in place
+ */
+static void
+force_spaces (char *input)
+{
+  while (input[0] != '\0')
+    {
+      if (c_isspace (input[0]))
+        input[0] = ' ';
+      input++;
+    }
+}
+
+/**
+ * Copy input, dropping the XcY samples (eg "1c6") from the sample lists
+ * that follow "@integer " and "@decimal "
+ * @param input the rules; only plain spaces are skipped as whitespace
+ * @return the filtered copy, as returned by sb_dupfree_c
+ */
+static char *
+remove_XcY (const char *input)
+{
+  const char *p = input;
+  const char *p_next;
+  struct string_buffer buffer;
+  sb_init (&buffer);
+  for (;;)
+    {
+      /* Spaces seen after the last comma, or -1 if no comma is pending.  */
+      int comma_and_spaces = -1;
+      const char *p_next1 = strstr (p, "@integer ");
+      const char *p_next2 = strstr (p, "@decimal ");
+      if (p_next1 == NULL && p_next2 == NULL)
+        {
+          sb_append_c (&buffer, p);
+          break;
+        }
+      if (p_next1 != NULL && (p_next2 == NULL || p_next1 < p_next2))
+        p_next = p_next1 + /* strlen ("@integer ") */ 9;
+      else
+        p_next = p_next2 + /* strlen ("@decimal ") */ 9;
+      while (p < p_next)
+        sb_append1 (&buffer, *p++);
+      while (p[0] == ' ')
+        sb_append1 (&buffer, *p++);
+      for (;;)
+        {
+          const char *XcY_end;
+          if (p[0] < '0' || p[0] > '9')
+            break;
+          XcY_end = get_XcY_end (p);
+          if (XcY_end != NULL)
+            {
+              p = XcY_end;
+              continue;
+            }
+          if (comma_and_spaces >= 0)
+            {
+              sb_append1 (&buffer, ',');
+              while (comma_and_spaces > 0)
+                {
+                  sb_append1 (&buffer, ' ');
+                  comma_and_spaces--;
+                }
+            }
+          while ((p[0] >= '0' && p[0] <= '9') || p[0] == '.' || p[0] == '~')
+            {
+              sb_append1 (&buffer, p[0]);
+              p++;
+            }
+          if (p[0] != ',')
+            break;
+          comma_and_spaces = 0;
+          p++;
+          while (p[0] == ' ')
+            {
+              comma_and_spaces++;
+              p++;
+            }
+        }
+      /* Re-emit the pending separator in front of a trailing ellipsis.  */
+      if (comma_and_spaces > 0 && (
+          (p[0] == '\xE2' && p[1] == '\x80' && p[2] == '\xA6')
+          ||
+          (p[0] == '.' && p[1] == '.' && p[2] == '.')
+         ))
+        {
+          sb_append1 (&buffer, ',');
+          while (comma_and_spaces > 0)
+            {
+              sb_append1 (&buffer, ' ');
+              comma_and_spaces--;
+            }
+        }
+    }
+  return sb_dupfree_c (&buffer);
+}
+
+/**
+ * Remove the sample lists that consist of an ellipsis only
+ * (eg " @integer ..."), together with the space in front of them
+ * @param input the string to be modified in place
+ */
+static void
+remove_empty_examples (char *input)
+{
+  const char *prefixes[] =
+    {
+      " @integer \xE2\x80\xA6", " @integer ...",
+      " @decimal \xE2\x80\xA6", " @decimal ..."
+    };
+  int num_prefixes = sizeof (prefixes) / sizeof (prefixes[0]);
+  int i;
+  for (i = 0; i < num_prefixes; i++)
+    {
+      const char *prefix = prefixes[i];
+      size_t prefix_length = strlen (prefix);
+      char *p = input;
+      while ((p = strstr (p, prefix)) != NULL)
+        {
+          memmove (p, p + prefix_length, strlen (p + prefix_length) + 1);
+          /* Drop the spaces that followed the removed list, but keep one
+             of them when another list follows: every prefix starts with
+             a space, so an empty list coming next (eg "@decimal ...")
+             could not be matched without it.  */
+          size_t n_spaces = strspn (p, " ");
+          size_t n_kept = (n_spaces > 0 && p[n_spaces] == '@') ? 1 : 0;
+          memmove (p + n_kept, p + n_spaces, strlen (p + n_spaces) + 1);
+        }
+    }
+}
+
 /* Display usage information and exit.  */
 static void
 usage (int status)
@@ -306,6 +491,16 @@ There is NO WARRANTY, to the extent permitted by law.\n\
         printf ("%s\n", extracted_rules);
       else
         {
+          force_spaces (extracted_rules);
+          {
+            char *tmp = remove_XcY (extracted_rules);
+            if (tmp != NULL)
+              {
+                free (extracted_rules);
+                extracted_rules = tmp;
+                remove_empty_examples (extracted_rules);
+              }
+          }
           struct cldr_plural_rule_list_ty *result =
             cldr_plural_parse (extracted_rules);
           if (result == NULL)
diff --git a/gettext-tools/tests/cldr-plurals-1 b/gettext-tools/tests/cldr-plurals-1
index 3c6dad0df..87d262d8b 100755
--- a/gettext-tools/tests/cldr-plurals-1
+++ b/gettext-tools/tests/cldr-plurals-1
@@ -3,7 +3,8 @@
 
 : ${DIFF=diff}
 
-# Test conversion from CLDR to gettext, for Arabic and Russian
+
+# Test if we have XML support
 LC_ALL=C "$top_builddir/src/cldr-plurals" ru /dev/null 2>&1 |
   grep 'extraction is not supported' > /dev/null 2>&1
 test $? = 0 && {
@@ -11,6 +12,9 @@ test $? = 0 && {
   Exit 77
 }
 
+
+# Test conversion from CLDR to gettext, for Arabic
+
 cat > ar.ok <<\EOF
 nplurals=6; plural=(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5);
 EOF
@@ -22,6 +26,9 @@ LC_ALL=C tr -d '\r' < ar.tmp > ar.out || Exit 1
 
 ${DIFF} ar.ok ar.out || Exit 1
 
+
+# Test conversion from CLDR to gettext, for Russian
+
 cat > ru.ok <<\EOF
 nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : 2);
 EOF
@@ -33,31 +40,33 @@ LC_ALL=C tr -d '\r' < ru.tmp > ru.out || Exit 1
 
 ${DIFF} ru.ok ru.out || Exit 1
 
+
 # Test extraction from CLDR
 
 cat > foo.in <<\EOF
- i = 1 and v = 0 @integer 1
- @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …
+ i = 0,1 @integer 0, 1 @decimal 0.0~1.5
+ e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 @integer 1000000, 1c6, 2c6, 3c6, 4c6, 5c6, 6c6, … @decimal 1.0000001c6, 1.1c6, 2.0000001c6, 2.1c6, 3.0000001c6, 3.1c6, …
+ @integer 2~17, 100, 1000, 10000, 100000, 1c3, 2c3, 3c3, 4c3, 5c3, 6c3, … @decimal 2.0~3.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 1.0001c3, 1.1c3, 2.0001c3, 2.1c3, 3.0001c3, 3.1c3, …
 EOF
 
-"$top_builddir/src/cldr-plurals" foo foo.in > foo.tmp
-cat > foo.ok <<\EOF
-nplurals=2; plural=(n != 1);
-EOF
-LC_ALL=C tr -d '\r' < foo.tmp > foo.out || Exit 1
-${DIFF} foo.ok foo.out || Exit 1
-
 "$top_builddir/src/cldr-plurals" -c foo foo.in > foo.cldr.tmp
 cat > foo.cldr.ok <<\EOF
-one: i = 1 and v = 0 @integer 1; other: @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …
+one: i = 0,1 @integer 0, 1 @decimal 0.0~1.5; many: e = 0 and i != 0 and i % 1000000 = 0 and v = 0 or e != 0..5 @integer 1000000, 1c6, 2c6, 3c6, 4c6, 5c6, 6c6, … @decimal 1.0000001c6, 1.1c6, 2.0000001c6, 2.1c6, 3.0000001c6, 3.1c6, …; other: @integer 2~17, 100, 1000, 10000, 100000, 1c3, 2c3, 3c3, 4c3, 5c3, 6c3, … @decimal 2.0~3.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 1.0001c3, 1.1c3, 2.0001c3, 2.1c3, 3.0001c3, 3.1c3, …
 EOF
 LC_ALL=C tr -d '\r' < foo.cldr.tmp > foo.cldr.out || Exit 1
 ${DIFF} foo.cldr.ok foo.cldr.out || Exit 1
 
+"$top_builddir/src/cldr-plurals" foo foo.in > foo.tmp
+cat > foo.ok <<\EOF
+nplurals=3; plural=(n==0 || n==1 ? 0 : n!=0 && n%1000000==0 ? 1 : 2);
+EOF
+LC_ALL=C tr -d '\r' < foo.tmp > foo.out || Exit 1
+${DIFF} foo.ok foo.out || Exit 1
+
 Exit 0