xgettext: Rust: Support named format string arguments.

Reported by Kévin Commaille <zecakeh@tedomum.fr>
in <https://savannah.gnu.org/bugs/?56774>.

* autogen.sh (GNULIB_MODULES_TOOLS_FOR_SRC): Add unictype/property-xid-start,
unictype/property-xid-continue.
(GNULIB_MODULES_LIBGETTEXTPO): Likewise.
* gettext-tools/src/format-rust.c (struct named_arg): New type.
(struct spec): Add fields named_arg_count, named.
(named_arg_compare): New function.
(format_parse): Recognize named arguments.
(format_check): Search for differences also between the named arguments.
(format_print): Print also the named arguments.
* gettext-tools/tests/format-rust-1: Add test cases for named arguments.
* gettext-tools/tests/format-rust-2: Likewise.
* gettext-tools/doc/gettext.texi (rust-format): Describe rust-format in detail.
This commit is contained in:
Bruno Haible 2025-02-11 11:37:40 +01:00
parent a24c903f5f
commit 3cbdb13073
5 changed files with 274 additions and 18 deletions

View File

@ -259,6 +259,8 @@ if ! $skip_gnulib; then
unictype/ctype-space
unictype/property-white-space
unictype/syntax-java-whitespace
unictype/property-xid-start
unictype/property-xid-continue
unilbrk/ulc-width-linebreaks
uniname/uniname
uninorm/nfc
@ -401,6 +403,8 @@ if ! $skip_gnulib; then
strnlen
unictype/ctype-space
unictype/property-white-space
unictype/property-xid-start
unictype/property-xid-continue
unilbrk/ulc-width-linebreaks
unistr/u8-mbtouc
unistr/u8-mbtoucr

View File

@ -10105,6 +10105,44 @@ These are those supported by the @code{format!} built-in
with the restrictions listed in
@url{https://crates.io/crates/formatx}, section "Limitations".
A directive in a Rust format string consists of
@itemize @bullet
@item
an opening brace @samp{@{},
@item
an optional non-empty sequence of digits or an optional identifier,
@item
optionally, a @samp{:} and a format specifier,
where a format specifier is of the form
@code{[[@var{fill}]@var{align}][@var{sign}][#][0][@var{minimumwidth}][.@var{precision}][@var{type}]}
where
@itemize -
@item
the @var{fill} character is any character,
@item
the @var{align} flag is one of @samp{<}, @samp{>}, @samp{^},
@item
the @var{sign} is one of @samp{+}, @samp{-},
@item
the # flag is @samp{#},
@item
the 0 flag is @samp{0},
@item
@var{minimumwidth} is a non-empty sequence of digits,
@item
@var{precision} is a non-empty sequence of digits,
@item
@var{type} is @samp{?},
@end itemize
@item
optional white-space,
@item
a closing brace @samp{@}}.
@end itemize
@noindent
Brace characters @samp{@{} and @samp{@}} can be escaped by doubling them:
@samp{@{@{} and @samp{@}@}}.
@node ruby-format
@subsection Ruby Format Strings

View File

@ -46,7 +46,11 @@
A format string directive here consists of
- an opening brace '{',
- an optional non-empty sequence of digits,
- an optional non-empty sequence of digits
or an optional identifier_or_keyword according to
<https://doc.rust-lang.org/1.84.0/reference/identifiers.html>
(that is, a XID_Start character and a sequence of XID_Continue characters
or an underscore '_' and a non-empty sequence of XID_Continue characters),
- optionally, a ':' and a format specifier, where a format specifier is
of the form [[fill]align][sign][#][0][minimumwidth][.precision][type]
where
@ -66,7 +70,23 @@
Numbered ('{m}') and unnumbered ('{}') argument specifications cannot be used
in the same string; that's unsupported (although it does not always lead to
an error at runtime, see <https://github.com/clitic/formatx/issues/7>). */
an error at runtime, see <https://github.com/clitic/formatx/issues/7>).
Named ('{name}') and unnamed ('{m}', '{}') argument specifications can be
used in the same string. In the formatx! arguments, all unnamed arguments
must come before all named arguments; but this is not a restriction for the
format string.
In the 'format!' built-in, all arguments that are passed must be used by the
format string, but this is not a requirement for formatx!:
formatx!("{1} {1}", 44, 55)
formatx!("{}", 9, a = 47)
are valid (see <https://github.com/clitic/formatx/issues/8>). */
/* One named argument ('{name}') referenced by a format string.  */
struct named_arg
{
/* NUL-terminated copy of the identifier, allocated by format_parse and
   released together with the containing array.  */
char *name;
};
struct numbered_arg
{
@ -77,7 +97,9 @@ struct numbered_arg
/* Result of parsing one format string.  */
struct spec
{
/* Number of format directives seen; incremented once per parsed directive.  */
unsigned int directives;
/* Number of valid entries in NAMED.  Once format_parse has finished, NAMED is
   sorted by name and contains no duplicates.  */
unsigned int named_arg_count;
/* Number of valid entries in NUMBERED.
   NOTE(review): presumably also sorted and deduplicated like NAMED; the code
   doing so is not fully visible here - confirm.  */
unsigned int numbered_arg_count;
/* Heap-allocated array of named arguments, or NULL if there are none.  Each
   element's name is freed individually before the array itself.  */
struct named_arg *named;
/* Heap-allocated array of numbered (and unnumbered) arguments, or NULL.  */
struct numbered_arg *numbered;
};
@ -88,6 +110,13 @@ struct spec
the token, and true is returned. Otherwise, FORMATP will be
unchanged and false is returned. */
/* Comparison function for qsort: orders two 'struct named_arg' elements
   by their names, using strcmp's byte-wise ordering.
   Returns a negative, zero, or positive value accordingly.  */
static int
named_arg_compare (const void *p1, const void *p2)
{
  const struct named_arg *lhs = (const struct named_arg *) p1;
  const struct named_arg *rhs = (const struct named_arg *) p2;

  return strcmp (lhs->name, rhs->name);
}
static int
numbered_arg_compare (const void *p1, const void *p2)
{
@ -102,14 +131,18 @@ format_parse (const char *format, bool translated, char *fdi,
char **invalid_reason)
{
struct spec spec;
unsigned int named_allocated;
unsigned int numbered_allocated;
bool seen_numbered_args;
unsigned int unnumbered_arg_count;
struct spec *result;
spec.directives = 0;
spec.named_arg_count = 0;
spec.numbered_arg_count = 0;
spec.named = NULL;
spec.numbered = NULL;
named_allocated = 0;
numbered_allocated = 0;
seen_numbered_args = false;
unnumbered_arg_count = 0;
@ -129,6 +162,7 @@ format_parse (const char *format, bool translated, char *fdi,
else
{
const char *const format_start = format;
bool seen_identifier_or_keyword = false;
unsigned int arg_id;
if (c_isdigit (*format))
@ -161,16 +195,67 @@ format_parse (const char *format, bool translated, char *fdi,
}
else
{
/* Numbered and unnumbered specifications are exclusive. */
if (seen_numbered_args > 0)
{
*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
FDI_SET (format - 1, FMTDIR_ERROR);
goto bad_format;
}
/* Try to parse an identifier_or_keyword (that is,
- a XID_Start character and a sequence of XID_Continue
characters
- or an underscore '_' and a non-empty sequence of
XID_Continue characters). */
{
ucs4_t uc1;
int n1 = u8_mbtouc (&uc1,
(const uint8_t *) format,
strnlen (format, 4));
if (n1 > 0
&& (uc_is_property_xid_start (uc1) || uc1 == '_'))
{
const char *name_start = format;
const char *f = format + n1;
arg_id = unnumbered_arg_count;
unnumbered_arg_count++;
for (;;)
{
ucs4_t uc;
int n = u8_mbtouc (&uc,
(const uint8_t *) f,
strnlen (f, 4));
if (n > 0 && uc_is_property_xid_continue (uc))
f += n;
else
break;
}
if (uc1 != '_' || f > format + 1)
{
const char *name_end = f;
size_t n = name_end - name_start;
char *name = XNMALLOC (n + 1, char);
memcpy (name, name_start, n);
name[n] = '\0';
if (named_allocated == spec.named_arg_count)
{
named_allocated = 2 * named_allocated + 1;
spec.named = (struct named_arg *) xrealloc (spec.named, named_allocated * sizeof (struct named_arg));
}
spec.named[spec.named_arg_count].name = name;
spec.named_arg_count++;
format = f;
seen_identifier_or_keyword = true;
}
}
}
if (!seen_identifier_or_keyword)
{
/* Numbered and unnumbered specifications are exclusive. */
if (seen_numbered_args > 0)
{
*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
FDI_SET (format - 1, FMTDIR_ERROR);
goto bad_format;
}
arg_id = unnumbered_arg_count;
unnumbered_arg_count++;
}
}
c = *format;
@ -253,13 +338,16 @@ format_parse (const char *format, bool translated, char *fdi,
spec.directives++;
if (numbered_allocated == spec.numbered_arg_count)
if (!seen_identifier_or_keyword)
{
numbered_allocated = 2 * numbered_allocated + 1;
spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
if (numbered_allocated == spec.numbered_arg_count)
{
numbered_allocated = 2 * numbered_allocated + 1;
spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
}
spec.numbered[spec.numbered_arg_count].number = arg_id;
spec.numbered_arg_count++;
}
spec.numbered[spec.numbered_arg_count].number = arg_id;
spec.numbered_arg_count++;
FDI_SET (format, FMTDIR_END);
}
@ -288,11 +376,39 @@ format_parse (const char *format, bool translated, char *fdi,
spec.numbered_arg_count = j;
}
/* Sort the named argument array, and eliminate duplicates. */
if (spec.named_arg_count > 1)
{
unsigned int i, j;
qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
named_arg_compare);
/* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
for (i = j = 0; i < spec.named_arg_count; i++)
if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
free (spec.named[i].name);
else
{
if (j < i)
spec.named[j].name = spec.named[i].name;
j++;
}
spec.named_arg_count = j;
}
result = XMALLOC (struct spec);
*result = spec;
return result;
bad_format:
if (spec.named != NULL)
{
unsigned int i;
for (i = 0; i < spec.named_arg_count; i++)
free (spec.named[i].name);
free (spec.named);
}
if (spec.numbered != NULL)
free (spec.numbered);
return NULL;
@ -303,6 +419,13 @@ format_free (void *descr)
{
struct spec *spec = (struct spec *) descr;
if (spec->named != NULL)
{
unsigned int i;
for (i = 0; i < spec->named_arg_count; i++)
free (spec->named[i].name);
free (spec->named);
}
free (spec->numbered);
free (spec);
}
@ -324,6 +447,49 @@ format_check (void *msgid_descr, void *msgstr_descr, bool equality,
struct spec *spec2 = (struct spec *) msgstr_descr;
bool err = false;
if (spec1->named_arg_count + spec2->named_arg_count > 0)
{
unsigned int i, j;
unsigned int n1 = spec1->named_arg_count;
unsigned int n2 = spec2->named_arg_count;
/* Check the argument names in spec2 are contained in those of spec1.
Both arrays are sorted. We search for the first difference. */
for (i = 0, j = 0; i < n1 || j < n2; )
{
int cmp = (i >= n1 ? 1 :
j >= n2 ? -1 :
strcmp (spec1->named[i].name, spec2->named[j].name));
if (cmp > 0)
{
if (error_logger)
error_logger (error_logger_data,
_("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
spec2->named[j].name, pretty_msgstr,
pretty_msgid);
err = true;
break;
}
else if (cmp < 0)
{
if (equality)
{
if (error_logger)
error_logger (error_logger_data,
_("a format specification for argument '%s' doesn't exist in '%s'"),
spec1->named[i].name, pretty_msgstr);
err = true;
break;
}
else
i++;
}
else
j++, i++;
}
}
if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
{
unsigned int i, j;
@ -409,7 +575,7 @@ format_print (void *descr)
{
struct spec *spec = (struct spec *) descr;
unsigned int last;
unsigned int i;
unsigned int i, j;
if (spec == NULL)
{
@ -432,6 +598,12 @@ format_print (void *descr)
printf ("*");
last = number + 1;
}
for (j = 0; j < spec->named_arg_count; j++)
{
if (i > 0 || j > 0)
printf (" ");
printf ("%s", spec->named[j].name);
}
printf (")");
}

View File

@ -10,22 +10,46 @@ cat <<\EOF > f-rs-1.data
"abc{{}}"
# Valid: a numeric argument
"abc{0}"
# Invalid: a named argument
# Valid: a named argument
"abc{value}"
# Valid: an omitted number
"abc{}"
# Invalid: unterminated directive
"abc{1"
# Invalid: unterminated directive
"abc{v"
# Valid: a numeric argument and a named argument
"abc{0}def{value}"
# Valid: a numeric argument without number and a named argument
"abc{}def{value}"
# Valid: a named argument and a numeric argument
"abc{value}def{0}"
# Valid: a named argument and a numeric argument without number
"abc{value}def{}"
# Invalid: numeric arguments with and without number are unsupported
"abc{}def{1}"
# Invalid: numeric arguments with and without number are unsupported
"abc{1}def{}"
# Valid: format specifier
"abc{1:0}"
# Valid: format specifier
"abc{value:0}"
# Valid: format specifier
"abc{1:<<-#012.34}"
# Valid: format specifier
"abc{value:<<-#012.34}"
# Invalid: conversion in format specifier
"abc{1:<<-#012.34e}"
# Invalid: conversion in format specifier
"abc{value:<<-#012.34e}"
# Invalid: empty precision
"abc{1:8.}"
# Invalid: empty precision
"abc{value:8.}"
# Invalid: invalid format specifier
"abc{1:<c>}"
# Invalid: invalid format specifier
"abc{value:<c>}"
EOF
: ${XGETTEXT=xgettext}

View File

@ -10,24 +10,42 @@ msgstr "xyz{1}{0}{2}"
# Valid: permutation
msgid "abc{2}{0}{1}def"
msgstr "xyz{1}{0}{2}"
# Valid: permutation
msgid "abc{w}{u}{v}def"
msgstr "xyz{v}{u}{w}"
# Valid: permutation
msgid "abc{1}{u}{0}def"
msgstr "xyz{0}{u}{1}"
# Invalid: missing argument
msgid "abc{1}def{0}"
msgstr "xyz{0}"
# Invalid: missing argument
msgid "abc{0}def{1}"
msgstr "xyz{1}"
# Invalid: missing argument
msgid "abc{y}def{x}"
msgstr "xyz{x}"
# Invalid: added argument
msgid "abc{}def"
msgstr "xyz{}uvw{}"
# Invalid: added argument
msgid "abc{0}def"
msgstr "xyz{0}uvw{1}"
# Invalid: added argument
msgid "abc{x}def"
msgstr "xyz{x}uvw{y}"
# Valid: multiple reuse of same argument
msgid "{2} {0} {1}"
msgstr "{1} {0} {2} {0}"
# Valid: multiple reuse of same argument
msgid "{w} {u} {v}"
msgstr "{v} {u} {w} {u}"
# Valid: single reuse of same argument
msgid "{1} {0} {2} {0}"
msgstr "{2} {0} {1}"
# Valid: single reuse of same argument
msgid "{v} {u} {w} {u}"
msgstr "{w} {u} {v}"
# Valid: "{{" is an escape of "{"
msgid "abc{{{1}{2}"
msgstr "{2}abc{1}"