mirror of
https://https.git.savannah.gnu.org/git/gettext.git
synced 2026-01-26 15:39:11 +00:00
xgettext: Rust: Support named format string arguments.
Reported by Kévin Commaille <zecakeh@tedomum.fr> in
<https://savannah.gnu.org/bugs/?56774>.

* autogen.sh (GNULIB_MODULES_TOOLS_FOR_SRC): Add unictype/property-xid-start,
unictype/property-xid-continue.
(GNULIB_MODULES_LIBGETTEXTPO): Likewise.
* gettext-tools/src/format-rust.c (struct named_arg): New type.
(struct spec): Add fields named_arg_count, named.
(named_arg_compare): New function.
(format_parse): Recognize named arguments.
(format_check): Search for differences also between the named arguments.
(format_print): Print also the named arguments.
* gettext-tools/tests/format-rust-1: Add test cases for named arguments.
* gettext-tools/tests/format-rust-2: Likewise.
* gettext-tools/doc/gettext.texi (rust-format): Describe rust-format in detail.
This commit is contained in:
parent
a24c903f5f
commit
3cbdb13073
@ -259,6 +259,8 @@ if ! $skip_gnulib; then
|
||||
unictype/ctype-space
|
||||
unictype/property-white-space
|
||||
unictype/syntax-java-whitespace
|
||||
unictype/property-xid-start
|
||||
unictype/property-xid-continue
|
||||
unilbrk/ulc-width-linebreaks
|
||||
uniname/uniname
|
||||
uninorm/nfc
|
||||
@ -401,6 +403,8 @@ if ! $skip_gnulib; then
|
||||
strnlen
|
||||
unictype/ctype-space
|
||||
unictype/property-white-space
|
||||
unictype/property-xid-start
|
||||
unictype/property-xid-continue
|
||||
unilbrk/ulc-width-linebreaks
|
||||
unistr/u8-mbtouc
|
||||
unistr/u8-mbtoucr
|
||||
|
||||
@ -10105,6 +10105,44 @@ These are those supported by the @code{format!} built-in
|
||||
with the restrictions listed in
|
||||
@url{https://crates.io/crates/formatx}, section "Limitations".
|
||||
|
||||
A Rust format string consists of
|
||||
@itemize @bullet
|
||||
@item
|
||||
an opening brace @samp{@{},
|
||||
@item
|
||||
an optional non-empty sequence of digits or an optional identifier,
|
||||
@item
|
||||
optionally, a @samp{:} and a format specifier,
|
||||
where a format specifier is of the form
|
||||
@code{[[@var{fill}]@var{align}][@var{sign}][#][0][@var{minimumwidth}][.@var{precision}][@var{type}]}
|
||||
where
|
||||
@itemize -
|
||||
@item
|
||||
the @var{fill} character is any character,
|
||||
@item
|
||||
the @var{align} flag is one of @samp{<}, @samp{>}, @samp{^},
|
||||
@item
|
||||
the @var{sign} is one of @samp{+}, @samp{-},
|
||||
@item
|
||||
the # flag is @samp{#},
|
||||
@item
|
||||
the 0 flag is @samp{0},
|
||||
@item
|
||||
@var{minimumwidth} is a non-empty sequence of digits,
|
||||
@item
|
||||
@var{precision} is a non-empty sequence of digits,
|
||||
@item
|
||||
@var{type} is @samp{?},
|
||||
@end itemize
|
||||
@item
|
||||
optional white-space,
|
||||
@item
|
||||
a closing brace @samp{@}}.
|
||||
@end itemize
|
||||
@noindent
|
||||
Brace characters @samp{@{} and @samp{@}} can be escaped by doubling them:
|
||||
@samp{@{@{} and @samp{@}@}}.
|
||||
|
||||
@node ruby-format
|
||||
@subsection Ruby Format Strings
|
||||
|
||||
|
||||
@ -46,7 +46,11 @@
|
||||
|
||||
A format string directive here consists of
|
||||
- an opening brace '{',
|
||||
- an optional non-empty sequence of digits,
|
||||
- an optional non-empty sequence of digits
|
||||
or an optional identifier_or_keyword according to
|
||||
<https://doc.rust-lang.org/1.84.0/reference/identifiers.html>
|
||||
(that is, an XID_Start character and a sequence of XID_Continue characters
|
||||
or an underscore '_' and a non-empty sequence of XID_Continue characters),
|
||||
- optionally, a ':' and a format specifier, where a format specifier is
|
||||
of the form [[fill]align][sign][#][0][minimumwidth][.precision][type]
|
||||
where
|
||||
@ -66,7 +70,23 @@
|
||||
|
||||
Numbered ('{m}') and unnumbered ('{}') argument specifications cannot be used
|
||||
in the same string; that's unsupported (although it does not always lead to
|
||||
an error at runtime, see <https://github.com/clitic/formatx/issues/7>). */
|
||||
an error at runtime, see <https://github.com/clitic/formatx/issues/7>).
|
||||
|
||||
Named ('{name}') and unnamed ('{m}', '{}') argument specifications can be
|
||||
used in the same string. In the formatx! arguments, all unnamed arguments
|
||||
must come before all named arguments; but this is not a restriction for the
|
||||
format string.
|
||||
|
||||
In the 'format!' built-in, all arguments that are passed must be used by the
|
||||
format string, but this is not a requirement for formatx!:
|
||||
formatx!("{1} {1}", 44, 55)
|
||||
formatx!("{}", 9, a = 47)
|
||||
are valid (see <https://github.com/clitic/formatx/issues/8>). */
|
||||
|
||||
/* A named argument ('{name}') referenced by a format string.  */
struct named_arg
{
  /* NUL-terminated argument name.  format_parse stores a freshly allocated
     copy here; it is released with free() when the spec is discarded.  */
  char *name;
};
|
||||
|
||||
struct numbered_arg
|
||||
{
|
||||
@ -77,7 +97,9 @@ struct numbered_arg
|
||||
/* Parsed description of a Rust format string, as produced by format_parse.  */
struct spec
{
  /* Number of format directives seen in the string.  */
  unsigned int directives;
  /* Number of valid elements in NAMED.  */
  unsigned int named_arg_count;
  /* Number of valid elements in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Named arguments; NULL when there are none.  After parsing, the array is
     sorted by name and contains no duplicates.  */
  struct named_arg *named;
  /* Numbered (and unnumbered) arguments; NULL when there are none.  */
  struct numbered_arg *numbered;
};
|
||||
|
||||
@ -88,6 +110,13 @@ struct spec
|
||||
the token, and true is returned. Otherwise, FORMATP will be
|
||||
unchanged and false is returned. */
|
||||
|
||||
static int
|
||||
named_arg_compare (const void *p1, const void *p2)
|
||||
{
|
||||
return strcmp (((const struct named_arg *) p1)->name,
|
||||
((const struct named_arg *) p2)->name);
|
||||
}
|
||||
|
||||
static int
|
||||
numbered_arg_compare (const void *p1, const void *p2)
|
||||
{
|
||||
@ -102,14 +131,18 @@ format_parse (const char *format, bool translated, char *fdi,
|
||||
char **invalid_reason)
|
||||
{
|
||||
struct spec spec;
|
||||
unsigned int named_allocated;
|
||||
unsigned int numbered_allocated;
|
||||
bool seen_numbered_args;
|
||||
unsigned int unnumbered_arg_count;
|
||||
struct spec *result;
|
||||
|
||||
spec.directives = 0;
|
||||
spec.named_arg_count = 0;
|
||||
spec.numbered_arg_count = 0;
|
||||
spec.named = NULL;
|
||||
spec.numbered = NULL;
|
||||
named_allocated = 0;
|
||||
numbered_allocated = 0;
|
||||
seen_numbered_args = false;
|
||||
unnumbered_arg_count = 0;
|
||||
@ -129,6 +162,7 @@ format_parse (const char *format, bool translated, char *fdi,
|
||||
else
|
||||
{
|
||||
const char *const format_start = format;
|
||||
bool seen_identifier_or_keyword = false;
|
||||
unsigned int arg_id;
|
||||
|
||||
if (c_isdigit (*format))
|
||||
@ -161,16 +195,67 @@ format_parse (const char *format, bool translated, char *fdi,
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Numbered and unnumbered specifications are exclusive. */
|
||||
if (seen_numbered_args > 0)
|
||||
{
|
||||
*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
|
||||
FDI_SET (format - 1, FMTDIR_ERROR);
|
||||
goto bad_format;
|
||||
}
|
||||
/* Try to parse an identifier_or_keyword (that is,
|
||||
- a XID_Start character and a sequence of XID_Continue
|
||||
characters
|
||||
- or an underscore '_' and a non-empty sequence of
|
||||
XID_Continue characters). */
|
||||
{
|
||||
ucs4_t uc1;
|
||||
int n1 = u8_mbtouc (&uc1,
|
||||
(const uint8_t *) format,
|
||||
strnlen (format, 4));
|
||||
if (n1 > 0
|
||||
&& (uc_is_property_xid_start (uc1) || uc1 == '_'))
|
||||
{
|
||||
const char *name_start = format;
|
||||
const char *f = format + n1;
|
||||
|
||||
arg_id = unnumbered_arg_count;
|
||||
unnumbered_arg_count++;
|
||||
for (;;)
|
||||
{
|
||||
ucs4_t uc;
|
||||
int n = u8_mbtouc (&uc,
|
||||
(const uint8_t *) f,
|
||||
strnlen (f, 4));
|
||||
if (n > 0 && uc_is_property_xid_continue (uc))
|
||||
f += n;
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (uc1 != '_' || f > format + 1)
|
||||
{
|
||||
const char *name_end = f;
|
||||
size_t n = name_end - name_start;
|
||||
char *name = XNMALLOC (n + 1, char);
|
||||
memcpy (name, name_start, n);
|
||||
name[n] = '\0';
|
||||
|
||||
if (named_allocated == spec.named_arg_count)
|
||||
{
|
||||
named_allocated = 2 * named_allocated + 1;
|
||||
spec.named = (struct named_arg *) xrealloc (spec.named, named_allocated * sizeof (struct named_arg));
|
||||
}
|
||||
spec.named[spec.named_arg_count].name = name;
|
||||
spec.named_arg_count++;
|
||||
|
||||
format = f;
|
||||
seen_identifier_or_keyword = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!seen_identifier_or_keyword)
|
||||
{
|
||||
/* Numbered and unnumbered specifications are exclusive. */
|
||||
if (seen_numbered_args > 0)
|
||||
{
|
||||
*invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
|
||||
FDI_SET (format - 1, FMTDIR_ERROR);
|
||||
goto bad_format;
|
||||
}
|
||||
|
||||
arg_id = unnumbered_arg_count;
|
||||
unnumbered_arg_count++;
|
||||
}
|
||||
}
|
||||
|
||||
c = *format;
|
||||
@ -253,13 +338,16 @@ format_parse (const char *format, bool translated, char *fdi,
|
||||
|
||||
spec.directives++;
|
||||
|
||||
if (numbered_allocated == spec.numbered_arg_count)
|
||||
if (!seen_identifier_or_keyword)
|
||||
{
|
||||
numbered_allocated = 2 * numbered_allocated + 1;
|
||||
spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
|
||||
if (numbered_allocated == spec.numbered_arg_count)
|
||||
{
|
||||
numbered_allocated = 2 * numbered_allocated + 1;
|
||||
spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
|
||||
}
|
||||
spec.numbered[spec.numbered_arg_count].number = arg_id;
|
||||
spec.numbered_arg_count++;
|
||||
}
|
||||
spec.numbered[spec.numbered_arg_count].number = arg_id;
|
||||
spec.numbered_arg_count++;
|
||||
|
||||
FDI_SET (format, FMTDIR_END);
|
||||
}
|
||||
@ -288,11 +376,39 @@ format_parse (const char *format, bool translated, char *fdi,
|
||||
spec.numbered_arg_count = j;
|
||||
}
|
||||
|
||||
/* Sort the named argument array, and eliminate duplicates. */
|
||||
if (spec.named_arg_count > 1)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
|
||||
named_arg_compare);
|
||||
|
||||
/* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
|
||||
for (i = j = 0; i < spec.named_arg_count; i++)
|
||||
if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
|
||||
free (spec.named[i].name);
|
||||
else
|
||||
{
|
||||
if (j < i)
|
||||
spec.named[j].name = spec.named[i].name;
|
||||
j++;
|
||||
}
|
||||
spec.named_arg_count = j;
|
||||
}
|
||||
|
||||
result = XMALLOC (struct spec);
|
||||
*result = spec;
|
||||
return result;
|
||||
|
||||
bad_format:
|
||||
if (spec.named != NULL)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = 0; i < spec.named_arg_count; i++)
|
||||
free (spec.named[i].name);
|
||||
free (spec.named);
|
||||
}
|
||||
if (spec.numbered != NULL)
|
||||
free (spec.numbered);
|
||||
return NULL;
|
||||
@ -303,6 +419,13 @@ format_free (void *descr)
|
||||
{
|
||||
struct spec *spec = (struct spec *) descr;
|
||||
|
||||
if (spec->named != NULL)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = 0; i < spec->named_arg_count; i++)
|
||||
free (spec->named[i].name);
|
||||
free (spec->named);
|
||||
}
|
||||
free (spec->numbered);
|
||||
free (spec);
|
||||
}
|
||||
@ -324,6 +447,49 @@ format_check (void *msgid_descr, void *msgstr_descr, bool equality,
|
||||
struct spec *spec2 = (struct spec *) msgstr_descr;
|
||||
bool err = false;
|
||||
|
||||
if (spec1->named_arg_count + spec2->named_arg_count > 0)
|
||||
{
|
||||
unsigned int i, j;
|
||||
unsigned int n1 = spec1->named_arg_count;
|
||||
unsigned int n2 = spec2->named_arg_count;
|
||||
|
||||
/* Check the argument names in spec2 are contained in those of spec1.
|
||||
Both arrays are sorted. We search for the first difference. */
|
||||
for (i = 0, j = 0; i < n1 || j < n2; )
|
||||
{
|
||||
int cmp = (i >= n1 ? 1 :
|
||||
j >= n2 ? -1 :
|
||||
strcmp (spec1->named[i].name, spec2->named[j].name));
|
||||
|
||||
if (cmp > 0)
|
||||
{
|
||||
if (error_logger)
|
||||
error_logger (error_logger_data,
|
||||
_("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
|
||||
spec2->named[j].name, pretty_msgstr,
|
||||
pretty_msgid);
|
||||
err = true;
|
||||
break;
|
||||
}
|
||||
else if (cmp < 0)
|
||||
{
|
||||
if (equality)
|
||||
{
|
||||
if (error_logger)
|
||||
error_logger (error_logger_data,
|
||||
_("a format specification for argument '%s' doesn't exist in '%s'"),
|
||||
spec1->named[i].name, pretty_msgstr);
|
||||
err = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
i++;
|
||||
}
|
||||
else
|
||||
j++, i++;
|
||||
}
|
||||
}
|
||||
|
||||
if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
|
||||
{
|
||||
unsigned int i, j;
|
||||
@ -409,7 +575,7 @@ format_print (void *descr)
|
||||
{
|
||||
struct spec *spec = (struct spec *) descr;
|
||||
unsigned int last;
|
||||
unsigned int i;
|
||||
unsigned int i, j;
|
||||
|
||||
if (spec == NULL)
|
||||
{
|
||||
@ -432,6 +598,12 @@ format_print (void *descr)
|
||||
printf ("*");
|
||||
last = number + 1;
|
||||
}
|
||||
for (j = 0; j < spec->named_arg_count; j++)
|
||||
{
|
||||
if (i > 0 || j > 0)
|
||||
printf (" ");
|
||||
printf ("%s", spec->named[j].name);
|
||||
}
|
||||
printf (")");
|
||||
}
|
||||
|
||||
|
||||
@ -10,22 +10,46 @@ cat <<\EOF > f-rs-1.data
|
||||
"abc{{}}"
|
||||
# Valid: a numeric argument
|
||||
"abc{0}"
|
||||
# Invalid: a named argument
|
||||
# Valid: a named argument
|
||||
"abc{value}"
|
||||
# Valid: an omitted number
|
||||
"abc{}"
|
||||
# Invalid: unterminated directive
|
||||
"abc{1"
|
||||
# Invalid: unterminated directive
|
||||
"abc{v"
|
||||
# Valid: a numeric argument and a named argument
|
||||
"abc{0}def{value}"
|
||||
# Valid: a numeric argument without number and a named argument
|
||||
"abc{}def{value}"
|
||||
# Valid: a named argument and a numeric argument
|
||||
"abc{value}def{0}"
|
||||
# Valid: a named argument and a numeric argument without number
|
||||
"abc{value}def{}"
|
||||
# Invalid: numeric arguments with and without number are unsupported
|
||||
"abc{}def{1}"
|
||||
# Invalid: numeric arguments with and without number are unsupported
|
||||
"abc{1}def{}"
|
||||
# Valid: format specifier
|
||||
"abc{1:0}"
|
||||
# Valid: format specifier
|
||||
"abc{value:0}"
|
||||
# Valid: format specifier
|
||||
"abc{1:<<-#012.34}"
|
||||
# Valid: format specifier
|
||||
"abc{value:<<-#012.34}"
|
||||
# Invalid: conversion in format specifier
|
||||
"abc{1:<<-#012.34e}"
|
||||
# Invalid: conversion in format specifier
|
||||
"abc{value:<<-#012.34e}"
|
||||
# Invalid: empty precision
|
||||
"abc{1:8.}"
|
||||
# Invalid: empty precision
|
||||
"abc{value:8.}"
|
||||
# Invalid: invalid format specifier
|
||||
"abc{1:<c>}"
|
||||
# Invalid: invalid format specifier
|
||||
"abc{value:<c>}"
|
||||
EOF
|
||||
|
||||
: ${XGETTEXT=xgettext}
|
||||
|
||||
@ -10,24 +10,42 @@ msgstr "xyz{1}{0}{2}"
|
||||
# Valid: permutation
|
||||
msgid "abc{2}{0}{1}def"
|
||||
msgstr "xyz{1}{0}{2}"
|
||||
# Valid: permutation
|
||||
msgid "abc{w}{u}{v}def"
|
||||
msgstr "xyz{v}{u}{w}"
|
||||
# Valid: permutation
|
||||
msgid "abc{1}{u}{0}def"
|
||||
msgstr "xyz{0}{u}{1}"
|
||||
# Invalid: missing argument
|
||||
msgid "abc{1}def{0}"
|
||||
msgstr "xyz{0}"
|
||||
# Invalid: missing argument
|
||||
msgid "abc{0}def{1}"
|
||||
msgstr "xyz{1}"
|
||||
# Invalid: missing argument
|
||||
msgid "abc{y}def{x}"
|
||||
msgstr "xyz{x}"
|
||||
# Invalid: added argument
|
||||
msgid "abc{}def"
|
||||
msgstr "xyz{}uvw{}"
|
||||
# Invalid: added argument
|
||||
msgid "abc{0}def"
|
||||
msgstr "xyz{0}uvw{1}"
|
||||
# Invalid: added argument
|
||||
msgid "abc{x}def"
|
||||
msgstr "xyz{x}uvw{y}"
|
||||
# Valid: multiple reuse of same argument
|
||||
msgid "{2} {0} {1}"
|
||||
msgstr "{1} {0} {2} {0}"
|
||||
# Valid: multiple reuse of same argument
|
||||
msgid "{w} {u} {v}"
|
||||
msgstr "{v} {u} {w} {u}"
|
||||
# Valid: single reuse of same argument
|
||||
msgid "{1} {0} {2} {0}"
|
||||
msgstr "{2} {0} {1}"
|
||||
# Valid: single reuse of same argument
|
||||
msgid "{v} {u} {w} {u}"
|
||||
msgstr "{w} {u} {v}"
|
||||
# Valid: "{{" is an escape of "{"
|
||||
msgid "abc{{{1}{2}"
|
||||
msgstr "{2}abc{1}"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user