xgettext: awk: Recognize string concatenation.

* gettext-tools/src/x-awk.c (SIZEOF): New macro.
(phase3_pushback, phase3_pushback_length): New variables.
(phase3_get): Renamed from x_awk_lex. Return pushed-back token if present.
(phase3_unget): New function.
(string_concat_free1): New function.
(phase4_get): New function.
(extract_parenthesized): Invoke phase4_get instead of x_awk_lex.
(extract_awk): Initialize phase3_pushback_length.
* gettext-tools/tests/xgettext-awk-1: Add test cases for string concatenation.
* NEWS: Mention the improvement.
This commit is contained in:
Bruno Haible 2024-09-11 14:09:34 +02:00
parent 29f693f18c
commit d0f044719d
3 changed files with 78 additions and 3 deletions

1
NEWS
View File

@ -16,6 +16,7 @@ Version 0.23 - September 2024
o xgettext now recognizes comments of the form '#; <expression>'.
- Java: Improved recognition of format strings when the String.formatted
method is used.
- awk: String concatenation by juxtaposition is now recognized.
- Smalltalk: The string concatenation operator ',' is now recognized.
- Vala: Improved recognition of format strings when the string.printf method
is used.

View File

@ -45,6 +45,8 @@
#define _(s) gettext(s)
/* Number of elements of the array A.  A must be an actual array object,
   not a pointer (for a pointer the result is meaningless).  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
/* The awk syntax is defined in the gawk manual page and documentation.
See also gawk/awkgram.y. */
@ -374,14 +376,23 @@ free_token (token_ty *tp)
in between. */
static bool prefer_division_over_regexp;
static token_ty phase3_pushback[1];
static int phase3_pushback_length;
static void
x_awk_lex (token_ty *tp)
phase3_get (token_ty *tp)
{
static char *buffer;
static int bufmax;
int bufpos;
int c;
if (phase3_pushback_length)
{
*tp = phase3_pushback[--phase3_pushback_length];
return;
}
for (;;)
{
tp->line_number = line_number;
@ -657,6 +668,56 @@ x_awk_lex (token_ty *tp)
}
}
/* Pushes the token *TP back, so that the next call to phase3_get returns it.
   An end-of-file token is dropped instead of being stored.
   The pushback buffer has room for a single token only; pushing back while
   it is already full aborts the program.  */
static void
phase3_unget (token_ty *tp)
{
  if (tp->type == token_type_eof)
    return;

  if (phase3_pushback_length == SIZEOF (phase3_pushback))
    abort ();

  phase3_pushback[phase3_pushback_length] = *tp;
  phase3_pushback_length++;
}
/* 8. Concatenate adjacent string literals to form single string literals. */
/* Returns a freshly allocated string consisting of S1 followed by S2.
   S1 is freed before returning; S2 is left untouched.  */
static char *
string_concat_free1 (char *s1, const char *s2)
{
  size_t head_len = strlen (s1);
  size_t tail_size = strlen (s2) + 1;   /* includes the terminating NUL */
  char *joined = XNMALLOC (head_len + tail_size, char);

  memcpy (joined, s1, head_len);
  /* Copying tail_size bytes also transfers S2's terminating NUL.  */
  memcpy (joined + head_len, s2, tail_size);
  free (s1);

  return joined;
}
/* Reads the next token into *TP, like phase3_get, except that a string
   token immediately followed by further string tokens is merged with them
   into a single string token.  The first following token that is not a
   string is handed to phase3_unget, so that the next read sees it.  */
static void
phase4_get (token_ty *tp)
{
  phase3_get (tp);
  if (tp->type == token_type_string)
    {
      token_ty next;

      /* Keep absorbing string tokens for as long as they follow directly.  */
      for (phase3_get (&next);
           next.type == token_type_string;
           phase3_get (&next))
        {
          tp->string = string_concat_free1 (tp->string, next.string);
          free_token (&next);
        }

      /* NEXT is the lookahead token that ended the run of strings.  */
      phase3_unget (&next);
    }
}
/* ========================= Extracting strings. ========================== */
@ -720,7 +781,7 @@ extract_parenthesized (message_list_ty *mlp,
{
token_ty token;
x_awk_lex (&token);
phase4_get (&token);
if (next_is_argument && token.type != token_type_lparen)
{
@ -892,6 +953,7 @@ extract_awk (FILE *f,
last_non_comment_line = -1;
prefer_division_over_regexp = false;
phase3_pushback_length = 0;
flag_context_list_table = flag_table;
nesting_depth = 0;

View File

@ -1,7 +1,7 @@
#!/bin/sh
. "${srcdir=.}/init.sh"; path_prepend_ . ../src
# Test awk support: --add-comments option.
# Test awk support: --add-comments option, string concatenation.
cat <<\EOF > xg-a-1.awk
# This comment will not be extracted.
@ -13,6 +13,12 @@ print _"Hey Jude"
# TRANSLATORS:
# Nickname of the Beatles
print _"The Fabulous Four"
# This string is not extracted.
print "not extracted"
# String concatenation in regular contexts.
print dcgettext("Olivia" " " "Newton-John")
# String concatenation *not* happening right after the _ marker.
print _"hello" " world"
EOF
: ${XGETTEXT=xgettext}
@ -35,6 +41,12 @@ msgstr ""
#. Nickname of the Beatles
msgid "The Fabulous Four"
msgstr ""
msgid "Olivia Newton-John"
msgstr ""
msgid "hello"
msgstr ""
EOF
: ${DIFF=diff}