xgettext: Shell: Reject \u and \U escape sequences that produce surrogates.

* gettext-tools/src/x-sh.c (read_word): Reject \u and \U values in the range
0xD800..0xDFFF.
* gettext-tools/tests/xgettext-sh-9: Add more test cases.
This commit is contained in:
Bruno Haible 2025-06-21 06:49:23 +02:00
parent 06858e5651
commit 3e7c882bd7
2 changed files with 10 additions and 1 deletions

View File

@@ -1090,7 +1090,7 @@ read_word (struct word *wp, int looking_for, flag_region_ty *region)
}
if (j > 0)
{
-if (n < 0x110000)
+if (n < 0x110000 && !(n >= 0xD800 && n <= 0xDFFF))
{
if (wp->type == t_string)
mixed_string_buffer_append_unicode (wp->token, n);

View File

@@ -12,6 +12,9 @@ echo `gettext $'depth_1_dollar_bash_\u20accd\U1f603kl'`
echo `echo \`gettext $'depth_2_dollar_posix_\xc1mn\301op'\``
echo `echo \`gettext $'depth_2_dollar_bash_\u20accd\U1f603kl'\``
gettext $'high surrogate \udada'
gettext $'low surrogate \udddd'
EOF
: ${XGETTEXT=xgettext}
@@ -55,6 +58,12 @@ msgstr ""
msgid "depth_2_dollar_bash_€cd😃kl"
msgstr ""
msgid "high surrogate \\udada"
msgstr ""
msgid "low surrogate \\udddd"
msgstr ""
EOF
: ${DIFF=diff}