sed: do not reject "\c[" in a regular expression

As a GNU extension, "\c[" in a regexp matches the ESC character
(i.e., control-[). Yet 's/\c[//' was rejected because "[" was treated
as the beginning of a bracket expression.
* sed/compile.c (match_slash): When matching "slash"es, treat \c as
an escape sequence, so that s/\c[// no longer fails because it
misinterprets [ as a bracket-group start.
* testsuite/misc.pl (bug79519_1,2,3,4,fail): Add tests.
* NEWS (Bug fixes): Mention it.
Reported by Michael Ludwig in https://bugs.gnu.org/79519.
This commit is contained in:
Jim Meyering 2025-09-29 07:27:36 -07:00
parent f124c83561
commit e137695202
4 changed files with 31 additions and 2 deletions

2
NEWS
View File

@ -4,6 +4,8 @@ GNU sed NEWS -*- outline -*-
** Bug fixes
sed no longer rejects "\c[" in regular expressions
'sed --follow-symlinks -i' no longer mishandles an operand that is a
short symbolic link to a long symbolic link to a file.
[bug introduced in sed 4.9]

1
m4/.gitignore vendored
View File

@ -271,7 +271,6 @@
/wctype.m4
/windows-rc.m4
/xgetcwd.m4
/getlocalename_l.m4
/getopt.m4
/frexp.m4
/frexpl.m4

View File

@ -475,8 +475,20 @@ match_slash (int slash, bool regex, bool s_command)
ch = inchar ();
if (ch == EOF)
break;
else if (ch != '\n' && (ch != slash || (!regex && ch == '&')))
/* Preserve backslash except when escaping delimiter in regex. */
if (ch != '\n' && (ch != slash || (!regex && ch == '&')))
add1_buffer (b, '\\');
/* Special case: in regex, treat \cX as atomic escape,
but only in GNU-extension mode (not strict POSIX). */
if (regex && ch == 'c' && posixicity != POSIXLY_BASIC) {
add1_buffer (b, ch);
int next = inchar ();
if (next == EOF)
break;
add1_buffer (b, next);
/* Skip end-of-loop add1_buffer, we already did it. */
continue;
}
if (s_command && posixicity != POSIXLY_EXTENDED && ch != '&'
&& ch != '\\' && !ISDIGIT (ch) && ch != '\n' && ch != slash)
fprintf (stderr, _("%s: warning: using \"\\%c\" in the 's' "

View File

@ -1199,6 +1199,22 @@ s,.*[^\/],,
['bug30794_3', "s/z/\\\\x5c1/", {IN=>'z'}, {OUT => "\\1"}],
['bug40242', q('sn\nnXn'), {IN=>'n'}, {OUT => 'X'}],
# sed 's/\c[//' is valid, but was rejected by sed-4.9 and prior
['bug79519_1', q('s/\c[//'), {IN=>"a\eb"}, {OUT => 'ab'}],
# Exercise in a bracket expression.
['bug79519_2', q('s/[\c[a]//g'), {IN=>"a\ebc"}, {OUT => 'bc'}],
# With --posix, \c is treated like 'c'
['bug79519_3', qw(--posix), q('s/[\c[a]//g'), {IN=>"a\ebc"}, {OUT => "\eb"}],
# Exercise in a search (non-subst) regexp:
['bug79519_4', q('/\c[/d'), {IN=>"a\e\nc\n"}, {OUT => "c\n"}],
# sed --posix must reject, since \cX is a GNU-only feature.
['bug79519_fail', qw(--posix), q('s/\c[//'), {IN=>"x"}, {OUT => ''},
{ERR => "sed: warning: using \"\\c\" in the 's' command is not portable\n"
. "sed: -e expression #1, char 7: unterminated 's' command\n"
},
{EXIT => 1},
],
);
my $save_temps = $ENV{SAVE_TEMPS};