diff, sdiff: new option --ignore-trailing-space (-Z)

Derived from Roland McGrath's patch (dated June 2004!) in:
http://lists.gnu.org/archive/html/bug-gnu-utils/2004-07/msg00000.html
* NEWS:
* doc/diffutils.texi (White Space, Blank Lines)
(sdiff Option Summary, diff Options, sdiff Options): Document -Z.
* src/diff.h (IGNORE_TRAILING_SPACE)
(IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE): New constants, for -Z.
* src/diff.c (shortopts, longopts, main, option_help_msgid):
* src/sdiff.c (longopts, option_help_msgid, main):
* src/io.c (find_and_hash_each_line):
* src/util.c (lines_differ, analyze_hunk): Support -Z.
This commit is contained in:
Roland McGrath 2011-08-14 14:37:01 -07:00 committed by Paul Eggert
parent efb0557f4c
commit 86a40dd6ac
7 changed files with 171 additions and 61 deletions

4
NEWS
View File

@ -7,6 +7,10 @@ GNU diffutils NEWS -*- outline -*-
--ignore-file-name-case now applies at the top level too.
For example, "diff dir inIt" might compare "dir/Init" to "inIt".
** New features
diff and sdiff have a new option --ignore-trailing-space (-Z).
* Noteworthy changes in release 3.1 (2011-08-10) [stable]
** Bug fixes

doc/diffutils.texi
View File

@ -289,7 +289,11 @@ The @option{--ignore-tab-expansion} (@option{-E}) option ignores the
distinction between tabs and spaces on input. A tab is considered to be
equivalent to the number of spaces to the next tab stop (@pxref{Tabs}).
The @option{--ignore-space-change} (@option{-b}) option is stronger.
The @option{--ignore-trailing-space} (@option{-Z}) option ignores white
space at line end.
The @option{--ignore-space-change} (@option{-b}) option is stronger than
@option{-E} and @option{-Z} combined.
It ignores white space at line end, and considers all other sequences of
one or more white space characters within a line to be equivalent. With this
option, @command{diff} considers the following two lines to be equivalent,
@ -344,12 +348,11 @@ is considered identical to a file containing
@end example
Normally this option affects only lines that are completely empty, but
if you also specify the @option{--ignore-space-change} (@option{-b})
option, or the @option{--ignore-all-space} (@option{-w}) option,
if you also specify an option that ignores trailing spaces,
lines are also affected if they look empty but contain white space.
In other words, @option{-B} is equivalent to @samp{-I '^$'} by
default, but it is equivalent to @option{-I '^[[:space:]]*$'} if
@option{-b} or @option{-w} is also specified.
@option{-b}, @option{-w} or @option{-Z} is also specified.
@node Specified Lines
@section Suppressing Differences Whose Lines All Match a Regular Expression
@ -2446,12 +2449,12 @@ The following @command{sdiff} options have the same meaning as for
@example
-a -b -d -i -t -v
-B -E -I @var{regexp}
-B -E -I @var{regexp} -Z
--expand-tabs
--ignore-blank-lines --ignore-case
--ignore-matching-lines=@var{regexp} --ignore-space-change
--ignore-tab-expansion
--ignore-tab-expansion --ignore-trailing-space
--left-column --minimal --speed-large-files
--strip-trailing-cr --suppress-common-lines
--tabsize=@var{columns} --text --version --width=@var{columns}
@ -3966,6 +3969,10 @@ match any pattern contained in @var{file}. @xref{Comparing Directories}.
@item -y
@itemx --side-by-side
Use the side by side output format. @xref{Side by Side Format}.
@item -Z
@itemx --ignore-trailing-space
Ignore white space at line end. @xref{White Space}.
@end table
@node Invoking diff3
@ -4446,6 +4453,10 @@ option is @option{-W} in @command{diff}, @option{-w} in @command{sdiff}.
Ignore white space when comparing lines. @xref{White Space}.
Note that for historical reasons, this option is @option{-w} in @command{diff},
@option{-W} in @command{sdiff}.
@item -Z
@itemx --ignore-trailing-space
Ignore white space at line end. @xref{White Space}.
@end table
@node Standards conformance

src/diff.c
View File

@ -107,7 +107,7 @@ static bool unidirectional_new_file;
static bool report_identical_files;
static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";
"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ";
/* Values for long options that do not have single-letter equivalents. */
enum
@ -178,6 +178,7 @@ static struct option const longopts[] =
{"ignore-matching-lines", 1, 0, 'I'},
{"ignore-space-change", 0, 0, 'b'},
{"ignore-tab-expansion", 0, 0, 'E'},
{"ignore-trailing-space", 0, 0, 'Z'},
{"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
{"initial-tab", 0, 0, 'T'},
{"label", 1, 0, 'L'},
@ -320,6 +321,11 @@ main (int argc, char **argv)
ignore_white_space = IGNORE_SPACE_CHANGE;
break;
case 'Z':
if (ignore_white_space < IGNORE_SPACE_CHANGE)
ignore_white_space |= IGNORE_TRAILING_SPACE;
break;
case 'B':
ignore_blank_lines = true;
break;
@ -381,8 +387,8 @@ main (int argc, char **argv)
break;
case 'E':
if (ignore_white_space < IGNORE_TAB_EXPANSION)
ignore_white_space = IGNORE_TAB_EXPANSION;
if (ignore_white_space < IGNORE_SPACE_CHANGE)
ignore_white_space |= IGNORE_TAB_EXPANSION;
break;
case 'f':
@ -880,6 +886,7 @@ static char const * const option_help_msgid[] = {
"",
N_("-i, --ignore-case ignore case differences in file contents"),
N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"),
N_("-Z, --ignore-trailing-space ignore white space at line end"),
N_("-b, --ignore-space-change ignore changes in the amount of white space"),
N_("-w, --ignore-all-space ignore all white space"),
N_("-B, --ignore-blank-lines ignore changes whose lines are all blank"),

src/diff.h
View File

@ -106,6 +106,14 @@ enum DIFF_white_space
/* Ignore changes due to tab expansion (-E). */
IGNORE_TAB_EXPANSION,
/* Ignore changes in trailing horizontal white space (-Z). */
IGNORE_TRAILING_SPACE,
/* IGNORE_TAB_EXPANSION and IGNORE_TRAILING_SPACE are a special case
because they are independent and can be ORed together, yielding
IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE. */
IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE,
/* Ignore changes in horizontal white space (-b). */
IGNORE_SPACE_CHANGE,

122
src/io.c
View File

@ -255,36 +255,53 @@ find_and_hash_each_line (struct file_data *current)
break;
case IGNORE_TAB_EXPANSION:
case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
case IGNORE_TRAILING_SPACE:
{
size_t column = 0;
while ((c = *p++) != '\n')
{
size_t repetitions = 1;
switch (c)
if (ignore_white_space & IGNORE_TRAILING_SPACE
&& isspace (c))
{
case '\b':
column -= 0 < column;
break;
case '\t':
c = ' ';
repetitions = tabsize - column % tabsize;
column = (column + repetitions < column
? 0
: column + repetitions);
break;
case '\r':
column = 0;
break;
default:
c = tolower (c);
column++;
break;
char const *p1 = p;
unsigned char c1;
do
if ((c1 = *p1++) == '\n')
{
p = p1;
goto hashing_done;
}
while (isspace (c1));
}
size_t repetitions = 1;
if (ignore_white_space & IGNORE_TAB_EXPANSION)
switch (c)
{
case '\b':
column -= 0 < column;
break;
case '\t':
c = ' ';
repetitions = tabsize - column % tabsize;
column = (column + repetitions < column
? 0
: column + repetitions);
break;
case '\r':
column = 0;
break;
default:
column++;
break;
}
c = tolower (c);
do
h = HASH (h, c);
while (--repetitions != 0);
@ -325,35 +342,52 @@ find_and_hash_each_line (struct file_data *current)
break;
case IGNORE_TAB_EXPANSION:
case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
case IGNORE_TRAILING_SPACE:
{
size_t column = 0;
while ((c = *p++) != '\n')
{
size_t repetitions = 1;
switch (c)
if (ignore_white_space & IGNORE_TRAILING_SPACE
&& isspace (c))
{
case '\b':
column -= 0 < column;
break;
case '\t':
c = ' ';
repetitions = tabsize - column % tabsize;
column = (column + repetitions < column
? 0
: column + repetitions);
break;
case '\r':
column = 0;
break;
default:
column++;
break;
char const *p1 = p;
unsigned char c1;
do
if ((c1 = *p1++) == '\n')
{
p = p1;
goto hashing_done;
}
while (isspace (c1));
}
if (ignore_white_space & IGNORE_TAB_EXPANSION)
switch (c)
{
case '\b':
column -= 0 < column;
break;
case '\t':
c = ' ';
repetitions = tabsize - column % tabsize;
column = (column + repetitions < column
? 0
: column + repetitions);
break;
case '\r':
column = 0;
break;
default:
column++;
break;
}
do
h = HASH (h, c);
while (--repetitions != 0);
@ -381,7 +415,7 @@ find_and_hash_each_line (struct file_data *current)
complete line, put it into buckets[-1] so that it can
compare equal only to the other file's incomplete line
(if one exists). */
if (ignore_white_space < IGNORE_SPACE_CHANGE)
if (ignore_white_space < IGNORE_TRAILING_SPACE)
bucket = &buckets[-1];
}

src/sdiff.c
View File

@ -132,6 +132,7 @@ static struct option const longopts[] =
{"ignore-matching-lines", 1, 0, 'I'},
{"ignore-space-change", 0, 0, 'b'},
{"ignore-tab-expansion", 0, 0, 'E'},
{"ignore-trailing-space", 0, 0, 'Z'},
{"left-column", 0, 0, 'l'},
{"minimal", 0, 0, 'd'},
{"output", 1, 0, 'o'},
@ -170,6 +171,7 @@ static char const * const option_help_msgid[] = {
"",
N_("-i, --ignore-case consider upper- and lower-case to be the same"),
N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"),
N_("-Z, --ignore-trailing-space ignore white space at line end"),
N_("-b, --ignore-space-change ignore changes in the amount of white space"),
N_("-W, --ignore-all-space ignore all white space"),
N_("-B, --ignore-blank-lines ignore changes whose lines are all blank"),
@ -458,7 +460,7 @@ main (int argc, char *argv[])
diffarg (DEFAULT_DIFF_PROGRAM);
/* parse command line args */
while ((opt = getopt_long (argc, argv, "abBdEHiI:lo:stvw:W", longopts, 0))
while ((opt = getopt_long (argc, argv, "abBdEHiI:lo:stvw:WZ", longopts, 0))
!= -1)
{
switch (opt)
@ -527,6 +529,10 @@ main (int argc, char *argv[])
diffarg ("-w");
break;
case 'Z':
diffarg ("-Z");
break;
case DIFF_PROGRAM_OPTION:
diffargv[0] = optarg;
break;

src/util.c
View File

@ -395,6 +395,33 @@ lines_differ (char const *s1, char const *s2)
break;
case IGNORE_TRAILING_SPACE:
case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
if (isspace (c1) && isspace (c2))
{
unsigned char c;
if (c1 != '\n')
{
char const *p = t1;
while ((c = *p) != '\n' && isspace (c))
++p;
if (c != '\n')
break;
}
if (c2 != '\n')
{
char const *p = t2;
while ((c = *p) != '\n' && isspace (c))
++p;
if (c != '\n')
break;
}
/* Both lines have nothing but whitespace left. */
return false;
}
if (ignore_white_space == IGNORE_TRAILING_SPACE)
break;
/* Fall through. */
case IGNORE_TAB_EXPANSION:
if ((c1 == ' ' && c2 == '\t')
|| (c1 == '\t' && c2 == ' '))
@ -674,8 +701,11 @@ analyze_hunk (struct change *hunk,
size_t trivial_length = ignore_blank_lines - 1;
/* If 0, ignore zero-length lines;
if SIZE_MAX, do not ignore lines just because of their length. */
bool skip_white_space =
ignore_blank_lines && IGNORE_TRAILING_SPACE <= ignore_white_space;
bool skip_leading_white_space =
(ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space);
skip_white_space && IGNORE_SPACE_CHANGE <= ignore_white_space;
char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */
char const * const *linbuf1 = files[1].linbuf;
@ -699,9 +729,14 @@ analyze_hunk (struct change *hunk,
char const *newline = linbuf0[i + 1] - 1;
size_t len = newline - line;
char const *p = line;
if (skip_leading_white_space)
while (isspace ((unsigned char) *p) && *p != '\n')
p++;
if (skip_white_space)
for (; *p != '\n'; p++)
if (! isspace ((unsigned char) *p))
{
if (! skip_leading_white_space)
p = line;
break;
}
if (newline - p != trivial_length
&& (! ignore_regexp.fastmap
|| re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
@ -714,9 +749,14 @@ analyze_hunk (struct change *hunk,
char const *newline = linbuf1[i + 1] - 1;
size_t len = newline - line;
char const *p = line;
if (skip_leading_white_space)
while (isspace ((unsigned char) *p) && *p != '\n')
p++;
if (skip_white_space)
for (; *p != '\n'; p++)
if (! isspace ((unsigned char) *p))
{
if (! skip_leading_white_space)
p = line;
break;
}
if (newline - p != trivial_length
&& (! ignore_regexp.fastmap
|| re_search (&ignore_regexp, line, len, 0, len, 0) < 0))