diff, sdiff: new option --ignore-trailing-space (-Z)

Derived from Roland McGrath's patch (dated June 2004!) in:
http://lists.gnu.org/archive/html/bug-gnu-utils/2004-07/msg00000.html
* NEWS:
* doc/diffutils.texi (White Space, Blank Lines)
(sdiff Option Summary, diff Options, sdiff Options): Document -Z.
* src/diff.h (IGNORE_TRAILING_SPACE)
(IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE): New constants, for -Z.
* src/diff.c (shortopts, longopts, main, option_help_msgid):
* src/sdiff.c (longopts, option_help_msgid, main):
* src/io.c (find_and_hash_each_line):
* src/util.c (lines_differ, analyze_hunk): Support -Z.
2026-01-27 01:44:20 +00:00 · 2011-08-14 14:37:01 -07:00 · 2011-08-14 14:37:01 -07:00 · 86a40dd6ac
commit 86a40dd6ac
parent efb0557f4c
7 changed files with 171 additions and 61 deletions
--- a/NEWS
+++ b/NEWS
@ -7,6 +7,10 @@ GNU diffutils NEWS                                    -*- outline -*-
  --ignore-file-name-case now applies at the top level too.
  For example, "diff dir inIt" might compare "dir/Init" to "inIt".

+** New features
+
+  diff and sdiff have a new option --ignore-trailing-space (-Z).
+
 * Noteworthy changes in release 3.1 (2011-08-10) [stable]

 ** Bug fixes
--- a/doc/diffutils.texi
+++ b/doc/diffutils.texi
@ -289,7 +289,11 @@ The @option{--ignore-tab-expansion} (@option{-E}) option ignores the
 distinction between tabs and spaces on input.  A tab is considered to be
 equivalent to the number of spaces to the next tab stop (@pxref{Tabs}).

-The @option{--ignore-space-change} (@option{-b}) option is stronger.
+The @option{--ignore-trailing-space} (@option{-Z}) option ignores white
+space at line end.
+
+The @option{--ignore-space-change} (@option{-b}) option is stronger than
+@option{-E} and @option{-Z} combined.
 It ignores white space at line end, and considers all other sequences of
 one or more white space characters within a line to be equivalent.  With this
 option, @command{diff} considers the following two lines to be equivalent,
@ -344,12 +348,11 @@ is considered identical to a file containing
@end example

 Normally this option affects only lines that are completely empty, but
-if you also specify the @option{--ignore-space-change} (@option{-b})
-option, or the @option{--ignore-all-space} (@option{-w}) option,
+if you also specify an option that ignores trailing spaces,
 lines are also affected if they look empty but contain white space.
 In other words, @option{-B} is equivalent to @samp{-I '^$'} by
 default, but it is equivalent to @option{-I '^[[:space:]]*$'} if
-@option{-b} or @option{-w} is also specified.
+@option{-b}, @option{-w} or @option{-Z} is also specified.

@node Specified Lines
@section Suppressing Differences Whose Lines All Match a Regular Expression
@ -2446,12 +2449,12 @@ The following @command{sdiff} options have the same meaning as for

@example
 -a -b -d -i -t -v
--B -E -I @var{regexp}
+-B -E -I @var{regexp} -Z

 --expand-tabs
 --ignore-blank-lines  --ignore-case
 --ignore-matching-lines=@var{regexp}  --ignore-space-change
---ignore-tab-expansion
+--ignore-tab-expansion  --ignore-trailing-space
 --left-column  --minimal  --speed-large-files
 --strip-trailing-cr  --suppress-common-lines
 --tabsize=@var{columns}  --text  --version  --width=@var{columns}
@ -3966,6 +3969,10 @@ match any pattern contained in @var{file}.  @xref{Comparing Directories}.
@item -y
@itemx --side-by-side
 Use the side by side output format.  @xref{Side by Side Format}.
+
+@item -Z
+@itemx --ignore-trailing-space
+Ignore white space at line end.  @xref{White Space}.
@end table

@node Invoking diff3
@ -4446,6 +4453,10 @@ option is @option{-W} in @command{diff}, @option{-w} in @command{sdiff}.
 Ignore white space when comparing lines.  @xref{White Space}.
 Note that for historical reasons, this option is @option{-w} in @command{diff},
@option{-W} in @command{sdiff}.
+
+@item -Z
+@itemx --ignore-trailing-space
+Ignore white space at line end.  @xref{White Space}.
@end table

@node Standards conformance
--- a/src/diff.c
+++ b/src/diff.c
@ -107,7 +107,7 @@ static bool unidirectional_new_file;
 static bool report_identical_files;

 static char const shortopts[] =
-"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";
+"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ";

 /* Values for long options that do not have single-letter equivalents.  */
 enum
@ -178,6 +178,7 @@ static struct option const longopts[] =
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
+  {"ignore-trailing-space", 0, 0, 'Z'},
  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
  {"initial-tab", 0, 0, 'T'},
  {"label", 1, 0, 'L'},
@ -320,6 +321,11 @@ main (int argc, char **argv)
 	    ignore_white_space = IGNORE_SPACE_CHANGE;
 	  break;

+	case 'Z':
+	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
+	    ignore_white_space |= IGNORE_TRAILING_SPACE;
+	  break;
+
 	case 'B':
 	  ignore_blank_lines = true;
 	  break;
@ -381,8 +387,8 @@ main (int argc, char **argv)
 	  break;

 	case 'E':
-	  if (ignore_white_space < IGNORE_TAB_EXPANSION)
-	    ignore_white_space = IGNORE_TAB_EXPANSION;
+	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
+	    ignore_white_space |= IGNORE_TAB_EXPANSION;
 	  break;

 	case 'f':
@ -880,6 +886,7 @@ static char const * const option_help_msgid[] = {
  "",
  N_("-i, --ignore-case               ignore case differences in file contents"),
  N_("-E, --ignore-tab-expansion      ignore changes due to tab expansion"),
+  N_("-Z, --ignore-trailing-space     ignore white space at line end"),
  N_("-b, --ignore-space-change       ignore changes in the amount of white space"),
  N_("-w, --ignore-all-space          ignore all white space"),
  N_("-B, --ignore-blank-lines        ignore changes whose lines are all blank"),
--- a/src/diff.h
+++ b/src/diff.h
@ -106,6 +106,14 @@ enum DIFF_white_space
  /* Ignore changes due to tab expansion (-E).  */
  IGNORE_TAB_EXPANSION,

+  /* Ignore changes in trailing horizontal white space (-Z).  */
+  IGNORE_TRAILING_SPACE,
+
+  /* IGNORE_TAB_EXPANSION and IGNORE_TRAILING_SPACE are a special case
+     because they are independent and can be ORed together, yielding
+     IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE.  */
+  IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE,
+
  /* Ignore changes in horizontal white space (-b).  */
  IGNORE_SPACE_CHANGE,

--- a/src/io.c
+++ b/src/io.c
@ -255,36 +255,53 @@ find_and_hash_each_line (struct file_data *current)
 	    break;

 	  case IGNORE_TAB_EXPANSION:
+	  case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+	  case IGNORE_TRAILING_SPACE:
 	    {
 	      size_t column = 0;
 	      while ((c = *p++) != '\n')
 		{
-		  size_t repetitions = 1;
-
-		  switch (c)
+		  if (ignore_white_space & IGNORE_TRAILING_SPACE
+		      && isspace (c))
 		    {
-		    case '\b':
-		      column -= 0 < column;
-		      break;
-
-		    case '\t':
-		      c = ' ';
-		      repetitions = tabsize - column % tabsize;
-		      column = (column + repetitions < column
-				? 0
-				: column + repetitions);
-		      break;
-
-		    case '\r':
-		      column = 0;
-		      break;
-
-		    default:
-		      c = tolower (c);
-		      column++;
-		      break;
+		      char const *p1 = p;
+		      unsigned char c1;
+		      do
+			if ((c1 = *p1++) == '\n')
+			  {
+			    p = p1;
+			    goto hashing_done;
+			  }
+		      while (isspace (c1));
 		    }

+		  size_t repetitions = 1;
+
+		  if (ignore_white_space & IGNORE_TAB_EXPANSION)
+		    switch (c)
+		      {
+		      case '\b':
+			column -= 0 < column;
+			break;
+
+		      case '\t':
+			c = ' ';
+			repetitions = tabsize - column % tabsize;
+			column = (column + repetitions < column
+				  ? 0
+				  : column + repetitions);
+			break;
+
+		      case '\r':
+			column = 0;
+			break;
+
+		      default:
+			column++;
+			break;
+		      }
+
+		  c = tolower (c);
 		  do
 		    h = HASH (h, c);
 		  while (--repetitions != 0);
@ -325,35 +342,52 @@ find_and_hash_each_line (struct file_data *current)
 	    break;

 	  case IGNORE_TAB_EXPANSION:
+	  case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+	  case IGNORE_TRAILING_SPACE:
 	    {
 	      size_t column = 0;
 	      while ((c = *p++) != '\n')
 		{
 		  size_t repetitions = 1;

-		  switch (c)
+		  if (ignore_white_space & IGNORE_TRAILING_SPACE
+		      && isspace (c))
 		    {
-		    case '\b':
-		      column -= 0 < column;
-		      break;
-
-		    case '\t':
-		      c = ' ';
-		      repetitions = tabsize - column % tabsize;
-		      column = (column + repetitions < column
-				? 0
-				: column + repetitions);
-		      break;
-
-		    case '\r':
-		      column = 0;
-		      break;
-
-		    default:
-		      column++;
-		      break;
+		      char const *p1 = p;
+		      unsigned char c1;
+		      do
+			if ((c1 = *p1++) == '\n')
+			  {
+			    p = p1;
+			    goto hashing_done;
+			  }
+		      while (isspace (c1));
 		    }

+		  if (ignore_white_space & IGNORE_TAB_EXPANSION)
+		    switch (c)
+		      {
+		      case '\b':
+			column -= 0 < column;
+			break;
+
+		      case '\t':
+			c = ' ';
+			repetitions = tabsize - column % tabsize;
+			column = (column + repetitions < column
+				  ? 0
+				  : column + repetitions);
+			break;
+
+		      case '\r':
+			column = 0;
+			break;
+
+		      default:
+			column++;
+			break;
+		      }
+
 		  do
 		    h = HASH (h, c);
 		  while (--repetitions != 0);
@ -381,7 +415,7 @@ find_and_hash_each_line (struct file_data *current)
 	     complete line, put it into buckets[-1] so that it can
 	     compare equal only to the other file's incomplete line
 	     (if one exists).  */
-	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
+	  if (ignore_white_space < IGNORE_TRAILING_SPACE)
 	    bucket = &buckets[-1];
 	}

--- a/src/sdiff.c
+++ b/src/sdiff.c
@ -132,6 +132,7 @@ static struct option const longopts[] =
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
+  {"ignore-trailing-space", 0, 0, 'Z'},
  {"left-column", 0, 0, 'l'},
  {"minimal", 0, 0, 'd'},
  {"output", 1, 0, 'o'},
@ -170,6 +171,7 @@ static char const * const option_help_msgid[] = {
  "",
  N_("-i, --ignore-case            consider upper- and lower-case to be the same"),
  N_("-E, --ignore-tab-expansion   ignore changes due to tab expansion"),
+  N_("-Z, --ignore-trailing-space  ignore white space at line end"),
  N_("-b, --ignore-space-change    ignore changes in the amount of white space"),
  N_("-W, --ignore-all-space       ignore all white space"),
  N_("-B, --ignore-blank-lines     ignore changes whose lines are all blank"),
@ -458,7 +460,7 @@ main (int argc, char *argv[])
  diffarg (DEFAULT_DIFF_PROGRAM);

  /* parse command line args */
-  while ((opt = getopt_long (argc, argv, "abBdEHiI:lo:stvw:W", longopts, 0))
+  while ((opt = getopt_long (argc, argv, "abBdEHiI:lo:stvw:WZ", longopts, 0))
 	 != -1)
    {
      switch (opt)
@ -527,6 +529,10 @@ main (int argc, char *argv[])
 	  diffarg ("-w");
 	  break;

+	case 'Z':
+	  diffarg ("-Z");
+	  break;
+
 	case DIFF_PROGRAM_OPTION:
 	  diffargv[0] = optarg;
 	  break;
--- a/src/util.c
+++ b/src/util.c
@ -395,6 +395,33 @@ lines_differ (char const *s1, char const *s2)

 	      break;

+	    case IGNORE_TRAILING_SPACE:
+	    case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+	      if (isspace (c1) && isspace (c2))
+		{
+		  unsigned char c;
+		  if (c1 != '\n')
+		    {
+		      char const *p = t1;
+		      while ((c = *p) != '\n' && isspace (c))
+			++p;
+		      if (c != '\n')
+			break;
+		    }
+		  if (c2 != '\n')
+		    {
+		      char const *p = t2;
+		      while ((c = *p) != '\n' && isspace (c))
+			++p;
+		      if (c != '\n')
+			break;
+		    }
+		  /* Both lines have nothing but whitespace left.  */
+		  return false;
+		}
+	      if (ignore_white_space == IGNORE_TRAILING_SPACE)
+		break;
+	      /* Fall through.  */
 	    case IGNORE_TAB_EXPANSION:
 	      if ((c1 == ' ' && c2 == '\t')
 		  || (c1 == '\t' && c2 == ' '))
@ -674,8 +701,11 @@ analyze_hunk (struct change *hunk,
  size_t trivial_length = ignore_blank_lines - 1;
    /* If 0, ignore zero-length lines;
       if SIZE_MAX, do not ignore lines just because of their length.  */
+
+  bool skip_white_space =
+    ignore_blank_lines && IGNORE_TRAILING_SPACE <= ignore_white_space;
  bool skip_leading_white_space =
-    (ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space);
+    skip_white_space && IGNORE_SPACE_CHANGE <= ignore_white_space;

  char const * const *linbuf0 = files[0].linbuf;  /* Help the compiler.  */
  char const * const *linbuf1 = files[1].linbuf;
@ -699,9 +729,14 @@ analyze_hunk (struct change *hunk,
 	  char const *newline = linbuf0[i + 1] - 1;
 	  size_t len = newline - line;
 	  char const *p = line;
-	  if (skip_leading_white_space)
-	    while (isspace ((unsigned char) *p) && *p != '\n')
-	      p++;
+	  if (skip_white_space)
+	    for (; *p != '\n'; p++)
+	      if (! isspace ((unsigned char) *p))
+		{
+		  if (! skip_leading_white_space)
+		    p = line;
+		  break;
+		}
 	  if (newline - p != trivial_length
 	      && (! ignore_regexp.fastmap
 		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
@ -714,9 +749,14 @@ analyze_hunk (struct change *hunk,
 	  char const *newline = linbuf1[i + 1] - 1;
 	  size_t len = newline - line;
 	  char const *p = line;
-	  if (skip_leading_white_space)
-	    while (isspace ((unsigned char) *p) && *p != '\n')
-	      p++;
+	  if (skip_white_space)
+	    for (; *p != '\n'; p++)
+	      if (! isspace ((unsigned char) *p))
+		{
+		  if (! skip_leading_white_space)
+		    p = line;
+		  break;
+		}
 	  if (newline - p != trivial_length
 	      && (! ignore_regexp.fastmap
 		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))