Fixed Savannah bug #14535

2026-01-26 15:39:06 +00:00 · 2007-03-03 04:23:51 +00:00 · 2007-03-03 04:23:51 +00:00 · 404de03d73
commit 404de03d73
parent 62a35467dd
5 changed files with 107 additions and 78 deletions
--- a/28
+++ b/28
@ -1,3 +1,31 @@
+2007-03-03  James Youngman  <jay@gnu.org>
+
+	* import-gnulib.config (modules): Also use Gnulib modules
+	mbscasestr and mbsstr in order to perform correct string searching
+	in multibyte strings, in order to fix Savannah bug #14535.
+
+	* locate/testsuite/locate.gnu/sv-bug-14535.exp: new test case for
+	Savannah bug #14535.
+	
+	* locate/locate.c (visit_substring_match_nocasefold): Use mbsstr
+	rather than strstr, in order to correctly support multibyte
+	strings.  
+	(visit_substring_match_casefold): Use mbscasestr rather than
+	strstr in order to correctly support case-folding in a multibyte
+	environment (e.g. with UTF-8 characters outside the normal ASCII
+	range).  This fixes Savannah bug #14535.
+	(struct casefolder): No longer needed, removed.
+	(visit_casefold): No longer needed, removed.
+	(lc_strcpy): No longer needed, removed.
+	(search_one_database): Removed redundant variable need_fold and
+	the code which used to set it.  It had controlled the adding of
+	the visitor function visit_casefold, but that function itself is
+	no longer required.  Also there is now no need to pass in a
+	lower-case argument to visit_substring_match_casefold, so don't
+	pass that in the context argument.
+
+	* locate/locate.c (usage): Fixed typo.
+	
 2007-03-01  James Youngman  <jay@gnu.org>

 	* doc/find.texi (Multiple Files): Document the construct
--- a/9
+++ b/9
@ -1,6 +1,15 @@
 GNU findutils NEWS - User visible changes.	-*- outline -*- (allout)
 * Major changes in release 4.3.3-CVS

+** Bug Fixes
+
+Savannah bug #14535: correctly support case-folding in locate (that
+is, "locate -i") for multibyte character environments such as UTF-8.
+Previously, if your search string contained a character outside the
+single-byte range of the encoding (for example, a non-ASCII character
+in UTF-8), case-folding failed to work and only exact matches were
+returned.
+
 ** Functional changes

 The -printf action (and similar related actions) now support %S,
--- a/import-gnulib.config
+++ b/import-gnulib.config
@ -20,7 +20,7 @@ build-aux/texinfo.tex
 # Solaris which lack those functions.
 modules="\
 alloca argmatch dirname error fileblocks fnmatch-gnu fopen-safer fts \
-getline getopt human idcache lstat malloc memcmp memset mktime \
+getline getopt human idcache lstat malloc mbscasestr mbsstr memcmp memset mktime \
 modechange pathmax quotearg realloc regex rpmatch savedir \
 stpcpy strdup strftime  strstr strtol strtoul strtoull strtoumax  \
 xalloc xalloc-die xgetcwd  xstrtod xstrtol  xstrtoumax yesno human filemode \
--- a/locate/locate.c
+++ b/locate/locate.c
@ -85,13 +85,8 @@

 #define NDEBUG
 #include <assert.h>
-
-#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
 #include <string.h>
-#else
-#include <strings.h>
-#define strchr index
-#endif
+

 #ifdef STDC_HEADERS
 #include <stdlib.h>
@ -143,7 +138,7 @@ extern int errno;
 #include "regextype.h"
 #include "gnulib-version.h"

-/* Note that this evaluates C many times.  */
+/* Note that this evaluates Ch many times.  */
 #ifdef _LIBC
 # define TOUPPER(Ch) toupper (Ch)
 # define TOLOWER(Ch) tolower (Ch)
@ -316,17 +311,6 @@ locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
 }


-static void
-lc_strcpy(char *dest, const char *src)
-{
-  while (*src)
-    {
-      *dest++ = TOLOWER(*src);
-      ++src;
-    }
-  *dest = 0;
-}
-
 struct locate_limits
 {
  uintmax_t limit;
@ -356,12 +340,6 @@ struct stringbuf
 static struct stringbuf casebuf;


-struct casefolder
-{
-  const char *pattern;
-  struct stringbuf *pbuf;
-};
-
 struct regular_expression
 {
  struct re_pattern_buffer regex; /* for --regex */
@ -646,21 +624,6 @@ visit_basename(struct process_data *procdata, void *context)
 }


-static int
-visit_casefold(struct process_data *procdata, void *context)
-{
-  struct stringbuf *b = context;
-
-  if (*b->preqlen+1 > b->buffersize)
-    {
-      b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
-      b->buffersize = *b->preqlen+1;
-    }
-  lc_strcpy(b->buffer, procdata->munged_filename);
-
-  return VISIT_CONTINUE;
-}
-  
 /* visit_existing_follow implements -L -e */
 static int
 visit_existing_follow(struct process_data *procdata, void *context)
@ -754,7 +717,7 @@ visit_substring_match_nocasefold(struct process_data *procdata, void *context)
 {
  const char *pattern = context;

-  if (NULL != strstr(procdata->munged_filename, pattern))
+  if (NULL != mbsstr(procdata->munged_filename, pattern))
    return VISIT_ACCEPTED;
  else
    return VISIT_REJECTED;
@ -763,11 +726,9 @@ visit_substring_match_nocasefold(struct process_data *procdata, void *context)
 static int
 visit_substring_match_casefold(struct process_data *procdata, void *context)
 {
-  const struct casefolder * p = context;
-  const struct stringbuf * b = p->pbuf;
-  (void) procdata;
+  const char *pattern = context;

-  if (NULL != strstr(b->buffer, p->pattern))
+  if (NULL != mbscasestr(procdata->munged_filename, pattern))
    return VISIT_ACCEPTED;
  else
    return VISIT_REJECTED;
@ -1026,7 +987,6 @@ search_one_database (int argc,
 {
  char *pathpart; 		/* A pattern to consider. */
  int argn;			/* Index to current pattern in argv. */
-  int need_fold;	/* Set when folding and any pattern is non-glob. */
  int nread;		     /* number of bytes read from an entry. */
  struct process_data procdata;	/* Storage for data shared with visitors. */
  int slocate_seclevel;
@ -1161,24 +1121,6 @@ search_one_database (int argc,
  if (basename_only)
    add_visitor(visit_basename, NULL);
  
-  /* See if we need fold. */
-  if (ignore_case && !regex)
-    for ( argn = 0; argn < argc; argn++ )
-      {
-        pathpart = argv[argn];
-        if (!contains_metacharacter(pathpart))
-	  {
-	    need_fold = 1;
-	    break;
-	  }
-      }
-
-  if (need_fold)
-    {
-      add_visitor(visit_casefold, &casebuf);
-      casebuf.preqlen = &procdata.pathsize;
-    }
-  
  /* Add an inspector for each pattern we're looking for. */
  for ( argn = 0; argn < argc; argn++ )
    {
@ -1225,20 +1167,9 @@ search_one_database (int argc,
 	   * James Youngman <jay@gnu.org> 
 	   */
 	  if (ignore_case)
-	    {
-	      struct casefolder * cf = xmalloc(sizeof(*cf));
-	      cf->pattern = pathpart;
-	      cf->pbuf = &casebuf;
-	      add_visitor(visit_substring_match_casefold, cf);
-	      /* If we ignore case, convert it to lower now so we don't have to
-	       * do it every time
-	       */
-	      lc_strcpy(pathpart, pathpart);
-	    }
+	    add_visitor(visit_substring_match_casefold, pathpart);
 	  else
-	    {
-	      add_visitor(visit_substring_match_nocasefold, pathpart);
-	    }
+	    add_visitor(visit_substring_match_nocasefold, pathpart);
 	}
    }

@ -1352,7 +1283,7 @@ Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
      [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
      [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
      [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
-      [--max-database-age D] [-version] [--help]\n\
+      [--max-database-age D] [--version] [--help]\n\
      pattern...\n"),
 	   program_name);
  fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
--- a/locate/testsuite/locate.gnu/sv-bug-14535.exp
+++ b/locate/testsuite/locate.gnu/sv-bug-14535.exp
@ -0,0 +1,61 @@
+# NOTE: this file contains UTF8 characters.
+
+
+proc selectlocale { pattern } {
+    if [ catch { set locale_list [ split [ eval exec locale -a ] "\n" ] } ] {
+	# Failed to figure out which tests are supported.
+	return ""
+    }
+    foreach locale $locale_list {
+	if { [string match $pattern $locale] } {
+	    global env
+	    set env(LC_ALL) $locale
+	    return $locale
+	}
+    }
+    return ""
+}
+
+proc select_any_locale { patternlist } {
+    foreach pattern $patternlist {
+	set locale [ selectlocale $pattern ]
+	if { [ string length $locale ] } {
+	    return $locale
+	}
+    }
+    return ""
+}
+
+# localeoptions contains a list (in order of preference) of patterns
+# for the locales in which we want to perform part of this test.  If
+# any available locale matches one of the patterns, we run four extra
+# tests.  Otherwise, we skip them and issue a warning message.
+set localeoptions { 
+    "hu_HU.UTF-8"
+    "hu_*.UTF-8" 
+    "en_IE.utf8"
+    "en_GB.utf8"
+    "en_US.utf8"
+    "en_*.utf8"
+    "*.utf8"
+} 
+
+
+
+# Do the regular case-fold tests which only need ASCII support.
+locate_textonly p  a "teste\n" "-i teste" "teste\n"
+locate_textonly p  b "testE\n" "-i testE" "testE\n"
+locate_textonly p  c "testE\n" "-i teste" "testE\n"
+locate_textonly p  d "teste\n" "-i testE" "teste\n"
+
+set locale [ select_any_locale $localeoptions ]
+if { [ string length $locale ] } {
+    # We have a UTF-8 locale.  Do the extra tests.
+    locate_textonly p  0 "testé\n" "-i testé" "testé\n"
+    locate_textonly p  1 "testÉ\n" "-i testé" "testÉ\n"
+    locate_textonly p  2 "testé\n" "-i testÉ" "testé\n"
+    locate_textonly p  3 "testÉ\n" "-i testÉ" "testÉ\n"
+} else {
+    warning "Four tests have been skipped because I cannot find a UTF-8 locale configured on your system"
+}
+