diff --git a/ChangeLog b/ChangeLog index 8232b3e0..48cf2018 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,31 @@ +2007-03-03 James Youngman + + * import-gnulib.config (modules): Also use Gnulib modules + mbscasestr and mbsstr in order to perform correct string searching + in multibyte strings, in order to fix Savannah bug #14535. + + * locate/testsuite/locate.gnu/sv-bug-14535.exp: new test case for + Savannah bug #14535. + + * locate/locate.c (visit_substring_match_nocasefold): Use mbsstr + rather than strstr, in order to correctly support multibyte + strings. + (visit_substring_match_casefold): Use mbscasestr rather than + strstr in order to correctly support case-folding in a multibyte + environment (e.g. with UTF-8 characters outside the normal ASCII + range). This fixes Savannah bug #14535. + (struct casefolder): No longer needed, removed. + (visit_casefold): No longer needed, removed. + (lc_strcpy): No longer needed, removed. + (search_one_database): Removed redundant variable need_fold and + the code which used to set it. It had controlled the adding of + the visitor function visit_casefold, but that function itself is + no longer required. Also there is now no need to pass in a + lower-case argument to visit_substring_match_casefold, so don't + pass that in the context argument. + + * locate/locate.c (usage): Fixed typo. + 2007-03-01 James Youngman * doc/find.texi (Multiple Files): Document the construct diff --git a/NEWS b/NEWS index 30e01c61..15e5a27f 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,15 @@ GNU findutils NEWS - User visible changes. -*- outline -*- (allout) * Major changes in release 4.3.3-CVS +** Bug Fixes + +Savannah bug #14535: correctly support case-folding in locate (that +is, "locate -i") for multibyte character environments such as UTF-8. 
+Previously, if your search string contained a character which was +outside the single-byte-encoding range (in UTF-8, for example), then the +case-folding behaviour failed to work and only exact matches would be +returned. + ** Functional changes The -printf action (and similar related actions) now support %S, diff --git a/import-gnulib.config b/import-gnulib.config index ba74bd61..60b57d25 100644 --- a/import-gnulib.config +++ b/import-gnulib.config @@ -20,7 +20,7 @@ build-aux/texinfo.tex # Solaris which lack those functions. modules="\ alloca argmatch dirname error fileblocks fnmatch-gnu fopen-safer fts \ -getline getopt human idcache lstat malloc memcmp memset mktime \ +getline getopt human idcache lstat malloc mbscasestr mbsstr memcmp memset mktime \ modechange pathmax quotearg realloc regex rpmatch savedir \ stpcpy strdup strftime strstr strtol strtoul strtoull strtoumax \ xalloc xalloc-die xgetcwd xstrtod xstrtol xstrtoumax yesno human filemode \ diff --git a/locate/locate.c b/locate/locate.c index 459239de..89541140 100644 --- a/locate/locate.c +++ b/locate/locate.c @@ -85,13 +85,8 @@ #define NDEBUG #include - -#if defined(HAVE_STRING_H) || defined(STDC_HEADERS) #include -#else -#include -#define strchr index -#endif + #ifdef STDC_HEADERS #include @@ -143,7 +138,7 @@ extern int errno; #include "regextype.h" #include "gnulib-version.h" -/* Note that this evaluates C many times. */ +/* Note that this evaluates Ch many times. 
*/ #ifdef _LIBC # define TOUPPER(Ch) toupper (Ch) # define TOLOWER(Ch) tolower (Ch) @@ -316,17 +311,6 @@ locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs) } -static void -lc_strcpy(char *dest, const char *src) -{ - while (*src) - { - *dest++ = TOLOWER(*src); - ++src; - } - *dest = 0; -} - struct locate_limits { uintmax_t limit; @@ -356,12 +340,6 @@ struct stringbuf static struct stringbuf casebuf; -struct casefolder -{ - const char *pattern; - struct stringbuf *pbuf; -}; - struct regular_expression { struct re_pattern_buffer regex; /* for --regex */ @@ -646,21 +624,6 @@ visit_basename(struct process_data *procdata, void *context) } -static int -visit_casefold(struct process_data *procdata, void *context) -{ - struct stringbuf *b = context; - - if (*b->preqlen+1 > b->buffersize) - { - b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */ - b->buffersize = *b->preqlen+1; - } - lc_strcpy(b->buffer, procdata->munged_filename); - - return VISIT_CONTINUE; -} - /* visit_existing_follow implements -L -e */ static int visit_existing_follow(struct process_data *procdata, void *context) @@ -754,7 +717,7 @@ visit_substring_match_nocasefold(struct process_data *procdata, void *context) { const char *pattern = context; - if (NULL != strstr(procdata->munged_filename, pattern)) + if (NULL != mbsstr(procdata->munged_filename, pattern)) return VISIT_ACCEPTED; else return VISIT_REJECTED; @@ -763,11 +726,9 @@ visit_substring_match_nocasefold(struct process_data *procdata, void *context) static int visit_substring_match_casefold(struct process_data *procdata, void *context) { - const struct casefolder * p = context; - const struct stringbuf * b = p->pbuf; - (void) procdata; + const char *pattern = context; - if (NULL != strstr(b->buffer, p->pattern)) + if (NULL != mbscasestr(procdata->munged_filename, pattern)) return VISIT_ACCEPTED; else return VISIT_REJECTED; @@ -1026,7 +987,6 @@ search_one_database (int argc, { char 
*pathpart; /* A pattern to consider. */ int argn; /* Index to current pattern in argv. */ - int need_fold; /* Set when folding and any pattern is non-glob. */ int nread; /* number of bytes read from an entry. */ struct process_data procdata; /* Storage for data shared with visitors. */ int slocate_seclevel; @@ -1161,24 +1121,6 @@ search_one_database (int argc, if (basename_only) add_visitor(visit_basename, NULL); - /* See if we need fold. */ - if (ignore_case && !regex) - for ( argn = 0; argn < argc; argn++ ) - { - pathpart = argv[argn]; - if (!contains_metacharacter(pathpart)) - { - need_fold = 1; - break; - } - } - - if (need_fold) - { - add_visitor(visit_casefold, &casebuf); - casebuf.preqlen = &procdata.pathsize; - } - /* Add an inspector for each pattern we're looking for. */ for ( argn = 0; argn < argc; argn++ ) { @@ -1225,20 +1167,9 @@ search_one_database (int argc, * James Youngman */ if (ignore_case) - { - struct casefolder * cf = xmalloc(sizeof(*cf)); - cf->pattern = pathpart; - cf->pbuf = &casebuf; - add_visitor(visit_substring_match_casefold, cf); - /* If we ignore case, convert it to lower now so we don't have to - * do it every time - */ - lc_strcpy(pathpart, pathpart); - } + add_visitor(visit_substring_match_casefold, pathpart); else - { - add_visitor(visit_substring_match_nocasefold, pathpart); - } + add_visitor(visit_substring_match_nocasefold, pathpart); } } @@ -1352,7 +1283,7 @@ Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\ [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\ [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\ [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\ - [--max-database-age D] [-version] [--help]\n\ + [--max-database-age D] [--version] [--help]\n\ pattern...\n"), program_name); fputs (_("\nReport bugs to .\n"), stream); diff --git a/locate/testsuite/locate.gnu/sv-bug-14535.exp b/locate/testsuite/locate.gnu/sv-bug-14535.exp new file mode 100644 
index 00000000..cc86b183 --- /dev/null +++ b/locate/testsuite/locate.gnu/sv-bug-14535.exp @@ -0,0 +1,61 @@ +# NOTE: this file contains UTF8 characters. + + +proc selectlocale { pattern } { + if [ catch { set locale_list [ split [ eval exec locale -a ] "\n" ] } ] { + # Failed to figure out which tests are supported. + return "" + } + foreach locale $locale_list { + if { [string match $pattern $locale] } { + global env + set env(LC_ALL) $locale + return $locale + } + } + return "" +} + +proc select_any_locale { patternlist } { + foreach pattern $patternlist { + set locale [ selectlocale $pattern ] + if { [ string length $locale ] } { + return $locale + } + } + return "" +} + +# localeoptions contains a list (in order of preference) of the +# locales in which we want to perform part of this test. If we can +# use any locale matching any of the patterns, we run an extra four +# tests. Otherwise, we skip them and issue a warning message. +set localeoptions { + "hu_HU.UTF-8" + "hu_*.UTF-8" + "en_IE.utf8" + "en_GB.utf8" + "en_US.utf8" + "en_*.utf8" + "*.utf8" +} + + + +# Do the regular case-fold tests which only need ASCII support. +locate_textonly p a "teste\n" "-i teste" "teste\n" +locate_textonly p b "testE\n" "-i testE" "testE\n" +locate_textonly p c "testE\n" "-i teste" "testE\n" +locate_textonly p d "teste\n" "-i testE" "teste\n" + +set locale [ select_any_locale $localeoptions ] +if { [ string length $locale ] } { + # We have a UTF-8 locale. Do the extra tests. + locate_textonly p 0 "testé\n" "-i testé" "testé\n" + locate_textonly p 1 "testÉ\n" "-i testé" "testÉ\n" + locate_textonly p 2 "testé\n" "-i testÉ" "testé\n" + locate_textonly p 3 "testÉ\n" "-i testÉ" "testÉ\n" +} else { + warning "Four tests have been skipped because I cannot find a UTF-8 locale configured on your system" +} +