Fixed Savannah bug #14535

This commit is contained in:
James Youngman 2007-03-03 04:23:51 +00:00
parent 62a35467dd
commit 404de03d73
5 changed files with 107 additions and 78 deletions

View File

@ -1,3 +1,31 @@
2007-03-03 James Youngman <jay@gnu.org>
* import-gnulib.config (modules): Also use Gnulib modules
mbscasestr and mbsstr in order to perform correct string searching
in multibyte strings, in order to fix Savannah bug #14535.
* locate/testsuite/locate.gnu/sv-bug-14535.exp: new test case for
Savannah bug #14535.
* locate/locate.c (visit_substring_match_nocasefold): Use mbsstr
rather than strstr, in order to correctly support multibyte
strings.
(visit_substring_match_casefold): Use mbscasestr rather than
strstr in order to correctly support case-folding in a multibyte
environment (e.g. with UTF-8 characters outside the normal ASCII
range). This fixes Savannah bug #14535.
(struct casefolder): No longer needed, removed.
(visit_casefold): No longer needed, removed.
(lc_strcpy): No longer needed, removed.
(search_one_database): Removed redundant variable need_fold and
the code which used to set it. It had controlled the adding of
the visitor function visit_casefold, but that function itself is
no longer required. Also there is now no need to pass in a
lower-case argument to visit_substring_match_casefold, so don't
pass that in the context argument.
* locate/locate.c (usage): Fixed typo.
2007-03-01 James Youngman <jay@gnu.org>
* doc/find.texi (Multiple Files): Document the construct

9
NEWS
View File

@ -1,6 +1,15 @@
GNU findutils NEWS - User visible changes. -*- outline -*- (allout)
* Major changes in release 4.3.3-CVS
** Bug Fixes
Savannah bug #14535: correctly support case-folding in locate (that
is, "locate -i") for multibyte character environments such as UTF-8.
Previously, if your search string contained a character outside the
single-byte range of the encoding (for example, a non-ASCII character
in UTF-8), then case-folding failed to work and only exact matches
were returned.
** Functional changes
The -printf action (and similar related actions) now support %S,

View File

@ -20,7 +20,7 @@ build-aux/texinfo.tex
# Solaris which lack those functions.
modules="\
alloca argmatch dirname error fileblocks fnmatch-gnu fopen-safer fts \
getline getopt human idcache lstat malloc memcmp memset mktime \
getline getopt human idcache lstat malloc mbscasestr mbsstr memcmp memset mktime \
modechange pathmax quotearg realloc regex rpmatch savedir \
stpcpy strdup strftime strstr strtol strtoul strtoull strtoumax \
xalloc xalloc-die xgetcwd xstrtod xstrtol xstrtoumax yesno human filemode \

View File

@ -85,13 +85,8 @@
#define NDEBUG
#include <assert.h>
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
#include <string.h>
#else
#include <strings.h>
#define strchr index
#endif
#ifdef STDC_HEADERS
#include <stdlib.h>
@ -143,7 +138,7 @@ extern int errno;
#include "regextype.h"
#include "gnulib-version.h"
/* Note that this evaluates C many times. */
/* Note that this evaluates Ch many times. */
#ifdef _LIBC
# define TOUPPER(Ch) toupper (Ch)
# define TOLOWER(Ch) tolower (Ch)
@ -316,17 +311,6 @@ locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
}
/* Copy the NUL-terminated string SRC into DEST, passing each char
 * through TOLOWER on the way, and NUL-terminate DEST.
 *
 * No bounds checking is done: DEST must have room for every char of
 * SRC plus the terminating NUL.  The copy works one char at a time,
 * reading each position before writing it, and one caller passes the
 * same pointer for DEST and SRC to lower-case a string in place.
 *
 * Because the conversion looks at a single char at a time, it cannot
 * fold a character that is encoded as more than one char.
 */
static void
lc_strcpy(char *dest, const char *src)
{
while (*src)
{
*dest++ = TOLOWER(*src);
++src;
}
/* Terminate the copy; the loop above stops before copying SRC's NUL. */
*dest = 0;
}
struct locate_limits
{
uintmax_t limit;
@ -356,12 +340,6 @@ struct stringbuf
static struct stringbuf casebuf;
/* Context handed to visit_substring_match_casefold: the pattern to
 * search for, and the buffer in which to search for it.
 */
struct casefolder
{
const char *pattern; /* needle passed to strstr(); lower-cased by the caller */
struct stringbuf *pbuf; /* its ->buffer is the haystack passed to strstr() */
};
struct regular_expression
{
struct re_pattern_buffer regex; /* for --regex */
@ -646,21 +624,6 @@ visit_basename(struct process_data *procdata, void *context)
}
/* Visitor which stores a lower-cased copy of the current
 * procdata->munged_filename in the struct stringbuf passed as CONTEXT.
 *
 * The buffer is grown to *preqlen+1 bytes first if it is smaller than
 * that.  Always returns VISIT_CONTINUE.
 */
static int
visit_casefold(struct process_data *procdata, void *context)
{
struct stringbuf *b = context;
/* Make sure the buffer can hold *preqlen bytes plus a terminating NUL. */
if (*b->preqlen+1 > b->buffersize)
{
b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
b->buffersize = *b->preqlen+1;
}
lc_strcpy(b->buffer, procdata->munged_filename);
return VISIT_CONTINUE;
}
/* visit_existing_follow implements -L -e */
static int
visit_existing_follow(struct process_data *procdata, void *context)
@ -754,7 +717,7 @@ visit_substring_match_nocasefold(struct process_data *procdata, void *context)
{
const char *pattern = context;
if (NULL != strstr(procdata->munged_filename, pattern))
if (NULL != mbsstr(procdata->munged_filename, pattern))
return VISIT_ACCEPTED;
else
return VISIT_REJECTED;
@ -763,11 +726,9 @@ visit_substring_match_nocasefold(struct process_data *procdata, void *context)
static int
visit_substring_match_casefold(struct process_data *procdata, void *context)
{
const struct casefolder * p = context;
const struct stringbuf * b = p->pbuf;
(void) procdata;
const char *pattern = context;
if (NULL != strstr(b->buffer, p->pattern))
if (NULL != mbscasestr(procdata->munged_filename, pattern))
return VISIT_ACCEPTED;
else
return VISIT_REJECTED;
@ -1026,7 +987,6 @@ search_one_database (int argc,
{
char *pathpart; /* A pattern to consider. */
int argn; /* Index to current pattern in argv. */
int need_fold; /* Set when folding and any pattern is non-glob. */
int nread; /* number of bytes read from an entry. */
struct process_data procdata; /* Storage for data shared with visitors. */
int slocate_seclevel;
@ -1161,24 +1121,6 @@ search_one_database (int argc,
if (basename_only)
add_visitor(visit_basename, NULL);
/* See if we need fold. */
if (ignore_case && !regex)
for ( argn = 0; argn < argc; argn++ )
{
pathpart = argv[argn];
if (!contains_metacharacter(pathpart))
{
need_fold = 1;
break;
}
}
if (need_fold)
{
add_visitor(visit_casefold, &casebuf);
casebuf.preqlen = &procdata.pathsize;
}
/* Add an inspector for each pattern we're looking for. */
for ( argn = 0; argn < argc; argn++ )
{
@ -1225,20 +1167,9 @@ search_one_database (int argc,
* James Youngman <jay@gnu.org>
*/
if (ignore_case)
{
struct casefolder * cf = xmalloc(sizeof(*cf));
cf->pattern = pathpart;
cf->pbuf = &casebuf;
add_visitor(visit_substring_match_casefold, cf);
/* If we ignore case, convert it to lower now so we don't have to
* do it every time
*/
lc_strcpy(pathpart, pathpart);
}
add_visitor(visit_substring_match_casefold, pathpart);
else
{
add_visitor(visit_substring_match_nocasefold, pathpart);
}
add_visitor(visit_substring_match_nocasefold, pathpart);
}
}
@ -1352,7 +1283,7 @@ Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
[--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
[-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
[-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
[--max-database-age D] [-version] [--help]\n\
[--max-database-age D] [--version] [--help]\n\
pattern...\n"),
program_name);
fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);

View File

@ -0,0 +1,61 @@
# NOTE: this file contains UTF8 characters.
# selectlocale PATTERN
#
# Run "locale -a" and look for the first listed locale whose name
# matches the glob PATTERN (as understood by "string match", which is
# case-sensitive).  If one is found, export it as LC_ALL through the
# global env array and return its name.  Return the empty string if no
# locale matches or if the list of locales cannot be obtained.
proc selectlocale { pattern } {
    # The condition is braced so that the result of catch is evaluated
    # once, as an expression, and exec is called directly rather than
    # through a redundant eval.
    if { [catch { set locale_list [split [exec locale -a] "\n"] }] } {
        # Failed to figure out which locales are supported.
        return ""
    }
    foreach locale $locale_list {
        if { [string match $pattern $locale] } {
            global env
            set env(LC_ALL) $locale
            return $locale
        }
    }
    return ""
}
# select_any_locale PATTERNLIST
#
# Try each glob pattern of PATTERNLIST in turn with selectlocale and
# return the name of the first locale that gets selected.  Return the
# empty string when none of the patterns selects a locale.
proc select_any_locale { patternlist } {
    foreach candidate $patternlist {
        set chosen [selectlocale $candidate]
        if { [string length $chosen] > 0 } {
            return $chosen
        }
    }
    return ""
}
# localeoptions contains a list (in order of preference) of the
# locales in which we want to perform part of this test. If we can
# use any locale matching any of the patterns, we run an extra four
# tests. Otherwise, we skip them and issue a warning message.
# Locale names are matched case-sensitively, and systems differ in how
# "locale -a" spells the codeset ("utf8" versus "UTF-8"), so the generic
# fallbacks at the end of the list cover both spellings.  The extra
# fallbacks come last so that they never displace an earlier choice.
set localeoptions {
"hu_HU.UTF-8"
"hu_*.UTF-8"
"en_IE.utf8"
"en_GB.utf8"
"en_US.utf8"
"en_*.utf8"
"*.utf8"
"en_*.UTF-8"
"*.UTF-8"
}
# Do the regular case-fold tests which only need ASCII support.
# NOTE(review): locate_textonly is defined elsewhere in the test suite;
# its arguments appear to be an expected-result flag, a test id, the
# database contents, the locate options and the expected output --
# confirm against its definition.
# Each group of four tests covers every combination of lower-case and
# upper-case final letter in the database entry and in the pattern.
locate_textonly p a "teste\n" "-i teste" "teste\n"
locate_textonly p b "testE\n" "-i testE" "testE\n"
locate_textonly p c "testE\n" "-i teste" "testE\n"
locate_textonly p d "teste\n" "-i testE" "teste\n"
# Selecting a locale also exports it as LC_ALL (see selectlocale), so
# the tests below run with that locale in the environment.
set locale [ select_any_locale $localeoptions ]
if { [ string length $locale ] } {
# We have a UTF-8 locale. Do the extra tests.
# Same four combinations as above, but the final letter is a
# non-ASCII character.
locate_textonly p 0 "testé\n" "-i testé" "testé\n"
locate_textonly p 1 "testÉ\n" "-i testé" "testÉ\n"
locate_textonly p 2 "testé\n" "-i testÉ" "testé\n"
locate_textonly p 3 "testÉ\n" "-i testÉ" "testÉ\n"
} else {
warning "Four tests have been skipped because I cannot find a UTF-8 locale configured on your system"
}