mirror of
https://https.git.savannah.gnu.org/git/findutils.git
synced 2026-01-26 15:39:06 +00:00
Fixed Savannah bug #14535
This commit is contained in:
parent
62a35467dd
commit
404de03d73
28
ChangeLog
28
ChangeLog
@ -1,3 +1,31 @@
|
||||
2007-03-03 James Youngman <jay@gnu.org>
|
||||
|
||||
* import-gnulib.config (modules): Also use Gnulib modules
|
||||
mbscasestr and mbsstr in order to perform correct string searching
|
||||
in multibyte strings, in order to fix Savannah bug #14535.
|
||||
|
||||
* locate/testsuite/locate.gnu/sv-bug-14535.exp: new test case for
|
||||
Savannah bug #14535.
|
||||
|
||||
* locate/locate.c (visit_substring_match_nocasefold): Use mbsstr
|
||||
rather than strstr, in order to correctly support multibyte
|
||||
strings.
|
||||
(visit_substring_match_casefold): Use mbscasestr rather than
|
||||
strstr in order to correctly support case-folding in a multibyte
|
||||
environment (e.g. with UTF-8 characters outside the normal ASCII
|
||||
range). This fixes Savannah bug #14535.
|
||||
(struct casefolder): No longer needed, removed.
|
||||
(visit_casefold): No longer needed, removed.
|
||||
(lc_strcpy): No longer needed, removed.
|
||||
(search_one_database): Removed redundant variable need_fold and
|
||||
the code which used to set it. It had controlled the adding of
|
||||
the visitor function visit_casefold, but that function itself is
|
||||
no longer required. Also there is now no need to pass in a
|
||||
lower-case argument to visit_substring_match_casefold, so don't
|
||||
pass that in the context argument.
|
||||
|
||||
* locate/locate.c (usage): Fixed typo.
|
||||
|
||||
2007-03-01 James Youngman <jay@gnu.org>
|
||||
|
||||
* doc/find.texi (Multiple Files): Document the construct
|
||||
|
||||
9
NEWS
9
NEWS
@ -1,6 +1,15 @@
|
||||
GNU findutils NEWS - User visible changes. -*- outline -*- (allout)
|
||||
* Major changes in release 4.3.3-CVS
|
||||
|
||||
** Bug Fixes
|
||||
|
||||
Savannah bug #14535: correctly support case-folding in locate (that
|
||||
is, "locate -i") for multibyte character environments such as UTF-8.
|
||||
Previously, if your search string contained a character which was
|
||||
outside the single-byte-encoding range (in UTF-8, for example), then the
|
||||
case-folding behaviour failed to work and only exact matches would be
|
||||
returned.
|
||||
|
||||
** Functional changes
|
||||
|
||||
The -printf action (and similar related actions) now support %S,
|
||||
|
||||
@ -20,7 +20,7 @@ build-aux/texinfo.tex
|
||||
# Solaris which lack those functions.
|
||||
modules="\
|
||||
alloca argmatch dirname error fileblocks fnmatch-gnu fopen-safer fts \
|
||||
getline getopt human idcache lstat malloc memcmp memset mktime \
|
||||
getline getopt human idcache lstat malloc mbscasestr mbsstr memcmp memset mktime \
|
||||
modechange pathmax quotearg realloc regex rpmatch savedir \
|
||||
stpcpy strdup strftime strstr strtol strtoul strtoull strtoumax \
|
||||
xalloc xalloc-die xgetcwd xstrtod xstrtol xstrtoumax yesno human filemode \
|
||||
|
||||
@ -85,13 +85,8 @@
|
||||
|
||||
#define NDEBUG
|
||||
#include <assert.h>
|
||||
|
||||
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
|
||||
#include <string.h>
|
||||
#else
|
||||
#include <strings.h>
|
||||
#define strchr index
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef STDC_HEADERS
|
||||
#include <stdlib.h>
|
||||
@ -143,7 +138,7 @@ extern int errno;
|
||||
#include "regextype.h"
|
||||
#include "gnulib-version.h"
|
||||
|
||||
/* Note that this evaluates C many times. */
|
||||
/* Note that this evaluates Ch many times. */
|
||||
#ifdef _LIBC
|
||||
# define TOUPPER(Ch) toupper (Ch)
|
||||
# define TOLOWER(Ch) tolower (Ch)
|
||||
@ -316,17 +311,6 @@ locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lc_strcpy(char *dest, const char *src)
|
||||
{
|
||||
while (*src)
|
||||
{
|
||||
*dest++ = TOLOWER(*src);
|
||||
++src;
|
||||
}
|
||||
*dest = 0;
|
||||
}
|
||||
|
||||
struct locate_limits
|
||||
{
|
||||
uintmax_t limit;
|
||||
@ -356,12 +340,6 @@ struct stringbuf
|
||||
static struct stringbuf casebuf;
|
||||
|
||||
|
||||
struct casefolder
|
||||
{
|
||||
const char *pattern;
|
||||
struct stringbuf *pbuf;
|
||||
};
|
||||
|
||||
struct regular_expression
|
||||
{
|
||||
struct re_pattern_buffer regex; /* for --regex */
|
||||
@ -646,21 +624,6 @@ visit_basename(struct process_data *procdata, void *context)
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
visit_casefold(struct process_data *procdata, void *context)
|
||||
{
|
||||
struct stringbuf *b = context;
|
||||
|
||||
if (*b->preqlen+1 > b->buffersize)
|
||||
{
|
||||
b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
|
||||
b->buffersize = *b->preqlen+1;
|
||||
}
|
||||
lc_strcpy(b->buffer, procdata->munged_filename);
|
||||
|
||||
return VISIT_CONTINUE;
|
||||
}
|
||||
|
||||
/* visit_existing_follow implements -L -e */
|
||||
static int
|
||||
visit_existing_follow(struct process_data *procdata, void *context)
|
||||
@ -754,7 +717,7 @@ visit_substring_match_nocasefold(struct process_data *procdata, void *context)
|
||||
{
|
||||
const char *pattern = context;
|
||||
|
||||
if (NULL != strstr(procdata->munged_filename, pattern))
|
||||
if (NULL != mbsstr(procdata->munged_filename, pattern))
|
||||
return VISIT_ACCEPTED;
|
||||
else
|
||||
return VISIT_REJECTED;
|
||||
@ -763,11 +726,9 @@ visit_substring_match_nocasefold(struct process_data *procdata, void *context)
|
||||
static int
|
||||
visit_substring_match_casefold(struct process_data *procdata, void *context)
|
||||
{
|
||||
const struct casefolder * p = context;
|
||||
const struct stringbuf * b = p->pbuf;
|
||||
(void) procdata;
|
||||
const char *pattern = context;
|
||||
|
||||
if (NULL != strstr(b->buffer, p->pattern))
|
||||
if (NULL != mbscasestr(procdata->munged_filename, pattern))
|
||||
return VISIT_ACCEPTED;
|
||||
else
|
||||
return VISIT_REJECTED;
|
||||
@ -1026,7 +987,6 @@ search_one_database (int argc,
|
||||
{
|
||||
char *pathpart; /* A pattern to consider. */
|
||||
int argn; /* Index to current pattern in argv. */
|
||||
int need_fold; /* Set when folding and any pattern is non-glob. */
|
||||
int nread; /* number of bytes read from an entry. */
|
||||
struct process_data procdata; /* Storage for data shared with visitors. */
|
||||
int slocate_seclevel;
|
||||
@ -1161,24 +1121,6 @@ search_one_database (int argc,
|
||||
if (basename_only)
|
||||
add_visitor(visit_basename, NULL);
|
||||
|
||||
/* See if we need fold. */
|
||||
if (ignore_case && !regex)
|
||||
for ( argn = 0; argn < argc; argn++ )
|
||||
{
|
||||
pathpart = argv[argn];
|
||||
if (!contains_metacharacter(pathpart))
|
||||
{
|
||||
need_fold = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (need_fold)
|
||||
{
|
||||
add_visitor(visit_casefold, &casebuf);
|
||||
casebuf.preqlen = &procdata.pathsize;
|
||||
}
|
||||
|
||||
/* Add an inspector for each pattern we're looking for. */
|
||||
for ( argn = 0; argn < argc; argn++ )
|
||||
{
|
||||
@ -1225,20 +1167,9 @@ search_one_database (int argc,
|
||||
* James Youngman <jay@gnu.org>
|
||||
*/
|
||||
if (ignore_case)
|
||||
{
|
||||
struct casefolder * cf = xmalloc(sizeof(*cf));
|
||||
cf->pattern = pathpart;
|
||||
cf->pbuf = &casebuf;
|
||||
add_visitor(visit_substring_match_casefold, cf);
|
||||
/* If we ignore case, convert it to lower now so we don't have to
|
||||
* do it every time
|
||||
*/
|
||||
lc_strcpy(pathpart, pathpart);
|
||||
}
|
||||
add_visitor(visit_substring_match_casefold, pathpart);
|
||||
else
|
||||
{
|
||||
add_visitor(visit_substring_match_nocasefold, pathpart);
|
||||
}
|
||||
add_visitor(visit_substring_match_nocasefold, pathpart);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1352,7 +1283,7 @@ Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
|
||||
[--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
|
||||
[-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
|
||||
[-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
|
||||
[--max-database-age D] [-version] [--help]\n\
|
||||
[--max-database-age D] [--version] [--help]\n\
|
||||
pattern...\n"),
|
||||
program_name);
|
||||
fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
|
||||
|
||||
61
locate/testsuite/locate.gnu/sv-bug-14535.exp
Normal file
61
locate/testsuite/locate.gnu/sv-bug-14535.exp
Normal file
@ -0,0 +1,61 @@
|
||||
# NOTE: this file contains UTF8 characters.
|
||||
|
||||
|
||||
# selectlocale PATTERN
#
# Runs "locale -a" to obtain the locales installed on this system and
# looks for the first one whose name glob-matches PATTERN.
#
# On a match, LC_ALL in the global environment array is set to that
# locale (a deliberate side effect: it stays set after we return) and
# the locale name is returned.  The empty string is returned when
# "locale -a" cannot be run, or when no installed locale matches.
proc selectlocale { pattern } {
    if { [catch { set available [split [exec locale -a] "\n"] }] } {
        # Failed to figure out which tests are supported.
        return ""
    }

    # Index of the first installed locale matching the glob, or -1.
    set idx [lsearch -glob $available $pattern]
    if { $idx < 0 } {
        return ""
    }

    global env
    set chosen [lindex $available $idx]
    set env(LC_ALL) $chosen
    return $chosen
}
|
||||
|
||||
# select_any_locale PATTERNLIST
#
# Tries each glob pattern of PATTERNLIST in turn, in the order given,
# by handing it to selectlocale.  The first non-empty locale name that
# selectlocale returns is returned to the caller (selectlocale has by
# then already set LC_ALL to it).  Returns the empty string when none
# of the patterns yields a locale.
proc select_any_locale { patternlist } {
    foreach candidate $patternlist {
        set chosen [selectlocale $candidate]
        if { $chosen ne "" } {
            return $chosen
        }
    }
    return ""
}
|
||||
|
||||
# localeoptions contains a list (in order of preference) of the
# locales in which we want to perform part of this test.  If we can
# use any locale matching any of the patterns, we run an extra four
# tests.  Otherwise, we skip them and issue a warning message.
#
# The patterns are glob-matched case-sensitively against the output of
# "locale -a", which some systems spell "xx_YY.utf8" and others
# "xx_YY.UTF-8".  The trailing "UTF-8" fallbacks let the multibyte
# tests run on the latter systems too; they come last so that the
# choice is unchanged wherever an earlier pattern already matched.
set localeoptions {
    "hu_HU.UTF-8"
    "hu_*.UTF-8"
    "en_IE.utf8"
    "en_GB.utf8"
    "en_US.utf8"
    "en_*.utf8"
    "*.utf8"
    "en_*.UTF-8"
    "*.UTF-8"
}

# Do the regular case-fold tests which only need ASCII support.
locate_textonly p a "teste\n" "-i teste" "teste\n"
locate_textonly p b "testE\n" "-i testE" "testE\n"
locate_textonly p c "testE\n" "-i teste" "testE\n"
locate_textonly p d "teste\n" "-i testE" "teste\n"

# Selecting a locale sets LC_ALL in the global environment as a side
# effect.  Remember the value we were started with so that it can be
# put back afterwards instead of leaking into whatever runs next.
set lc_all_was_set [info exists ::env(LC_ALL)]
if { $lc_all_was_set } {
    set saved_lc_all $::env(LC_ALL)
}

set locale [ select_any_locale $localeoptions ]
if { [ string length $locale ] } {
    # We have a UTF-8 locale.  Do the extra tests.
    locate_textonly p 0 "testé\n" "-i testé" "testé\n"
    locate_textonly p 1 "testÉ\n" "-i testé" "testÉ\n"
    locate_textonly p 2 "testé\n" "-i testÉ" "testé\n"
    locate_textonly p 3 "testÉ\n" "-i testÉ" "testÉ\n"
} else {
    warning "Four tests have been skipped because I cannot find a UTF-8 locale configured on your system"
}

# Restore the LC_ALL setting that was in effect before the locale search.
if { $lc_all_was_set } {
    set ::env(LC_ALL) $saved_lc_all
} elseif { [info exists ::env(LC_ALL)] } {
    unset ::env(LC_ALL)
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user