mirror of
https://https.git.savannah.gnu.org/git/findutils.git
synced 2026-01-26 15:39:06 +00:00
1348 lines
32 KiB
C
1348 lines
32 KiB
C
/* locate -- search databases for filenames that match patterns
|
|
Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
|
|
2004, 2005 Free Software Foundation, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2, or (at your option)
|
|
any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
|
USA.
|
|
*/
|
|
|
|
/* Usage: locate [options] pattern...
|
|
|
|
Scan a pathname list for the full pathname of a file, given only
|
|
a piece of the name (possibly containing shell globbing metacharacters).
|
|
The list has been processed with front-compression, which reduces
|
|
the list size by a factor of 4-5.
|
|
Recognizes two database formats, old and new. The old format is
|
|
bigram coded, which reduces space by a further 20-25% and uses the
|
|
following encoding of the database bytes:
|
|
|
|
0-28 likeliest differential counts + offset (14) to make nonnegative
|
|
30 escape code for out-of-range count to follow in next halfword
|
|
128-255 bigram codes (the 128 most common, as determined by `updatedb')
|
|
32-127 single character (printable) ASCII remainder
|
|
|
|
Earlier versions of GNU locate used to use a novel two-tiered
|
|
string search technique, which was described in Usenix ;login:, Vol
|
|
8, No 1, February/March, 1983, p. 8.
|
|
|
|
However, latterly code changes to provide additional functionality
|
|
became difficult to make with the existing reading scheme, and so
|
|
we no longer perform the matching as efficiently as we used to (that is,
|
|
we no longer use the same algorithm).
|
|
|
|
The old algorithm was:
|
|
|
|
First, match a metacharacter-free subpattern and a partial
|
|
pathname BACKWARDS to avoid full expansion of the pathname list.
|
|
The time savings is 40-50% over forward matching, which cannot
|
|
efficiently handle overlapped search patterns and compressed
|
|
path remainders.
|
|
|
|
Then, match the actual shell glob pattern (if in this form)
|
|
against the candidate pathnames using the slower shell filename
|
|
matching routines.
|
|
|
|
|
|
Written by James A. Woods <jwoods@adobe.com>.
|
|
Modified by David MacKenzie <djm@gnu.org>.
|
|
Additional work by James Youngman and Bas van Gompel.
|
|
*/
|
|
|
|
#include <config.h>
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <time.h>
|
|
#include <fnmatch.h>
|
|
#include <getopt.h>
|
|
#include <xstrtol.h>
|
|
|
|
/* The presence of unistd.h is assumed by gnulib these days, so we
|
|
* might as well assume it too.
|
|
*/
|
|
/* We need <unistd.h> for isatty(). */
|
|
#include <unistd.h>
|
|
|
|
|
|
#define NDEBUG
|
|
#include <assert.h>
|
|
|
|
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
|
|
#include <string.h>
|
|
#else
|
|
#include <strings.h>
|
|
#define strchr index
|
|
#endif
|
|
|
|
#ifdef STDC_HEADERS
|
|
#include <stdlib.h>
|
|
#endif
|
|
|
|
#ifdef HAVE_ERRNO_H
|
|
#include <errno.h>
|
|
#else
|
|
extern int errno;
|
|
#endif
|
|
|
|
#ifdef HAVE_LOCALE_H
|
|
#include <locale.h>
|
|
#endif
|
|
|
|
#if ENABLE_NLS
|
|
# include <libintl.h>
|
|
# define _(Text) gettext (Text)
|
|
#else
|
|
# define _(Text) Text
|
|
#define textdomain(Domain)
|
|
#define bindtextdomain(Package, Directory)
|
|
#endif
|
|
#ifdef gettext_noop
|
|
# define N_(String) gettext_noop (String)
|
|
#else
|
|
/* We used to use (String) instead of just String, but apparently ISO C
|
|
* doesn't allow this (at least, that's what HP said when someone reported
|
|
* this as a compiler bug). This is HP case number 1205608192. See
|
|
* also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
|
|
* ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
|
|
* like: static const char buf[] = ("string");
|
|
*/
|
|
# define N_(String) String
|
|
#endif
|
|
|
|
#include "locatedb.h"
|
|
#include <getline.h>
|
|
#include "../gnulib/lib/xalloc.h"
|
|
#include "../gnulib/lib/error.h"
|
|
#include "../gnulib/lib/human.h"
|
|
#include "dirname.h"
|
|
#include "closeout.h"
|
|
#include "nextelem.h"
|
|
#include "regex.h"
|
|
#include "quote.h"
|
|
#include "quotearg.h"
|
|
#include "printquoted.h"
|
|
#include "regextype.h"
|
|
|
|
|
|
/* Note that this evaluates C many times. */
|
|
#ifdef _LIBC
|
|
# define TOUPPER(Ch) toupper (Ch)
|
|
# define TOLOWER(Ch) tolower (Ch)
|
|
#else
|
|
# define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
|
|
# define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
|
|
#endif
|
|
|
|
/* typedef enum {false, true} boolean; */
|
|
|
|
/* Warn if a database is older than this. 8 days allows for a weekly
|
|
update that takes up to a day to perform. */
|
|
#define WARN_NUMBER_UNITS (8)
|
|
/* Printable name of units used in WARN_SECONDS */
|
|
static const char warn_name_units[] = N_("days");
|
|
#define SECONDS_PER_UNIT (60 * 60 * 24)
|
|
|
|
#define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
|
|
|
|
/* Verdict returned by a visitor.  Each value is a distinct bit so that
   the processing loops can test several verdicts with a single mask.  */
enum visit_result
  {
    VISIT_CONTINUE = 0x1,  /* please call the next visitor */
    VISIT_ACCEPTED = 0x2,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 0x4,  /* rejected, process next file. */
    VISIT_ABORT    = 0x8   /* rejected, process no more files. */
  };
|
|
|
|
/* Which database entries are acceptable, judged by whether the named
   file currently exists.  */
enum ExistenceCheckType
  {
    ACCEPT_EITHER,        /* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING,      /* Corresponds to option -e */
    ACCEPT_NON_EXISTING   /* Corresponds to option -E */
  };

/* Check for existence of files before printing them out?
   Defaults to no check at all.  */
enum ExistenceCheckType check_existence = ACCEPT_EITHER;
|
|
|
|
/* Nonzero selects stat() for the existence checks (-L, the default);
   zero selects lstat() (-P / -H).  */
static int follow_symlinks = 1;

/* What to separate the results with. */
static int separator = '\n';

/* Quoting configuration handed to print_quoted(); created in main(). */
static struct quoting_options *quote_opts = NULL;

/* Whether standard output is a terminal; determined in main(). */
static bool stdout_is_a_tty;

/* Whether results go through print_quoted() (true) or are written raw. */
static bool print_quoted_filename;
|
|
|
|
/* Read in a 16-bit int, high byte first (network byte order).

   FP is the stream to read from.  Returns the two bytes interpreted as a
   signed 16-bit two's-complement value.  No end-of-file check is made:
   an EOF result from fgetc() contributes the byte value 0xff.

   The value is assembled from unsigned bytes and sign-adjusted
   arithmetically, because left-shifting a negative value (as the
   expression `(signed char) c << 8' does) is undefined behaviour.  */
static short
get_short (FILE *fp)
{
  int hi = fgetc (fp);
  int lo = fgetc (fp);
  unsigned int u = (((unsigned int) hi & 0xffu) << 8)
                   | ((unsigned int) lo & 0xffu);

  /* Map 0x8000..0xffff onto -32768..-1. */
  if (u >= 0x8000u)
    return (short) ((int) u - 0x10000);
  return (short) u;
}
|
|
|
|
/* The characters whose presence makes a pattern a shell glob. */
const char * const metacharacters = "*?[]\\";

/* Return 1 if S contains any of the characters in `metacharacters',
   and 0 otherwise.  */
static int
contains_metacharacter(const char *s)
{
  return strpbrk(s, metacharacters) != NULL;
}
|
|
|
|
/* locate_read_str()
 *
 * Read bytes from FP into the buffer at offset OFFS in (*BUF),
 * until we reach DELIMITER or end-of-file.  We reallocate the buffer
 * as necessary, altering (*BUF) and (*SIZ) as appropriate.  No assumption
 * is made regarding the content of the data (i.e. the implementation is
 * 8-bit clean, the only delimiter is DELIMITER).
 *
 * Returns the number of bytes read (including the delimiter, if one was
 * seen), or a negative value if nothing could be read or the buffer could
 * not be enlarged.  On success the bytes stored at (*BUF)+OFFS are always
 * followed by a NUL terminator, even when the stream ended before a
 * delimiter was found.
 *
 * The temporary buffer obtained from getdelim() is released on every
 * path, including the failure paths.
 *
 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
 * has been removed from gnulib.
 *
 * We call the function locate_read_str() to avoid a name clash with the curses
 * function getstr().
 */
static int
locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char *p = NULL;
  size_t sz = 0;
  size_t needed;
  int nread;

  nread = getdelim(&p, &sz, delimiter, fp);
  if (nread < 0)
    {
      /* getdelim() may have allocated a buffer even though it failed. */
      free(p);
      return nread;
    }

  /* Room for the existing prefix, the new bytes and a terminator. */
  needed = (size_t) offs + (size_t) nread + 1u;
  if (needed > (*siz))
    {
      char *pnew = realloc(*buf, needed);
      if (NULL == pnew)
        {
          free(p);
          return -1;            /* FAIL */
        }
      *siz = needed;
      *buf = pnew;
    }

  /* getdelim() NUL-terminates its result, so copying one extra byte
   * carries the terminator across as well.
   */
  memcpy((*buf) + offs, p, (size_t) nread + 1u);
  free(p);
  return nread;
}
|
|
|
|
|
|
/* Copy the NUL-terminated string SRC to DEST, converting each character
   to lower case on the way.  DEST must have room for strlen (SRC) + 1
   bytes.  DEST may be the same pointer as SRC (in-place conversion).

   Each character is converted to unsigned char before being handed to
   tolower(): the <ctype.h> functions are only defined for values
   representable as unsigned char (and EOF), and plain char may be
   negative for bytes with the high bit set.  */
static void
lc_strcpy(char *dest, const char *src)
{
  while (*src)
    {
      *dest++ = (char) tolower ((unsigned char) *src);
      ++src;
    }
  *dest = 0;
}
|
|
|
|
/* Bookkeeping for --limit and --count.  */
static struct locate_limits
{
  uintmax_t limit;           /* Value given to --limit / -l. */
  uintmax_t items_accepted;  /* Incremented by visit_limit() and visit_count(). */
} limits;
|
|
|
|
|
|
/* Counters gathered for --statistics; updated by visit_stats() and
   reported by print_stats().  */
static struct locate_stats
{
  uintmax_t compressed_bytes;        /* Only reset in main(); not otherwise updated here. */
  uintmax_t total_filename_count;    /* Number of entries seen. */
  uintmax_t total_filename_length;   /* Sum of strlen() over all entries. */
  uintmax_t whitespace_count;        /* Entries containing whitespace. */
  uintmax_t newline_count;           /* Entries containing a newline. */
  uintmax_t highbit_filename_count;  /* Entries containing a byte with bit 7 set. */
} statistics;
|
|
|
|
|
|
/* Scratch buffer that receives the lower-cased copy of the current
   filename (filled in by visit_casefold()).  */
static struct stringbuf
{
  char *buffer;        /* The lower-cased text. */
  size_t buffersize;   /* Bytes allocated for `buffer'. */
  size_t *soffs;       /* Set up by locate(); not dereferenced in this file. */
  size_t *preqlen;     /* Points at the size the buffer must be able to hold. */
} casebuf;
|
|
|
|
|
|
/* Context for visit_substring_match_casefold(): the (already
   lower-cased) pattern and the buffer holding the lower-cased filename.  */
struct casefolder
{
  const char *pattern;
  struct stringbuf *pbuf;
};

/* Context for visit_regex(). */
struct regular_expression
{
  struct re_pattern_buffer regex;   /* for --regex */
};
|
|
|
|
|
|
/* State shared between locate() and the visitor functions while one
   database is being read.  */
struct process_data
{
  int c;                    /* An input byte. */
  int count;                /* The length of the prefix shared with the previous database entry. */
  int len;                  /* Length bookkeeping used to validate `count'. */
  char *original_filename;  /* The current input database entry. */
  size_t pathsize;          /* Amount allocated for it. */
  char *munged_filename;    /* path or base_name(path) */
  FILE *fp;                 /* The pathname database. */
  char *dbfile;             /* Its name, or "<stdin>" */

  /* for the old database format,
     the first and second characters of the most common bigrams.  */
  char bigram1[128];
  char bigram2[128];
};
|
|
|
|
|
|
/* A visitor inspects the current entry and returns one of the
   VISIT_* verdicts.  CONTEXT is the visitor's private data.  */
typedef int (*visitfunc)(struct process_data *procdata,
                         void *context);

/* One node of the singly linked list of visitors. */
struct visitor
{
  visitfunc inspector;     /* Function to call. */
  void *context;           /* Passed to it as its second argument. */
  struct visitor *next;    /* Following node, or NULL. */
};

/* Head and tail of the visitor list, and the first visitor that comes
   after the per-pattern visitors.  */
static struct visitor *inspectors = NULL;
static struct visitor *lastinspector = NULL;
static struct visitor *past_pat_inspector = NULL;
|
|
|
|
/* 0 or 1 pattern(s) */
|
|
static int
|
|
process_simple(struct process_data *procdata)
|
|
{
|
|
int result = VISIT_CONTINUE;
|
|
const struct visitor *p = inspectors;
|
|
|
|
while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
|
|
{
|
|
result = (p->inspector)(procdata, p->context);
|
|
p = p->next;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/* Accept if any pattern matches. */
|
|
static int
|
|
process_or (struct process_data *procdata)
|
|
{
|
|
int result = VISIT_CONTINUE;
|
|
const struct visitor *p = inspectors;
|
|
|
|
while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
|
|
{
|
|
result = (p->inspector)(procdata, p->context);
|
|
p = p->next;
|
|
}
|
|
|
|
if (result == VISIT_CONTINUE)
|
|
result = VISIT_REJECTED;
|
|
if (result & (VISIT_ABORT | VISIT_REJECTED))
|
|
return result;
|
|
|
|
p = past_pat_inspector;
|
|
result = VISIT_CONTINUE;
|
|
|
|
while ( (VISIT_CONTINUE == result) && (NULL != p) )
|
|
{
|
|
result = (p->inspector)(procdata, p->context);
|
|
p = p->next;
|
|
}
|
|
|
|
if (VISIT_CONTINUE == result)
|
|
return VISIT_ACCEPTED;
|
|
else
|
|
return result;
|
|
}
|
|
|
|
/* Accept if all pattern match. */
|
|
static int
|
|
process_and (struct process_data *procdata)
|
|
{
|
|
int result = VISIT_CONTINUE;
|
|
const struct visitor *p = inspectors;
|
|
|
|
while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
|
|
{
|
|
result = (p->inspector)(procdata, p->context);
|
|
p = p->next;
|
|
}
|
|
|
|
if (result == VISIT_CONTINUE)
|
|
result = VISIT_REJECTED;
|
|
if (result & (VISIT_ABORT | VISIT_REJECTED))
|
|
return result;
|
|
|
|
p = past_pat_inspector;
|
|
result = VISIT_CONTINUE;
|
|
|
|
while ( (VISIT_CONTINUE == result) && (NULL != p) )
|
|
{
|
|
result = (p->inspector)(procdata, p->context);
|
|
p = p->next;
|
|
}
|
|
|
|
if (VISIT_CONTINUE == result)
|
|
return VISIT_ACCEPTED;
|
|
else
|
|
return result;
|
|
}
|
|
|
|
/* A per-entry processing strategy (process_simple, process_or or
   process_and).  */
typedef int (*processfunc)(struct process_data *procdata);

/* The strategy chosen by locate() for the current database. */
static processfunc mainprocessor = NULL;
|
|
|
|
static void
|
|
add_visitor(visitfunc fn, void *context)
|
|
{
|
|
struct visitor *p = xmalloc(sizeof(struct visitor));
|
|
p->inspector = fn;
|
|
p->context = context;
|
|
p->next = NULL;
|
|
|
|
if (NULL == lastinspector)
|
|
{
|
|
lastinspector = inspectors = p;
|
|
}
|
|
else
|
|
{
|
|
lastinspector->next = p;
|
|
lastinspector = p;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
visit_justprint_quoted(struct process_data *procdata, void *context)
|
|
{
|
|
(void) context;
|
|
print_quoted (stdout, quote_opts, stdout_is_a_tty,
|
|
"%s",
|
|
procdata->original_filename);
|
|
putchar(separator);
|
|
return VISIT_CONTINUE;
|
|
}
|
|
|
|
static int
|
|
visit_justprint_unquoted(struct process_data *procdata, void *context)
|
|
{
|
|
(void) context;
|
|
fputs(procdata->original_filename, stdout);
|
|
putchar(separator);
|
|
return VISIT_CONTINUE;
|
|
}
|
|
|
|
static int
|
|
visit_old_format(struct process_data *procdata, void *context)
|
|
{
|
|
register char *s;
|
|
(void) context;
|
|
|
|
/* Get the offset in the path where this path info starts. */
|
|
if (procdata->c == LOCATEDB_OLD_ESCAPE)
|
|
procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
|
|
else
|
|
procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
|
|
|
|
/* Overlay the old path with the remainder of the new. */
|
|
for (s = procdata->original_filename + procdata->count;
|
|
(procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
|
|
if (procdata->c < 0200)
|
|
*s++ = procdata->c; /* An ordinary character. */
|
|
else
|
|
{
|
|
/* Bigram markers have the high bit set. */
|
|
procdata->c &= 0177;
|
|
*s++ = procdata->bigram1[procdata->c];
|
|
*s++ = procdata->bigram2[procdata->c];
|
|
}
|
|
*s-- = '\0';
|
|
|
|
procdata->munged_filename = procdata->original_filename;
|
|
|
|
return VISIT_CONTINUE;
|
|
}
|
|
|
|
|
|
static int
|
|
visit_locate02_format(struct process_data *procdata, void *context)
|
|
{
|
|
register char *s;
|
|
int nread;
|
|
(void) context;
|
|
|
|
if (procdata->c == LOCATEDB_ESCAPE)
|
|
procdata->count += (short)get_short (procdata->fp);
|
|
else if (procdata->c > 127)
|
|
procdata->count += procdata->c - 256;
|
|
else
|
|
procdata->count += procdata->c;
|
|
|
|
if (procdata->count > procdata->len || procdata->count < 0)
|
|
{
|
|
/* This should not happen generally , but since we're
|
|
* reading in data which is outside our control, we
|
|
* cannot prevent it.
|
|
*/
|
|
error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
|
|
}
|
|
|
|
/* Overlay the old path with the remainder of the new. */
|
|
nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
|
|
procdata->fp, 0, procdata->count);
|
|
if (nread < 0)
|
|
return VISIT_ABORT;
|
|
procdata->c = getc (procdata->fp);
|
|
procdata->len = procdata->count + nread;
|
|
s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
|
|
assert (s[0] != '\0');
|
|
assert (s[1] == '\0'); /* Our terminator. */
|
|
assert (s[2] == '\0'); /* Added by locate_read_str. */
|
|
|
|
procdata->munged_filename = procdata->original_filename;
|
|
|
|
return VISIT_CONTINUE;
|
|
}
|
|
|
|
static int
|
|
visit_basename(struct process_data *procdata, void *context)
|
|
{
|
|
(void) context;
|
|
procdata->munged_filename = base_name(procdata->original_filename);
|
|
|
|
return VISIT_CONTINUE;
|
|
}
|
|
|
|
|
|
static int
|
|
visit_casefold(struct process_data *procdata, void *context)
|
|
{
|
|
struct stringbuf *b = context;
|
|
|
|
if (*b->preqlen+1 > b->buffersize)
|
|
{
|
|
b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
|
|
b->buffersize = *b->preqlen+1;
|
|
}
|
|
lc_strcpy(b->buffer, procdata->munged_filename);
|
|
|
|
return VISIT_CONTINUE;
|
|
}
|
|
|
|
/* visit_existing_follow implements -L -e */
|
|
static int
|
|
visit_existing_follow(struct process_data *procdata, void *context)
|
|
{
|
|
struct stat st;
|
|
(void) context;
|
|
|
|
/* munged_filename has been converted in some way (to lower case,
|
|
* or is just the base name of the file), and original_filename has not.
|
|
* Hence only original_filename is still actually the name of the file
|
|
* whose existence we would need to check.
|
|
*/
|
|
if (stat(procdata->original_filename, &st) != 0)
|
|
{
|
|
return VISIT_REJECTED;
|
|
}
|
|
else
|
|
{
|
|
return VISIT_CONTINUE;
|
|
}
|
|
}
|
|
|
|
/* visit_non_existing_follow implements -L -E */
|
|
static int
|
|
visit_non_existing_follow(struct process_data *procdata, void *context)
|
|
{
|
|
struct stat st;
|
|
(void) context;
|
|
|
|
/* munged_filename has been converted in some way (to lower case,
|
|
* or is just the base name of the file), and original_filename has not.
|
|
* Hence only original_filename is still actually the name of the file
|
|
* whose existence we would need to check.
|
|
*/
|
|
if (stat(procdata->original_filename, &st) == 0)
|
|
{
|
|
return VISIT_REJECTED;
|
|
}
|
|
else
|
|
{
|
|
return VISIT_CONTINUE;
|
|
}
|
|
}
|
|
|
|
/* visit_existing_nofollow implements -P -e */
|
|
static int
|
|
visit_existing_nofollow(struct process_data *procdata, void *context)
|
|
{
|
|
struct stat st;
|
|
(void) context;
|
|
|
|
/* munged_filename has been converted in some way (to lower case,
|
|
* or is just the base name of the file), and original_filename has not.
|
|
* Hence only original_filename is still actually the name of the file
|
|
* whose existence we would need to check.
|
|
*/
|
|
if (lstat(procdata->original_filename, &st) != 0)
|
|
{
|
|
return VISIT_REJECTED;
|
|
}
|
|
else
|
|
{
|
|
return VISIT_CONTINUE;
|
|
}
|
|
}
|
|
|
|
/* visit_non_existing_nofollow implements -P -E */
|
|
static int
|
|
visit_non_existing_nofollow(struct process_data *procdata, void *context)
|
|
{
|
|
struct stat st;
|
|
(void) context;
|
|
|
|
/* munged_filename has been converted in some way (to lower case,
|
|
* or is just the base name of the file), and original_filename has not.
|
|
* Hence only original_filename is still actually the name of the file
|
|
* whose existence we would need to check.
|
|
*/
|
|
if (lstat(procdata->original_filename, &st) == 0)
|
|
{
|
|
return VISIT_REJECTED;
|
|
}
|
|
else
|
|
{
|
|
return VISIT_CONTINUE;
|
|
}
|
|
}
|
|
|
|
static int
|
|
visit_substring_match_nocasefold(struct process_data *procdata, void *context)
|
|
{
|
|
const char *pattern = context;
|
|
|
|
if (NULL != strstr(procdata->munged_filename, pattern))
|
|
return VISIT_ACCEPTED;
|
|
else
|
|
return VISIT_REJECTED;
|
|
}
|
|
|
|
static int
|
|
visit_substring_match_casefold(struct process_data *procdata, void *context)
|
|
{
|
|
const struct casefolder * p = context;
|
|
const struct stringbuf * b = p->pbuf;
|
|
(void) procdata;
|
|
|
|
if (NULL != strstr(b->buffer, p->pattern))
|
|
return VISIT_ACCEPTED;
|
|
else
|
|
return VISIT_REJECTED;
|
|
}
|
|
|
|
|
|
static int
|
|
visit_globmatch_nofold(struct process_data *procdata, void *context)
|
|
{
|
|
const char *glob = context;
|
|
if (fnmatch(glob, procdata->munged_filename, 0) != 0)
|
|
return VISIT_REJECTED;
|
|
else
|
|
return VISIT_ACCEPTED;
|
|
}
|
|
|
|
|
|
static int
|
|
visit_globmatch_casefold(struct process_data *procdata, void *context)
|
|
{
|
|
const char *glob = context;
|
|
if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
|
|
return VISIT_REJECTED;
|
|
else
|
|
return VISIT_ACCEPTED;
|
|
}
|
|
|
|
|
|
static int
|
|
visit_regex(struct process_data *procdata, void *context)
|
|
{
|
|
struct regular_expression *p = context;
|
|
const size_t len = strlen(procdata->munged_filename);
|
|
|
|
int rv = re_search (&p->regex, procdata->munged_filename,
|
|
len, 0, len,
|
|
(struct re_registers *) NULL);
|
|
if (rv < 0)
|
|
{
|
|
return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
|
|
}
|
|
else
|
|
{
|
|
return VISIT_ACCEPTED; /* match */
|
|
}
|
|
}
|
|
|
|
|
|
static int
|
|
visit_stats(struct process_data *procdata, void *context)
|
|
{
|
|
struct locate_stats *p = context;
|
|
size_t len = strlen(procdata->original_filename);
|
|
const char *s;
|
|
int highbit, whitespace, newline;
|
|
|
|
++(p->total_filename_count);
|
|
p->total_filename_length += len;
|
|
|
|
highbit = whitespace = newline = 0;
|
|
for (s=procdata->original_filename; *s; ++s)
|
|
{
|
|
if ( (int)(*s) & 128 )
|
|
highbit = 1;
|
|
if ('\n' == *s)
|
|
{
|
|
newline = whitespace = 1;
|
|
}
|
|
else if (isspace((unsigned char)*s))
|
|
{
|
|
whitespace = 1;
|
|
}
|
|
}
|
|
|
|
if (highbit)
|
|
++(p->highbit_filename_count);
|
|
if (whitespace)
|
|
++(p->whitespace_count);
|
|
if (newline)
|
|
++(p->newline_count);
|
|
|
|
return VISIT_CONTINUE;
|
|
}
|
|
|
|
|
|
static int
|
|
visit_limit(struct process_data *procdata, void *context)
|
|
{
|
|
struct locate_limits *p = context;
|
|
|
|
(void) procdata;
|
|
|
|
if (++p->items_accepted >= p->limit)
|
|
return VISIT_ABORT;
|
|
else
|
|
return VISIT_CONTINUE;
|
|
}
|
|
|
|
static int
|
|
visit_count(struct process_data *procdata, void *context)
|
|
{
|
|
struct locate_limits *p = context;
|
|
|
|
(void) procdata;
|
|
|
|
++p->items_accepted;
|
|
return VISIT_CONTINUE;
|
|
}
|
|
|
|
/* Emit the statistics.
|
|
*/
|
|
static void
|
|
print_stats(int argc, size_t database_file_size)
|
|
{
|
|
char hbuf[LONGEST_HUMAN_READABLE + 1];
|
|
|
|
printf(_("Locate database size: %s bytes\n"),
|
|
human_readable ((uintmax_t) database_file_size,
|
|
hbuf, human_ceiling, 1, 1));
|
|
|
|
printf(_("Filenames: %s "),
|
|
human_readable (statistics.total_filename_count,
|
|
hbuf, human_ceiling, 1, 1));
|
|
printf(_("with a cumulative length of %s bytes"),
|
|
human_readable (statistics.total_filename_length,
|
|
hbuf, human_ceiling, 1, 1));
|
|
|
|
printf(_("\n\tof which %s contain whitespace, "),
|
|
human_readable (statistics.whitespace_count,
|
|
hbuf, human_ceiling, 1, 1));
|
|
printf(_("\n\t%s contain newline characters, "),
|
|
human_readable (statistics.newline_count,
|
|
hbuf, human_ceiling, 1, 1));
|
|
printf(_("\n\tand %s contain characters with the high bit set.\n"),
|
|
human_readable (statistics.highbit_filename_count,
|
|
hbuf, human_ceiling, 1, 1));
|
|
|
|
if (!argc)
|
|
printf(_("Compression ratio %4.2f%%\n"),
|
|
100.0 * ((double)statistics.total_filename_length
|
|
- (double) database_file_size)
|
|
/ (double) statistics.total_filename_length);
|
|
printf("\n");
|
|
}
|
|
|
|
|
|
/* Print or count the entries in DBFILE that match shell globbing patterns in
|
|
ARGV. Return the number of entries matched. */
|
|
|
|
static unsigned long
|
|
locate (int argc,
|
|
char **argv,
|
|
char *dbfile,
|
|
int ignore_case,
|
|
int enable_print,
|
|
int basename_only,
|
|
int use_limit,
|
|
struct locate_limits *plimit,
|
|
int stats,
|
|
int op_and,
|
|
int regex,
|
|
int regex_options)
|
|
{
|
|
char *pathpart; /* A pattern to consider. */
|
|
int argn; /* Index to current pattern in argv. */
|
|
int need_fold; /* Set when folding and any pattern is non-glob. */
|
|
int nread; /* number of bytes read from an entry. */
|
|
struct process_data procdata; /* Storage for data shared with visitors. */
|
|
|
|
int old_format = 0; /* true if reading a bigram-encoded database. */
|
|
static bool did_stdin = false; /* Set to prevent rereading stdin. */
|
|
struct visitor* pvis; /* temp for determining past_pat_inspector. */
|
|
|
|
/* To check the age of the database. */
|
|
struct stat st;
|
|
time_t now;
|
|
|
|
|
|
if (ignore_case)
|
|
regex_options |= RE_ICASE;
|
|
|
|
procdata.len = procdata.count = 0;
|
|
if (!strcmp (dbfile, "-"))
|
|
{
|
|
if (did_stdin)
|
|
{
|
|
error (0, 0, _("warning: the locate database can only be read from stdin once."));
|
|
return 0;
|
|
}
|
|
|
|
|
|
procdata.dbfile = "<stdin>";
|
|
procdata.fp = stdin;
|
|
did_stdin = true;
|
|
}
|
|
else
|
|
{
|
|
if (stat (dbfile, &st) || (procdata.fp = fopen (dbfile, "r")) == NULL)
|
|
{
|
|
error (0, errno, "%s", dbfile);
|
|
return 0;
|
|
}
|
|
time(&now);
|
|
if (now - st.st_mtime > WARN_SECONDS)
|
|
{
|
|
/* For example:
|
|
warning: database `fred' is more than 8 days old */
|
|
error (0, 0, _("warning: database `%s' is more than %d %s old"),
|
|
dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
|
|
}
|
|
procdata.dbfile = dbfile;
|
|
}
|
|
|
|
procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
|
|
procdata.original_filename = xmalloc (procdata.pathsize);
|
|
|
|
nread = fread (procdata.original_filename, 1, sizeof (LOCATEDB_MAGIC),
|
|
procdata.fp);
|
|
if (nread != sizeof (LOCATEDB_MAGIC)
|
|
|| memcmp (procdata.original_filename, LOCATEDB_MAGIC,
|
|
sizeof (LOCATEDB_MAGIC)))
|
|
{
|
|
int i;
|
|
/* Read the list of the most common bigrams in the database. */
|
|
nread = fread (procdata.original_filename + sizeof (LOCATEDB_MAGIC), 1,
|
|
256 - sizeof (LOCATEDB_MAGIC), procdata.fp);
|
|
for (i = 0; i < 128; i++)
|
|
{
|
|
procdata.bigram1[i] = procdata.original_filename[i << 1];
|
|
procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
|
|
}
|
|
old_format = 1;
|
|
}
|
|
|
|
/* Set up the inspection regime */
|
|
inspectors = NULL;
|
|
lastinspector = NULL;
|
|
past_pat_inspector = NULL;
|
|
|
|
if (old_format)
|
|
add_visitor(visit_old_format, NULL);
|
|
else
|
|
add_visitor(visit_locate02_format, NULL);
|
|
|
|
if (basename_only)
|
|
add_visitor(visit_basename, NULL);
|
|
|
|
/* See if we need fold. */
|
|
if (ignore_case && !regex)
|
|
for ( argn = 0; argn < argc; argn++ )
|
|
{
|
|
pathpart = argv[argn];
|
|
if (!contains_metacharacter(pathpart))
|
|
{
|
|
need_fold = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (need_fold)
|
|
{
|
|
add_visitor(visit_casefold, &casebuf);
|
|
casebuf.preqlen = &procdata.pathsize;
|
|
casebuf.soffs = &procdata.count;
|
|
}
|
|
|
|
/* Add an inspector for each pattern we're looking for. */
|
|
for ( argn = 0; argn < argc; argn++ )
|
|
{
|
|
pathpart = argv[argn];
|
|
if (regex)
|
|
{
|
|
struct regular_expression *p = xmalloc(sizeof(*p));
|
|
const char *error_message = NULL;
|
|
|
|
memset (&p->regex, 0, sizeof (p->regex));
|
|
|
|
re_set_syntax(regex_options);
|
|
p->regex.allocated = 100;
|
|
p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
|
|
p->regex.fastmap = NULL;
|
|
p->regex.syntax = regex_options;
|
|
p->regex.translate = NULL;
|
|
|
|
error_message = re_compile_pattern (pathpart, strlen (pathpart),
|
|
&p->regex);
|
|
if (error_message)
|
|
{
|
|
error (1, 0, "%s", error_message);
|
|
}
|
|
else
|
|
{
|
|
add_visitor(visit_regex, p);
|
|
}
|
|
}
|
|
else if (contains_metacharacter(pathpart))
|
|
{
|
|
if (ignore_case)
|
|
add_visitor(visit_globmatch_casefold, pathpart);
|
|
else
|
|
add_visitor(visit_globmatch_nofold, pathpart);
|
|
}
|
|
else
|
|
{
|
|
/* No glob characters used. Hence we match on
|
|
* _any part_ of the filename, not just the
|
|
* basename. This seems odd to me, but it is the
|
|
* traditional behaviour.
|
|
* James Youngman <jay@gnu.org>
|
|
*/
|
|
if (ignore_case)
|
|
{
|
|
struct casefolder * cf = xmalloc(sizeof(*cf));
|
|
cf->pattern = pathpart;
|
|
cf->pbuf = &casebuf;
|
|
add_visitor(visit_substring_match_casefold, cf);
|
|
/* If we ignore case, convert it to lower now so we don't have to
|
|
* do it every time
|
|
*/
|
|
lc_strcpy(pathpart, pathpart);
|
|
}
|
|
else
|
|
{
|
|
add_visitor(visit_substring_match_nocasefold, pathpart);
|
|
}
|
|
}
|
|
}
|
|
|
|
pvis = lastinspector;
|
|
|
|
/* We add visit_existing_*() as late as possible to reduce the
|
|
* number of stat() calls.
|
|
*/
|
|
switch (check_existence)
|
|
{
|
|
case ACCEPT_EXISTING:
|
|
if (follow_symlinks) /* -L, default */
|
|
add_visitor(visit_existing_follow, NULL);
|
|
else /* -P */
|
|
add_visitor(visit_existing_nofollow, NULL);
|
|
break;
|
|
|
|
case ACCEPT_NON_EXISTING:
|
|
if (follow_symlinks) /* -L, default */
|
|
add_visitor(visit_non_existing_follow, NULL);
|
|
else /* -P */
|
|
add_visitor(visit_non_existing_nofollow, NULL);
|
|
break;
|
|
|
|
case ACCEPT_EITHER: /* Default, neither -E nor -e */
|
|
/* do nothing; no extra processing. */
|
|
break;
|
|
}
|
|
|
|
if (stats)
|
|
add_visitor(visit_stats, &statistics);
|
|
|
|
if (enable_print)
|
|
{
|
|
if (print_quoted_filename)
|
|
add_visitor(visit_justprint_quoted, NULL);
|
|
else
|
|
add_visitor(visit_justprint_unquoted, NULL);
|
|
}
|
|
|
|
|
|
if (use_limit)
|
|
add_visitor(visit_limit, plimit);
|
|
else
|
|
add_visitor(visit_count, plimit);
|
|
|
|
|
|
if (argc > 1)
|
|
{
|
|
past_pat_inspector = pvis->next;
|
|
if (op_and)
|
|
mainprocessor = process_and;
|
|
else
|
|
mainprocessor = process_or;
|
|
}
|
|
else
|
|
mainprocessor = process_simple;
|
|
|
|
if (stats)
|
|
{
|
|
printf(_("Database %s is in the %s format.\n"),
|
|
procdata.dbfile,
|
|
old_format ? _("old") : "LOCATE02");
|
|
}
|
|
|
|
|
|
procdata.c = getc (procdata.fp);
|
|
/* If we are searching for filename patterns, the inspector list
|
|
* will contain an entry for each pattern for which we are searching.
|
|
*/
|
|
while ( (procdata.c != EOF) &&
|
|
(VISIT_ABORT != (mainprocessor)(&procdata)) )
|
|
{
|
|
/* Do nothing; all the work is done in the visitor functions. */
|
|
}
|
|
|
|
if (stats)
|
|
{
|
|
print_stats(argc, st.st_size);
|
|
}
|
|
|
|
if (ferror (procdata.fp))
|
|
{
|
|
error (0, errno, "%s", procdata.dbfile);
|
|
return 0;
|
|
}
|
|
if (procdata.fp != stdin && fclose (procdata.fp) == EOF)
|
|
{
|
|
error (0, errno, "%s", dbfile);
|
|
return 0;
|
|
}
|
|
|
|
return plimit->items_accepted;
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Version text printed for --version; defined in another file. */
extern char *version_string;

/* The name this program was run with (set from argv[0] in main). */
char *program_name;
|
|
|
|
static void
|
|
usage (FILE *stream)
|
|
{
|
|
fprintf (stream, _("\
|
|
Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
|
|
[-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
|
|
[--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
|
|
[-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
|
|
[-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
|
|
[-version] [--help]\n\
|
|
pattern...\n"),
|
|
program_name);
|
|
fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
|
|
}
|
|
/* Codes for long options that have no single-letter equivalent; they
   start above CHAR_MAX so they cannot collide with a character.  */
enum
  {
    REGEXTYPE_OPTION = CHAR_MAX + 1
  };

/* Long option table handed to getopt_long() by main(). */
static struct option const longopts[] =
{
  {"database",     required_argument, NULL, 'd'},
  {"existing",     no_argument,       NULL, 'e'},
  {"non-existing", no_argument,       NULL, 'E'},
  {"ignore-case",  no_argument,       NULL, 'i'},
  {"all",          no_argument,       NULL, 'A'},
  {"help",         no_argument,       NULL, 'h'},
  {"version",      no_argument,       NULL, 'v'},
  {"null",         no_argument,       NULL, '0'},
  {"count",        no_argument,       NULL, 'c'},
  {"wholename",    no_argument,       NULL, 'w'},
  {"wholepath",    no_argument,       NULL, 'w'}, /* Synonym. */
  {"basename",     no_argument,       NULL, 'b'},
  {"print",        no_argument,       NULL, 'p'},
  {"stdio",        no_argument,       NULL, 's'},
  {"mmap",         no_argument,       NULL, 'm'},
  {"limit",        required_argument, NULL, 'l'},
  {"regex",        no_argument,       NULL, 'r'},
  {"regextype",    required_argument, NULL, REGEXTYPE_OPTION},
  {"statistics",   no_argument,       NULL, 'S'},
  {"follow",       no_argument,       NULL, 'L'},
  {"nofollow",     no_argument,       NULL, 'P'},
  {NULL,           no_argument,       NULL, 0}   /* End of table. */
};
|
|
|
|
int
|
|
main (int argc, char **argv)
|
|
{
|
|
char *dbpath;
|
|
unsigned long int found = 0uL;
|
|
int optc;
|
|
int ignore_case = 0;
|
|
int print = 0;
|
|
int just_count = 0;
|
|
int basename_only = 0;
|
|
int use_limit = 0;
|
|
int regex = 0;
|
|
int regex_options = RE_SYNTAX_EMACS;
|
|
int stats = 0;
|
|
int op_and = 0;
|
|
char *e;
|
|
|
|
program_name = argv[0];
|
|
|
|
#ifdef HAVE_SETLOCALE
|
|
setlocale (LC_ALL, "");
|
|
#endif
|
|
bindtextdomain (PACKAGE, LOCALEDIR);
|
|
textdomain (PACKAGE);
|
|
atexit (close_stdout);
|
|
|
|
limits.limit = 0;
|
|
limits.items_accepted = 0;
|
|
|
|
quote_opts = clone_quoting_options (NULL);
|
|
print_quoted_filename = true;
|
|
|
|
dbpath = getenv ("LOCATE_PATH");
|
|
if (dbpath == NULL)
|
|
dbpath = LOCATE_DB;
|
|
|
|
check_existence = ACCEPT_EITHER;
|
|
|
|
while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
|
|
switch (optc)
|
|
{
|
|
case '0':
|
|
separator = 0;
|
|
print_quoted_filename = false; /* print filename 'raw'. */
|
|
break;
|
|
|
|
case 'A':
|
|
op_and = 1;
|
|
break;
|
|
|
|
case 'b':
|
|
basename_only = 1;
|
|
break;
|
|
|
|
case 'c':
|
|
just_count = 1;
|
|
break;
|
|
|
|
case 'd':
|
|
dbpath = optarg;
|
|
break;
|
|
|
|
case 'e':
|
|
check_existence = ACCEPT_EXISTING;
|
|
break;
|
|
|
|
case 'E':
|
|
check_existence = ACCEPT_NON_EXISTING;
|
|
break;
|
|
|
|
case 'i':
|
|
ignore_case = 1;
|
|
break;
|
|
|
|
case 'h':
|
|
usage (stdout);
|
|
return 0;
|
|
|
|
case 'p':
|
|
print = 1;
|
|
break;
|
|
|
|
case 'v':
|
|
printf (_("GNU locate version %s\n"), version_string);
|
|
return 0;
|
|
|
|
case 'w':
|
|
basename_only = 0;
|
|
break;
|
|
|
|
case 'r':
|
|
regex = 1;
|
|
break;
|
|
|
|
case REGEXTYPE_OPTION:
|
|
regex_options = get_regex_type(optarg);
|
|
break;
|
|
|
|
case 'S':
|
|
stats = 1;
|
|
break;
|
|
|
|
case 'L':
|
|
follow_symlinks = 1;
|
|
break;
|
|
|
|
/* In find, -P and -H differ in the way they handle paths
|
|
* given on the command line. This is not relevant for
|
|
* locate, but the -H option is supported because it is
|
|
* probably more intuitive to do so.
|
|
*/
|
|
case 'P':
|
|
case 'H':
|
|
follow_symlinks = 0;
|
|
break;
|
|
|
|
case 'l':
|
|
{
|
|
char *end = optarg;
|
|
strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
|
|
if (LONGINT_OK != err)
|
|
{
|
|
STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
|
|
}
|
|
use_limit = 1;
|
|
}
|
|
break;
|
|
|
|
case 's': /* use stdio */
|
|
case 'm': /* use mmap */
|
|
/* These options are implemented simply for
|
|
* compatibility with FreeBSD
|
|
*/
|
|
break;
|
|
|
|
default:
|
|
usage (stderr);
|
|
return 1;
|
|
}
|
|
|
|
if (!just_count && !stats)
|
|
print = 1;
|
|
|
|
if (stats)
|
|
{
|
|
if (optind == argc)
|
|
use_limit = 0;
|
|
}
|
|
else
|
|
{
|
|
if (!just_count && optind == argc)
|
|
{
|
|
usage (stderr);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
|
|
if (1 == isatty(STDOUT_FILENO))
|
|
stdout_is_a_tty = true;
|
|
else
|
|
stdout_is_a_tty = false;
|
|
|
|
next_element (dbpath, 0); /* Initialize. */
|
|
|
|
/* Bail out early if limit already reached. */
|
|
while ((e = next_element ((char *) NULL, 0)) != NULL &&
|
|
(!use_limit || limits.limit > limits.items_accepted))
|
|
{
|
|
statistics.compressed_bytes =
|
|
statistics.total_filename_count =
|
|
statistics.total_filename_length =
|
|
statistics.whitespace_count =
|
|
statistics.newline_count =
|
|
statistics.highbit_filename_count = 0u;
|
|
|
|
if (0 == strlen(e) || 0 == strcmp(e, "."))
|
|
{
|
|
/* Use the default database name instead (note: we
|
|
* don't use 'dbpath' since that might itself contain a
|
|
* colon-separated list.
|
|
*/
|
|
e = LOCATE_DB;
|
|
}
|
|
|
|
found = locate (argc - optind, &argv[optind], e, ignore_case, print, basename_only, use_limit, &limits, stats, op_and, regex, regex_options);
|
|
}
|
|
|
|
if (just_count)
|
|
{
|
|
printf("%ld\n", found);
|
|
}
|
|
|
|
if (found || (use_limit && (limits.limit==0)) || stats )
|
|
return 0;
|
|
else
|
|
return 1;
|
|
}
|