/* Mirror of https://https.git.savannah.gnu.org/git/findutils.git,
   synced 2026-01-26 15:39:06 +00:00 (554 lines, 14 KiB, C).  */

/* locate -- search databases for filenames that match patterns
   Copyright (C) 1994 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* Usage: locate [options] pattern...

   Scan a pathname list for the full pathname of a file, given only
   a piece of the name (possibly containing shell globbing metacharacters).
   The list has been processed with front-compression, which reduces
   the list size by a factor of 4-5.
   Recognizes two database formats, old and new.  The old format is
   bigram coded, which reduces space by a further 20-25% and uses the
   following encoding of the database bytes:

   0-28     likeliest differential counts + offset (14) to make nonnegative
   30       escape code for out-of-range count to follow in next halfword
   128-255  bigram codes (the 128 most common, as determined by `updatedb')
   32-127   single character (printable) ASCII remainder

   Uses a novel two-tiered string search technique:

   First, match a metacharacter-free subpattern and a partial pathname
   BACKWARDS to avoid full expansion of the pathname list.
   The time savings is 40-50% over forward matching, which cannot efficiently
   handle overlapped search patterns and compressed path remainders.

   Then, match the actual shell glob-style regular expression (if in this form)
   against the candidate pathnames using the slower shell filename
   matching routines.

   Described more fully in Usenix ;login:, Vol 8, No 1,
   February/March, 1983, p. 8.

   Written by James A. Woods <jwoods@adobe.com>.
   Modified by David MacKenzie <djm@gnu.ai.mit.edu>.  */

#define _GNU_SOURCE
#include <config.h>
#include <stdio.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>
#include <fnmatch.h>
#include <getopt.h>

#define NDEBUG
#include <assert.h>

#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
#include <string.h>
#else
#include <strings.h>
#define strchr index
#endif

#ifdef STDC_HEADERS
#include <stdlib.h>
#endif

#ifdef HAVE_ERRNO_H
#include <errno.h>
#else
extern int errno;
#endif

#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif

#if ENABLE_NLS
# include <libintl.h>
# define _(Text) gettext (Text)
#else
# define _(Text) Text
#define textdomain(Domain)
#define bindtextdomain(Package, Directory)
#endif
#ifdef gettext_noop
# define N_(String) gettext_noop (String)
#else
# define N_(String) (String)
#endif

#include "locatedb.h"
#include <getline.h>

/* Case-conversion helpers for plain `char' data.

   Note that these evaluate Ch more than once, so never pass an
   argument with side effects.

   Ch is converted to unsigned char before it reaches the <ctype.h>
   functions: handing them a negative value other than EOF (a plain
   `char' holding a byte above 127 where char is signed) is undefined
   behaviour.  A converted character is narrowed back to `char' so that
   the result still compares equal to other plain `char' values.  */
#ifdef _LIBC
# define TOUPPER(Ch) ((char) toupper ((unsigned char) (Ch)))
# define TOLOWER(Ch) ((char) tolower ((unsigned char) (Ch)))
#else
# define TOUPPER(Ch) \
  (islower ((unsigned char) (Ch)) \
   ? (char) toupper ((unsigned char) (Ch)) : (Ch))
# define TOLOWER(Ch) \
  (isupper ((unsigned char) (Ch)) \
   ? (char) tolower ((unsigned char) (Ch)) : (Ch))
#endif

typedef enum {false, true} boolean;
|
|
|
|
/* Warn if a database is older than this. 8 days allows for a weekly
|
|
update that takes up to a day to perform. */
|
|
#define WARN_NUMBER_UNITS (8)
|
|
/* Printable name of units used in WARN_SECONDS */
|
|
static const char warn_name_units[] = N_("days");
|
|
#define SECONDS_PER_UNIT (60 * 60 * 24)
|
|
|
|
#define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
|
|
|
|
/* Check for existence of files before printing them out? */
|
|
int check_existence = 0;
|
|
|
|
char *next_element ();
|
|
char *xmalloc ();
|
|
char *xrealloc ();
|
|
void error ();
|
|
|
|
/* Read a 16-bit two's-complement integer from FP, high byte first
   (network byte order), and return it sign-extended to int, i.e. a
   value in the range -32768..32767.

   The two bytes are combined with unsigned arithmetic and the sign is
   applied explicitly, so the result does not depend on left-shifting a
   negative value or on how an out-of-range value narrows to `short'.

   A byte that cannot be read (fgetc returning EOF) counts as 0xff, so
   a read at end of file yields -1.  This function does not report end
   of file or read errors itself.  */

static int
get_short (FILE *fp)
{
  /* Separate declarations keep the two reads in a defined order.  */
  unsigned int high = (unsigned int) fgetc (fp) & 0xffu;
  unsigned int low = (unsigned int) fgetc (fp) & 0xffu;
  unsigned int word = (high << 8) | low;

  /* Values with bit 15 set represent WORD - 65536.  */
  return word >= 0x8000u ? (int) word - 0x10000 : (int) word;
}

/* Return a pointer to the last character in a static copy of the last
   glob-free subpattern in NAME, with '\0' prepended for a fast
   backwards pre-match.

   The copy lives in a buffer owned by this function that is reused
   (and possibly reallocated) on every call, so the result is only
   valid until the next call and must not be freed by the caller.

   If NAME is empty or consists only of metacharacters, the subpattern
   is the single character '/'.

   All scanning is done with signed indices rather than pointers, so
   no pointer to before the start of NAME is ever formed, not even for
   an empty NAME.

   NOTE(review): a ']' with no matching '[' is skipped together with
   everything before it, so for a pattern such as "foo]" the literal
   becomes just "f".  That matches the historical behaviour and is
   deliberately left unchanged here.  */

static char *
last_literal_end (char *name)
{
  static char *globfree = NULL;	/* A copy of the subpattern in NAME.  */
  static size_t gfalloc = 0;	/* Bytes allocated for `globfree'.  */
  char *subp;			/* Write position, then return value.  */
  long end;			/* Index in NAME where the literal ends.  */

  /* Find the end of the subpattern.
     Skip trailing metacharacters and [] ranges.  */
  end = (long) strlen (name) - 1;
  while (end >= 0 && strchr ("*?]", name[end]) != NULL)
    {
      if (name[end] == ']')
	while (end >= 0 && name[end] != '[')
	  end--;
      end--;
    }
  if (end < 0)
    end = 0;

  /* One byte for the leading '\0', at most END + 1 for the literal and
     one for the trailing '\0'.  */
  if ((size_t) end + 3 > gfalloc)
    {
      gfalloc = (size_t) end + 3 + 64;	/* Room to grow.  */
      globfree = xrealloc (globfree, gfalloc);
    }
  subp = globfree;
  *subp++ = '\0';

  /* If the pattern has only metacharacters, make every path match the
     subpattern, so it gets checked the slow way.  For an empty NAME,
     name[0] is '\0' and strchr finds the terminator of its first
     argument, so this branch is taken as well.  */
  if (end == 0 && strchr ("?*[]", name[0]) != NULL)
    *subp++ = '/';
  else
    {
      long start = end;

      /* Find the start of the metacharacter-free subpattern.  */
      while (start >= 0 && strchr ("]*?", name[start]) == NULL)
	start--;
      /* Copy the subpattern into globfree.  */
      for (start++; start <= end; start++)
	*subp++ = name[start];
    }
  *subp-- = '\0';		/* Null terminate, though it's not needed.  */

  return subp;
}

/* getstr()
 *
 * Read bytes from FP into the buffer at offset OFFS in (*BUF),
 * until we reach DELIMITER or end-of-file.  We reallocate the buffer
 * as necessary, altering (*BUF) and (*SIZ) as appropriate.  No assumption
 * is made regarding the content of the data (i.e. the implementation is
 * 8-bit clean, the only delimiter is DELIMITER).
 *
 * The bytes stored are the ones read (including DELIMITER when it was
 * found) followed by one extra '\0', so the buffer is always
 * NUL-terminated even when the last record in the file has no
 * delimiter.
 *
 * Returns the number of bytes read, counting DELIMITER but not the
 * extra '\0'.  Returns a negative value if nothing could be read (end
 * of file or read error), if OFFS is negative, or if the buffer could
 * not be enlarged; in the last case the bytes already consumed from FP
 * are lost and (*BUF)/(*SIZ) are left untouched.
 *
 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
 * has been removed from gnulib.
 *
 * We call the function locate_read_str() to avoid a name clash with the curses
 * function getstr().
 */
static int locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char *chunk = NULL;		/* Scratch buffer owned by getdelim.  */
  size_t chunk_size = 0;
  int nread;

  /* A negative offset would make us write before the start of *BUF.  */
  if (offs < 0)
    return -1;

  nread = getdelim (&chunk, &chunk_size, delimiter, fp);
  if (nread >= 0)
    {
      /* Room for the prefix, the bytes read and getdelim's '\0'.  */
      size_t needed = (size_t) offs + (size_t) nread + 1;

      if (needed > *siz)
	{
	  char *pnew = realloc (*buf, needed);
	  if (pnew == NULL)
	    {
	      free (chunk);
	      return -1;	/* FAIL */
	    }
	  *siz = needed;
	  *buf = pnew;
	}
      memcpy (*buf + offs, chunk, (size_t) nread + 1);
    }

  /* getdelim may have allocated CHUNK even when it failed.  */
  free (chunk);
  return nread;
}

/* Print the entries in DBFILE that match shell globbing pattern PATHPART.
|
|
Return the number of entries printed. */
|
|
|
|
static int
|
|
locate (pathpart, dbfile, ignore_case)
|
|
char *pathpart, *dbfile;
|
|
int ignore_case;
|
|
{
|
|
/* The pathname database. */
|
|
FILE *fp;
|
|
/* An input byte. */
|
|
int c;
|
|
/* Number of bytes read from an entry. */
|
|
int nread;
|
|
|
|
/* true if PATHPART contains globbing metacharacters. */
|
|
boolean globflag;
|
|
/* The end of the last glob-free subpattern in PATHPART. */
|
|
char *patend;
|
|
|
|
/* The current input database entry. */
|
|
char *path;
|
|
/* Amount allocated for it. */
|
|
size_t pathsize;
|
|
|
|
/* The length of the prefix shared with the previous database entry. */
|
|
int count = 0;
|
|
/* Where in `path' to stop the backward search for the last character
|
|
in the subpattern. Set according to `count'. */
|
|
char *cutoff;
|
|
|
|
/* true if we found a fast match (of patend) on the previous path. */
|
|
boolean prev_fast_match = false;
|
|
/* The return value. */
|
|
int printed = 0;
|
|
|
|
/* true if reading a bigram-encoded database. */
|
|
boolean old_format = false;
|
|
/* For the old database format,
|
|
the first and second characters of the most common bigrams. */
|
|
char bigram1[128], bigram2[128];
|
|
|
|
/* To check the age of the database. */
|
|
struct stat st;
|
|
time_t now;
|
|
|
|
if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
|
|
{
|
|
error (0, errno, "%s", dbfile);
|
|
return 0;
|
|
}
|
|
time(&now);
|
|
if (now - st.st_mtime > WARN_SECONDS)
|
|
{
|
|
/* For example:
|
|
warning: database `fred' is more than 8 days old */
|
|
error (0, 0, _("warning: database `%s' is more than %d %s old"),
|
|
dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
|
|
}
|
|
|
|
pathsize = 1026; /* Increased as necessary by locate_read_str. */
|
|
path = xmalloc (pathsize);
|
|
|
|
nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
|
|
if (nread != sizeof (LOCATEDB_MAGIC)
|
|
|| memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
|
|
{
|
|
int i;
|
|
/* Read the list of the most common bigrams in the database. */
|
|
fseek (fp, 0, 0);
|
|
for (i = 0; i < 128; i++)
|
|
{
|
|
bigram1[i] = getc (fp);
|
|
bigram2[i] = getc (fp);
|
|
}
|
|
old_format = true;
|
|
}
|
|
|
|
/* If we ignore case,
|
|
convert it to lower first so we don't have to do it every time */
|
|
if (ignore_case){
|
|
for (patend=pathpart;*patend;++patend){
|
|
*patend=TOLOWER(*patend);
|
|
}
|
|
}
|
|
|
|
|
|
globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
|
|
|| strchr (pathpart, '[');
|
|
|
|
patend = last_literal_end (pathpart);
|
|
|
|
c = getc (fp);
|
|
while (c != EOF)
|
|
{
|
|
register char *s; /* Scan the path we read in. */
|
|
|
|
if (old_format)
|
|
{
|
|
/* Get the offset in the path where this path info starts. */
|
|
if (c == LOCATEDB_OLD_ESCAPE)
|
|
count += getw (fp) - LOCATEDB_OLD_OFFSET;
|
|
else
|
|
count += c - LOCATEDB_OLD_OFFSET;
|
|
|
|
/* Overlay the old path with the remainder of the new. */
|
|
for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
|
|
if (c < 0200)
|
|
*s++ = c; /* An ordinary character. */
|
|
else
|
|
{
|
|
/* Bigram markers have the high bit set. */
|
|
c &= 0177;
|
|
*s++ = bigram1[c];
|
|
*s++ = bigram2[c];
|
|
}
|
|
*s-- = '\0';
|
|
}
|
|
else
|
|
{
|
|
if (c == LOCATEDB_ESCAPE)
|
|
count += get_short (fp);
|
|
else if (c > 127)
|
|
count += c - 256;
|
|
else
|
|
count += c;
|
|
|
|
/* Overlay the old path with the remainder of the new. */
|
|
nread = locate_read_str (&path, &pathsize, fp, 0, count);
|
|
if (nread < 0)
|
|
break;
|
|
c = getc (fp);
|
|
s = path + count + nread - 2; /* Move to the last char in path. */
|
|
assert (s[0] != '\0');
|
|
assert (s[1] == '\0'); /* Our terminator. */
|
|
assert (s[2] == '\0'); /* Added by locate_read_str. */
|
|
}
|
|
|
|
/* If the previous path matched, scan the whole path for the last
|
|
char in the subpattern. If not, the shared prefix doesn't match
|
|
the pattern, so don't scan it for the last char. */
|
|
cutoff = prev_fast_match ? path : path + count;
|
|
|
|
/* Search backward starting at the end of the path we just read in,
|
|
for the character at the end of the last glob-free subpattern
|
|
in PATHPART. */
|
|
if (ignore_case)
|
|
{
|
|
for (prev_fast_match = false; s >= cutoff; s--)
|
|
/* Fast first char check. */
|
|
if (TOLOWER(*s) == *patend)
|
|
{
|
|
char *s2; /* Scan the path we read in. */
|
|
register char *p2; /* Scan `patend'. */
|
|
|
|
for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && TOLOWER(*s2) == *p2;
|
|
s2--, p2--)
|
|
;
|
|
if (*p2 == '\0')
|
|
{
|
|
/* Success on the fast match. Compare the whole pattern
|
|
if it contains globbing characters. */
|
|
prev_fast_match = true;
|
|
if (globflag == false || fnmatch (pathpart, path, FNM_CASEFOLD) == 0)
|
|
{
|
|
if (!check_existence || stat(path, &st) == 0)
|
|
{
|
|
puts (path);
|
|
++printed;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
|
|
for (prev_fast_match = false; s >= cutoff; s--)
|
|
/* Fast first char check. */
|
|
if (*s == *patend)
|
|
{
|
|
char *s2; /* Scan the path we read in. */
|
|
register char *p2; /* Scan `patend'. */
|
|
|
|
for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && *s2 == *p2;
|
|
s2--, p2--)
|
|
;
|
|
if (*p2 == '\0')
|
|
{
|
|
/* Success on the fast match. Compare the whole pattern
|
|
if it contains globbing characters. */
|
|
prev_fast_match = true;
|
|
if (globflag == false || fnmatch (pathpart, path,
|
|
0) == 0)
|
|
{
|
|
if (!check_existence || stat(path, &st) == 0)
|
|
{
|
|
puts (path);
|
|
++printed;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
if (ferror (fp))
|
|
{
|
|
error (0, errno, "%s", dbfile);
|
|
return 0;
|
|
}
|
|
if (fclose (fp) == EOF)
|
|
{
|
|
error (0, errno, "%s", dbfile);
|
|
return 0;
|
|
}
|
|
|
|
return printed;
|
|
}
|
|
|
|
/* Version text printed for --version; defined outside this file.  */
extern char *version_string;

/* The name this program was run with. */
char *program_name;

static void
|
|
usage (stream, status)
|
|
FILE *stream;
|
|
int status;
|
|
{
|
|
fprintf (stream, _("\
|
|
Usage: %s [-d path | --database=path] [-e | --existing]\n\
|
|
[-i | --ignore-case] [--version] [--help] pattern...\n"),
|
|
program_name);
|
|
fputs (_("\nReport bugs to <bug-findutils@gnu.org>."), stream);
|
|
exit (status);
|
|
}
|
|
|
|
/* Long options accepted by getopt_long in main.  Each entry returns
   the listed character; 'h' and 'v' have no short form in the option
   string passed to getopt_long, so they are reachable only as --help
   and --version.  The all-zero entry terminates the table.  */
static struct option const longopts[] =
{
  {"database", required_argument, NULL, 'd'},
  {"existing", no_argument, NULL, 'e'},
  {"ignore-case", no_argument, NULL, 'i'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, no_argument, NULL, 0}
};

int
|
|
main (argc, argv)
|
|
int argc;
|
|
char **argv;
|
|
{
|
|
char *dbpath;
|
|
int fnmatch_flags = 0;
|
|
int found = 0, optc;
|
|
int ignore_case = 0;
|
|
|
|
program_name = argv[0];
|
|
|
|
#ifdef HAVE_SETLOCALE
|
|
setlocale (LC_ALL, "");
|
|
#endif
|
|
bindtextdomain (PACKAGE, LOCALEDIR);
|
|
textdomain (PACKAGE);
|
|
|
|
dbpath = getenv ("LOCATE_PATH");
|
|
if (dbpath == NULL)
|
|
dbpath = LOCATE_DB;
|
|
|
|
check_existence = 0;
|
|
|
|
while ((optc = getopt_long (argc, argv, "d:ei", longopts, (int *) 0)) != -1)
|
|
switch (optc)
|
|
{
|
|
case 'd':
|
|
dbpath = optarg;
|
|
break;
|
|
|
|
case 'e':
|
|
check_existence = 1;
|
|
break;
|
|
|
|
case 'i':
|
|
ignore_case = 1;
|
|
fnmatch_flags |= FNM_CASEFOLD;
|
|
break;
|
|
|
|
case 'h':
|
|
usage (stdout, 0);
|
|
|
|
case 'v':
|
|
printf (_("GNU locate version %s\n"), version_string);
|
|
exit (0);
|
|
|
|
default:
|
|
usage (stderr, 1);
|
|
}
|
|
|
|
if (optind == argc)
|
|
usage (stderr, 1);
|
|
|
|
for (; optind < argc; optind++)
|
|
{
|
|
char *e;
|
|
next_element (dbpath); /* Initialize. */
|
|
while ((e = next_element ((char *) NULL)) != NULL)
|
|
found |= locate (argv[optind], e, ignore_case);
|
|
}
|
|
|
|
exit (!found);
|
|
}
|