findutils/locate/locate.c
2003-05-24 16:56:58 +00:00

554 lines
14 KiB
C

/* locate -- search databases for filenames that match patterns
Copyright (C) 1994 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Usage: locate [options] pattern...
Scan a pathname list for the full pathname of a file, given only
a piece of the name (possibly containing shell globbing metacharacters).
The list has been processed with front-compression, which reduces
the list size by a factor of 4-5.
Recognizes two database formats, old and new. The old format is
bigram coded, which reduces space by a further 20-25% and uses the
following encoding of the database bytes:
0-28 likeliest differential counts + offset (14) to make nonnegative
30 escape code for out-of-range count to follow in next halfword
128-255 bigram codes (the 128 most common, as determined by `updatedb')
32-127 single character (printable) ASCII remainder
Uses a novel two-tiered string search technique:
First, match a metacharacter-free subpattern and a partial pathname
BACKWARDS to avoid full expansion of the pathname list.
The time savings is 40-50% over forward matching, which cannot efficiently
handle overlapped search patterns and compressed path remainders.
Then, match the actual shell glob-style regular expression (if in this form)
against the candidate pathnames using the slower shell filename
matching routines.
Described more fully in Usenix ;login:, Vol 8, No 1,
February/March, 1983, p. 8.
Written by James A. Woods <jwoods@adobe.com>.
Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
#define _GNU_SOURCE
#include <config.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>
#include <fnmatch.h>
#include <getopt.h>
#define NDEBUG
#include <assert.h>
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
#include <string.h>
#else
#include <strings.h>
#define strchr index
#endif
#ifdef STDC_HEADERS
#include <stdlib.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#else
extern int errno;
#endif
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
#if ENABLE_NLS
# include <libintl.h>
# define _(Text) gettext (Text)
#else
# define _(Text) Text
#define textdomain(Domain)
#define bindtextdomain(Package, Directory)
#endif
#ifdef gettext_noop
# define N_(String) gettext_noop (String)
#else
# define N_(String) (String)
#endif
#include "locatedb.h"
#include <getline.h>
/* Note that this evaluates C many times. */
#ifdef _LIBC
# define TOUPPER(Ch) toupper (Ch)
# define TOLOWER(Ch) tolower (Ch)
#else
# define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
# define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
#endif
typedef enum {false, true} boolean;
/* Warn if a database is older than this. 8 days allows for a weekly
update that takes up to a day to perform. */
#define WARN_NUMBER_UNITS (8)
/* Printable name of units used in WARN_SECONDS */
static const char warn_name_units[] = N_("days");
#define SECONDS_PER_UNIT (60 * 60 * 24)
#define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
/* Check for existence of files before printing them out? */
int check_existence = 0;
char *next_element ();
char *xmalloc ();
char *xrealloc ();
void error ();
/* Read in a 16-bit int, high byte first (network byte order). */
static int
get_short (fp)
FILE *fp;
{
register short x;
x = fgetc (fp) << 8;
x |= (fgetc (fp) & 0xff);
return x;
}
/* Return a pointer to the last character in a static copy of the last
glob-free subpattern in NAME,
with '\0' prepended for a fast backwards pre-match. */
static char *
last_literal_end (name)
char *name;
{
static char *globfree = NULL; /* A copy of the subpattern in NAME. */
static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
register char *subp; /* Return value. */
register char *p; /* Search location in NAME. */
/* Find the end of the subpattern.
Skip trailing metacharacters and [] ranges. */
for (p = name + strlen (name) - 1; p >= name && strchr ("*?]", *p) != NULL;
p--)
{
if (*p == ']')
while (p >= name && *p != '[')
p--;
}
if (p < name)
p = name;
if (p - name + 3 > gfalloc)
{
gfalloc = p - name + 3 + 64; /* Room to grow. */
globfree = xrealloc (globfree, gfalloc);
}
subp = globfree;
*subp++ = '\0';
/* If the pattern has only metacharacters, make every path match the
subpattern, so it gets checked the slow way. */
if (p == name && strchr ("?*[]", *p) != NULL)
*subp++ = '/';
else
{
char *endmark;
/* Find the start of the metacharacter-free subpattern. */
for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
;
/* Copy the subpattern into globfree. */
for (++p; p <= endmark; )
*subp++ = *p++;
}
*subp-- = '\0'; /* Null terminate, though it's not needed. */
return subp;
}
/* getstr()
*
* Read bytes from FP into the buffer at offset OFFSET in (*BUF),
* until we reach DELIMITER or end-of-file. We reallocate the buffer
* as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
* is made regarding the content of the data (i.e. the implementation is
* 8-bit clean, the only delimiter is DELIMITER).
*
* Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
* has been removed from gnulib.
*
* We call the function locate_read_str() to avoid a name clash with the curses
* function getstr().
*/
static int locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
char * p = NULL;
size_t sz = 0;
int needed, nread;
nread = getdelim(&p, &sz, delimiter, fp);
if (nread >= 0)
{
assert(p != NULL);
needed = offs + nread;
if (needed > (*siz))
{
char *pnew = realloc(*buf, needed);
if (NULL == pnew)
{
return -1; /* FAIL */
}
else
{
*siz = needed;
*buf = pnew;
}
}
memcpy((*buf)+offs, p, nread);
free(p);
}
return nread;
}
/* Print the entries in DBFILE that match shell globbing pattern PATHPART.
Return the number of entries printed. */
static int
locate (pathpart, dbfile, ignore_case)
char *pathpart, *dbfile;
int ignore_case;
{
/* The pathname database. */
FILE *fp;
/* An input byte. */
int c;
/* Number of bytes read from an entry. */
int nread;
/* true if PATHPART contains globbing metacharacters. */
boolean globflag;
/* The end of the last glob-free subpattern in PATHPART. */
char *patend;
/* The current input database entry. */
char *path;
/* Amount allocated for it. */
size_t pathsize;
/* The length of the prefix shared with the previous database entry. */
int count = 0;
/* Where in `path' to stop the backward search for the last character
in the subpattern. Set according to `count'. */
char *cutoff;
/* true if we found a fast match (of patend) on the previous path. */
boolean prev_fast_match = false;
/* The return value. */
int printed = 0;
/* true if reading a bigram-encoded database. */
boolean old_format = false;
/* For the old database format,
the first and second characters of the most common bigrams. */
char bigram1[128], bigram2[128];
/* To check the age of the database. */
struct stat st;
time_t now;
if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
{
error (0, errno, "%s", dbfile);
return 0;
}
time(&now);
if (now - st.st_mtime > WARN_SECONDS)
{
/* For example:
warning: database `fred' is more than 8 days old */
error (0, 0, _("warning: database `%s' is more than %d %s old"),
dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
}
pathsize = 1026; /* Increased as necessary by locate_read_str. */
path = xmalloc (pathsize);
nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
if (nread != sizeof (LOCATEDB_MAGIC)
|| memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
{
int i;
/* Read the list of the most common bigrams in the database. */
fseek (fp, 0, 0);
for (i = 0; i < 128; i++)
{
bigram1[i] = getc (fp);
bigram2[i] = getc (fp);
}
old_format = true;
}
/* If we ignore case,
convert it to lower first so we don't have to do it every time */
if (ignore_case){
for (patend=pathpart;*patend;++patend){
*patend=TOLOWER(*patend);
}
}
globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
|| strchr (pathpart, '[');
patend = last_literal_end (pathpart);
c = getc (fp);
while (c != EOF)
{
register char *s; /* Scan the path we read in. */
if (old_format)
{
/* Get the offset in the path where this path info starts. */
if (c == LOCATEDB_OLD_ESCAPE)
count += getw (fp) - LOCATEDB_OLD_OFFSET;
else
count += c - LOCATEDB_OLD_OFFSET;
/* Overlay the old path with the remainder of the new. */
for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
if (c < 0200)
*s++ = c; /* An ordinary character. */
else
{
/* Bigram markers have the high bit set. */
c &= 0177;
*s++ = bigram1[c];
*s++ = bigram2[c];
}
*s-- = '\0';
}
else
{
if (c == LOCATEDB_ESCAPE)
count += get_short (fp);
else if (c > 127)
count += c - 256;
else
count += c;
/* Overlay the old path with the remainder of the new. */
nread = locate_read_str (&path, &pathsize, fp, 0, count);
if (nread < 0)
break;
c = getc (fp);
s = path + count + nread - 2; /* Move to the last char in path. */
assert (s[0] != '\0');
assert (s[1] == '\0'); /* Our terminator. */
assert (s[2] == '\0'); /* Added by locate_read_str. */
}
/* If the previous path matched, scan the whole path for the last
char in the subpattern. If not, the shared prefix doesn't match
the pattern, so don't scan it for the last char. */
cutoff = prev_fast_match ? path : path + count;
/* Search backward starting at the end of the path we just read in,
for the character at the end of the last glob-free subpattern
in PATHPART. */
if (ignore_case)
{
for (prev_fast_match = false; s >= cutoff; s--)
/* Fast first char check. */
if (TOLOWER(*s) == *patend)
{
char *s2; /* Scan the path we read in. */
register char *p2; /* Scan `patend'. */
for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && TOLOWER(*s2) == *p2;
s2--, p2--)
;
if (*p2 == '\0')
{
/* Success on the fast match. Compare the whole pattern
if it contains globbing characters. */
prev_fast_match = true;
if (globflag == false || fnmatch (pathpart, path, FNM_CASEFOLD) == 0)
{
if (!check_existence || stat(path, &st) == 0)
{
puts (path);
++printed;
}
}
break;
}
}
}
else {
for (prev_fast_match = false; s >= cutoff; s--)
/* Fast first char check. */
if (*s == *patend)
{
char *s2; /* Scan the path we read in. */
register char *p2; /* Scan `patend'. */
for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && *s2 == *p2;
s2--, p2--)
;
if (*p2 == '\0')
{
/* Success on the fast match. Compare the whole pattern
if it contains globbing characters. */
prev_fast_match = true;
if (globflag == false || fnmatch (pathpart, path,
0) == 0)
{
if (!check_existence || stat(path, &st) == 0)
{
puts (path);
++printed;
}
}
break;
}
}
}
}
if (ferror (fp))
{
error (0, errno, "%s", dbfile);
return 0;
}
if (fclose (fp) == EOF)
{
error (0, errno, "%s", dbfile);
return 0;
}
return printed;
}
extern char *version_string;
/* The name this program was run with. */
char *program_name;
static void
usage (stream, status)
FILE *stream;
int status;
{
fprintf (stream, _("\
Usage: %s [-d path | --database=path] [-e | --existing]\n\
[-i | --ignore-case] [--version] [--help] pattern...\n"),
program_name);
fputs (_("\nReport bugs to <bug-findutils@gnu.org>."), stream);
exit (status);
}
static struct option const longopts[] =
{
{"database", required_argument, NULL, 'd'},
{"existing", no_argument, NULL, 'e'},
{"ignore-case", no_argument, NULL, 'i'},
{"help", no_argument, NULL, 'h'},
{"version", no_argument, NULL, 'v'},
{NULL, no_argument, NULL, 0}
};
int
main (argc, argv)
int argc;
char **argv;
{
char *dbpath;
int fnmatch_flags = 0;
int found = 0, optc;
int ignore_case = 0;
program_name = argv[0];
#ifdef HAVE_SETLOCALE
setlocale (LC_ALL, "");
#endif
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
dbpath = getenv ("LOCATE_PATH");
if (dbpath == NULL)
dbpath = LOCATE_DB;
check_existence = 0;
while ((optc = getopt_long (argc, argv, "d:ei", longopts, (int *) 0)) != -1)
switch (optc)
{
case 'd':
dbpath = optarg;
break;
case 'e':
check_existence = 1;
break;
case 'i':
ignore_case = 1;
fnmatch_flags |= FNM_CASEFOLD;
break;
case 'h':
usage (stdout, 0);
case 'v':
printf (_("GNU locate version %s\n"), version_string);
exit (0);
default:
usage (stderr, 1);
}
if (optind == argc)
usage (stderr, 1);
for (; optind < argc; optind++)
{
char *e;
next_element (dbpath); /* Initialize. */
while ((e = next_element ((char *) NULL)) != NULL)
found |= locate (argv[optind], e, ignore_case);
}
exit (!found);
}