mirror of
https://https.git.savannah.gnu.org/git/grep.git
synced 2026-01-27 18:04:36 +00:00
Compare commits
267 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
071ac3aa76 | ||
|
|
c635f7dd92 | ||
|
|
37b95973aa | ||
|
|
22533e58ff | ||
|
|
e6d5e6809b | ||
|
|
07a3bb2b44 | ||
|
|
8185556858 | ||
|
|
275600f387 | ||
|
|
db5172dc2b | ||
|
|
1665c885f2 | ||
|
|
682f7f693d | ||
|
|
335fcd3f53 | ||
|
|
2e19d07ef1 | ||
|
|
b871c3e428 | ||
|
|
3f8c09ec19 | ||
|
|
bd7250ca36 | ||
|
|
f8bb8c519e | ||
|
|
082f068a5e | ||
|
|
dc292e8bb0 | ||
|
|
b1dee0f8b3 | ||
|
|
b197be563e | ||
|
|
52418599b3 | ||
|
|
a4628e58dd | ||
|
|
05f8c68183 | ||
|
|
2f5068b6ea | ||
|
|
50c4df64c1 | ||
|
|
6de7c9d48b | ||
|
|
ffb27fd225 | ||
|
|
920daa57a4 | ||
|
|
9863d53a5f | ||
|
|
ef595c086b | ||
|
|
5cc5251d5d | ||
|
|
3160603308 | ||
|
|
0747169015 | ||
|
|
006951de68 | ||
|
|
fc6aba9000 | ||
|
|
b1eaccd96d | ||
|
|
ba98ec78f5 | ||
|
|
ad030d9bbb | ||
|
|
6ee856200a | ||
|
|
19e301ad53 | ||
|
|
421b2993e2 | ||
|
|
aa203fdaa9 | ||
|
|
7ddaa55cab | ||
|
|
eaca869822 | ||
|
|
29a9b72db3 | ||
|
|
24deafb92f | ||
|
|
fce28c4a5e | ||
|
|
944c2eccc7 | ||
|
|
8fb15fb5bf | ||
|
|
288ea84c70 | ||
|
|
c89ce1cd48 | ||
|
|
37ed0f5621 | ||
|
|
cb83e12460 | ||
|
|
e7481a0939 | ||
|
|
08c4ce064b | ||
|
|
1c9e7544cf | ||
|
|
eda769be72 | ||
|
|
3612f5e218 | ||
|
|
37a1e07606 | ||
|
|
58d2475965 | ||
|
|
53b889155f | ||
|
|
1fa829d367 | ||
|
|
be9fcc2d2d | ||
|
|
b4dd3b00a5 | ||
|
|
3d900da3b5 | ||
|
|
b9a8047099 | ||
|
|
443961a929 | ||
|
|
e248db797a | ||
|
|
4810ea0838 | ||
|
|
102be2bfa5 | ||
|
|
dd8f04957c | ||
|
|
554e5b25fe | ||
|
|
052282642c | ||
|
|
f80b106d15 | ||
|
|
3e926715c8 | ||
|
|
1dbdcdc4c8 | ||
|
|
180e8dd674 | ||
|
|
13fd8279e5 | ||
|
|
d1c3fbe772 | ||
|
|
105e432d7f | ||
|
|
9e915da342 | ||
|
|
975378294a | ||
|
|
975ed119e9 | ||
|
|
7918c33702 | ||
|
|
ea3ec61613 | ||
|
|
481e6b4a3b | ||
|
|
68c7d2f51c | ||
|
|
6980733869 | ||
|
|
95553c0661 | ||
|
|
d59cbb36b9 | ||
|
|
f951840aa5 | ||
|
|
16f9ca8ed1 | ||
|
|
e43470dafc | ||
|
|
2ea9219797 | ||
|
|
c84a192000 | ||
|
|
fa4e6c8a77 | ||
|
|
8d3afeebcc | ||
|
|
c3259803fe | ||
|
|
7460d0f8b0 | ||
|
|
92585cde9b | ||
|
|
0f2c2c256f | ||
|
|
3bcc2d8900 | ||
|
|
6e7253de1d | ||
|
|
fd2d0f7165 | ||
|
|
3b15d73897 | ||
|
|
e4983bd587 | ||
|
|
85e0e4fdd4 | ||
|
|
88b2d37c0a | ||
|
|
19d2275fd1 | ||
|
|
488a115bfe | ||
|
|
c63a0950ff | ||
|
|
1d59f1b342 | ||
|
|
0119aa8df1 | ||
|
|
0035fb36eb | ||
|
|
86d7b53af1 | ||
|
|
30b80b654e | ||
|
|
15f1f50e20 | ||
|
|
98ee05b4dd | ||
|
|
99330c2b1d | ||
|
|
373b4434eb | ||
|
|
c83ffc197e | ||
|
|
7979ea7ddb | ||
|
|
3dc94feb2e | ||
|
|
9ef526a617 | ||
|
|
e875939d61 | ||
|
|
50dfb382e9 | ||
|
|
e00b27266a | ||
|
|
c8603c9faf | ||
|
|
20f372417c | ||
|
|
9e4247e10d | ||
|
|
f3f7e21274 | ||
|
|
fab6358d5a | ||
|
|
65751bd10d | ||
|
|
155cfb11e3 | ||
|
|
21bfaa6ff6 | ||
|
|
819b3b176f | ||
|
|
516e855773 | ||
|
|
b63a992346 | ||
|
|
d59fbb4146 | ||
|
|
4b60e9f353 | ||
|
|
72ccd15d5c | ||
|
|
eae77386eb | ||
|
|
6de66dd6be | ||
|
|
c9a77fa5bf | ||
|
|
231a3ea66d | ||
|
|
3635121123 | ||
|
|
8f6a1e90e4 | ||
|
|
5e3b760f65 | ||
|
|
45e1158a4b | ||
|
|
247e257563 | ||
|
|
29c3f5b9df | ||
|
|
908f30573a | ||
|
|
b061d24916 | ||
|
|
429b3497d1 | ||
|
|
1dd9bbe724 | ||
|
|
d14057a8b7 | ||
|
|
1b1b496eb2 | ||
|
|
34ba125628 | ||
|
|
29bc7988c9 | ||
|
|
b3cd2ee4ae | ||
|
|
216f754287 | ||
|
|
65e303a17e | ||
|
|
958bcc3ada | ||
|
|
dc00df27cb | ||
|
|
0acc194ae1 | ||
|
|
aa8ca91c08 | ||
|
|
b47a3fb155 | ||
|
|
90bc5d93f1 | ||
|
|
c9ac429ddd | ||
|
|
565678570c | ||
|
|
98376d7988 | ||
|
|
e2aec8c91e | ||
|
|
225d921887 | ||
|
|
c73b86f757 | ||
|
|
6f52ef30e5 | ||
|
|
739892e8d4 | ||
|
|
3b66aaf50a | ||
|
|
16469277b3 | ||
|
|
1e517bf6a2 | ||
|
|
0942f31bd8 | ||
|
|
5e3d207d5b | ||
|
|
d922927049 | ||
|
|
c8d89e8c34 | ||
|
|
656de767ee | ||
|
|
5b98e7b7c2 | ||
|
|
2ff819e750 | ||
|
|
d85711f694 | ||
|
|
4ac5fa8959 | ||
|
|
da07083481 | ||
|
|
1546617435 | ||
|
|
8e0c90966d | ||
|
|
e7f8e8eb1f | ||
|
|
42db5cc8f5 | ||
|
|
a860bd39e3 | ||
|
|
80bcb074ae | ||
|
|
d6276889a0 | ||
|
|
c831ffa1d9 | ||
|
|
a368a60eb8 | ||
|
|
2169fa36c9 | ||
|
|
e24ab83682 | ||
|
|
e4a71086bf | ||
|
|
a7c8349894 | ||
|
|
078987db6d | ||
|
|
5447010fdb | ||
|
|
ef6c7768b3 | ||
|
|
561cf64e4a | ||
|
|
743b1f6f5c | ||
|
|
dfcd2c9cc8 | ||
|
|
0ca5dcc1c5 | ||
|
|
70fc166b38 | ||
|
|
bc4241629c | ||
|
|
16b3c2f9f3 | ||
|
|
269795f3b8 | ||
|
|
9af9d51605 | ||
|
|
6e95551ad6 | ||
|
|
efe1e1543c | ||
|
|
1580562d51 | ||
|
|
c128fa57c6 | ||
|
|
5c3c427988 | ||
|
|
f31ae6d46d | ||
|
|
f9290127f3 | ||
|
|
1843c0b0c8 | ||
|
|
d30721074f | ||
|
|
abf2fa8efa | ||
|
|
95440891d0 | ||
|
|
fa48acda06 | ||
|
|
4396e12b8c | ||
|
|
4406a0d28f | ||
|
|
af79b17356 | ||
|
|
56762bfda5 | ||
|
|
7651f7b832 | ||
|
|
ad6e5cbcf5 | ||
|
|
99fcca954f | ||
|
|
a0feba0a48 | ||
|
|
ae9780c06b | ||
|
|
50d5fbb7c3 | ||
|
|
c6283a2c92 | ||
|
|
6e1450408a | ||
|
|
3935b2a4f6 | ||
|
|
6f84f3be1c | ||
|
|
e1394a6408 | ||
|
|
e0d39a9133 | ||
|
|
b07c82ccdb | ||
|
|
015d028d05 | ||
|
|
f585b6bb3b | ||
|
|
b3a85a1a8a | ||
|
|
c562691787 | ||
|
|
1ba972edec | ||
|
|
b3d082ce04 | ||
|
|
f0d97db2a2 | ||
|
|
fd72f5d2c2 | ||
|
|
e3694e90b4 | ||
|
|
b7d83f46d8 | ||
|
|
643e557388 | ||
|
|
869989fa83 | ||
|
|
70b84b9294 | ||
|
|
01b7b13f83 | ||
|
|
2b455da03f | ||
|
|
33b2d2eded | ||
|
|
ad6de316cc | ||
|
|
9f296c1238 | ||
|
|
0687c51c47 | ||
|
|
f3da64c603 | ||
|
|
a951562470 | ||
|
|
e87ccc7038 | ||
|
|
8b2c31b646 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -52,6 +52,7 @@
|
||||
/tests/cspatfile
|
||||
/tests/ere.script
|
||||
/tests/get-mb-cur-max
|
||||
/tests/init.sh
|
||||
/tests/khadafy.out
|
||||
/tests/patfile
|
||||
/tests/spencer1.script
|
||||
|
||||
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -1,3 +1,3 @@
|
||||
[submodule "gnulib"]
|
||||
path = gnulib
|
||||
url = git://git.sv.gnu.org/gnulib.git
|
||||
url = https://git.savannah.gnu.org/git/gnulib
|
||||
|
||||
@ -1 +1 @@
|
||||
3.6
|
||||
3.12
|
||||
|
||||
17
AUTHORS
17
AUTHORS
@ -1,4 +1,4 @@
|
||||
Copyright (C) 1992, 1997-2002, 2004-2021 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
@ -36,15 +36,20 @@ Sunday's excellent paper on fast string searching describes some of
|
||||
the history of the subject, as well as providing exhaustive
|
||||
performance analysis of various implementation alternatives.
|
||||
The inner loop of GNU grep is similar to Hume & Sunday's recommended
|
||||
"Tuned Boyer Moore" inner loop. See: Hume A, Sunday D.
|
||||
Fast string searching. Software Pract Exper. 1991;21(11):1221-48.
|
||||
https://doi.org/10.1002/spe.4380211105
|
||||
"Tuned Boyer Moore" inner loop (see the Hume & Sunday citation in
|
||||
the grep manual's "Performance" chapter).
|
||||
|
||||
Arnold Robbins contributed to improve dfa.[ch]. In fact
|
||||
it came straight from gawk-3.0.3 with small editing and fixes.
|
||||
|
||||
Many folks contributed. See THANKS; if I omitted someone please
|
||||
send me email.
|
||||
Norihiro Tanaka contributed many performance improvements and other
|
||||
fixes, particularly to multi-byte matchers.
|
||||
|
||||
Paul Eggert contributed support for recursive grep, as well as several
|
||||
performance improvements such as searching file holes efficiently.
|
||||
|
||||
Many other folks contributed. See THANKS; if someone is omitted
|
||||
please file a bug report.
|
||||
|
||||
Alain Magloire maintained GNU grep until version 2.5e.
|
||||
|
||||
|
||||
@ -1407,7 +1407,7 @@
|
||||
is put in different compiled structure patterns[]. The patterns
|
||||
are given to dfacomp() and kwsmusts() as is.
|
||||
(Ecompile): Likewise.
|
||||
(Fcompile): Reverse to the old behaviour of compiling the enire
|
||||
(Fcompile): Reverse to the old behaviour of compiling the entire
|
||||
patterns in one shot.
|
||||
(EGexecute): If falling to GNU regex for the matching, loop in the
|
||||
array of compile patterns[] to find a match.
|
||||
@ -1457,7 +1457,7 @@
|
||||
(xrealloc): Removed using lib/xmalloc.c.
|
||||
(xmalloc): Removed using lib/xmalloc.c
|
||||
(main): Register with atexit() to check for error on stdout.
|
||||
* configure.in: Check for atexit(), call jm_MALLOC, jm_RELLOC and
|
||||
* configure.in: Check for atexit(), call jm_MALLOC, jm_REALLOC and
|
||||
jm_PREREQ_ERROR.
|
||||
* tests/bre.awk: Removed the hack to drain the buffer since we
|
||||
always fclose(stdout) atexit.
|
||||
@ -1541,7 +1541,7 @@
|
||||
* src/exclude.h: New file.
|
||||
* src/grep.c (main): Took the GNU tar code to handle
|
||||
the option --include, --exclude, --exclude-from.
|
||||
Files are check for a match, with exlude_filename ().
|
||||
Files are check for a match, with exclude_filename ().
|
||||
New option --exclude-from.
|
||||
* src/savedir.c: Call exclude_filename() to check for
|
||||
file pattern exclusion or inclusion.
|
||||
@ -1592,7 +1592,7 @@
|
||||
|
||||
* m4/dosfile.m4 (AC_DOSFILE): Move AC_DEFINEs out of AC_CACHE_CHECK.
|
||||
|
||||
2001-02-17 Alain Malgoire
|
||||
2001-02-17 Alain Magloire
|
||||
|
||||
* doc/grep.texi: Document the new options and the new behaviour
|
||||
back-references are local. Use excerpt from Karl Berry regex
|
||||
@ -1699,8 +1699,8 @@
|
||||
(color): Rename color variable to color_option.
|
||||
Removed 'always|never|auto' arguments, not necessary for grep.
|
||||
(exclude_pattern): new variable, holder for the file pattern.
|
||||
(include_pattern): new variable, hoder for the file pattern.
|
||||
* src/savedir.c: Signature change, take two new argmuments.
|
||||
(include_pattern): new variable, holder for the file pattern.
|
||||
* src/savedir.c: Signature change, take two new arguments.
|
||||
* doc/grep.texi: Document, new options.
|
||||
* doc/grep.man: Document, new options.
|
||||
|
||||
@ -1712,7 +1712,7 @@
|
||||
|
||||
2001-02-09 Alain Magloire
|
||||
|
||||
Patch from Ulrich Drepper to provide hilighting.
|
||||
Patch from Ulrich Drepper to provide highlighting.
|
||||
|
||||
* src/grep.c: New option --color.
|
||||
(color): New static var.
|
||||
@ -1722,7 +1722,7 @@
|
||||
to find the offset of the matching string.
|
||||
* src/savedir.c: Take advantage of _DIRENT_HAVE_TYPE if supported.
|
||||
* src/search.c (EGexecute, Fexecute, Pexecute): Take a new argument
|
||||
when doing exact match for the color hiligting.
|
||||
when doing exact match for the color highlighting.
|
||||
|
||||
2000-09-01 Brian Youmans
|
||||
|
||||
@ -1792,7 +1792,7 @@
|
||||
|
||||
2000-06-02 Paul Eggert
|
||||
|
||||
Problen noted by Gerald Stoller <gerald_stoller@hotmail.com>
|
||||
Problem noted by Gerald Stoller <gerald_stoller@hotmail.com>
|
||||
|
||||
* src/grep.c (main): POSIX says that -q overrides -l, which
|
||||
in turn overrides the other output options. Fix grep to
|
||||
@ -2208,7 +2208,7 @@
|
||||
on pre-OpenVMS 7.x systems; general overhaul.
|
||||
* src/getpagesize.h: Reinstate support for different pagesizes on
|
||||
VAX and Alpha. Work around problem with DEC C compiler.
|
||||
* src/vms_fab.c: Cast to some assigments; fixed typo argcp vs. argp.
|
||||
* src/vms_fab.c: Cast to some assignments; fixed typo argcp vs. argp.
|
||||
* src/vms_fab.h: Added new include files to avoid warnings about
|
||||
undefined function prototypes.
|
||||
Those patches were provided by Martin P.J. Zinser (zinser@decus.de).
|
||||
@ -2670,7 +2670,7 @@
|
||||
|
||||
1999-03-16 Volker Borchert
|
||||
|
||||
* configure.in: Use case case ... esac for checking Visual C++.
|
||||
* configure.in: Use case ... esac for checking Visual C++.
|
||||
When ${CC} contains options it was not recognized.
|
||||
|
||||
1999-03-07 Paul Eggert
|
||||
@ -2764,7 +2764,7 @@
|
||||
|
||||
1999-02-10 Alain Magloire
|
||||
|
||||
* bootstrap/{Makefile{try,am},REAMDE} : skeleton
|
||||
* bootstrap/{Makefile{try,am},README} : skeleton
|
||||
provided for system lacking the tools to autoconfigure.
|
||||
|
||||
* src/{e,f,}grepmat.c: added guard [HAVE_CONFIG_H]
|
||||
@ -2858,7 +2858,7 @@
|
||||
* doc/Makefile.am djgpp/Makefile.am m4/Makefile.am vms/Makefile.am:
|
||||
New files.
|
||||
|
||||
* m4/progtest.m4: proctect '[]' from m4.
|
||||
* m4/progtest.m4: protect '[]' from m4.
|
||||
Noted by Eli Z.
|
||||
|
||||
* PATCHES-AC: New file, add the patch for autoconf in the dist.
|
||||
@ -3333,7 +3333,7 @@
|
||||
Suggested by Harald Hanche-Olsen.
|
||||
|
||||
* src/grep.c (main): '-f /dev/null' now specifies no patterns
|
||||
and therfore matches nothing.
|
||||
and therefore matches nothing.
|
||||
Reported by Jorge Stolfi.
|
||||
Patched by Paul Eggert.
|
||||
|
||||
@ -3368,7 +3368,7 @@
|
||||
* src/grep.c: reverse back to greping directories,
|
||||
One could skip the error message by defining
|
||||
SKIP_DIR_ERROR. There is no clear way of doing
|
||||
things, I hope to setle this on the next majore release
|
||||
things, I hope to settle this on the next major release
|
||||
Thanks Paul Eggert, Eli Zaretskii and gnits for the
|
||||
exchange.
|
||||
|
||||
@ -3427,7 +3427,7 @@
|
||||
(setmatcher) [HAVE_SETRLIMIT]: Set re_max_failures so that the
|
||||
matcher won't ever overflow the stack.
|
||||
(main) [__MSDOS__, _WIN32]: Handle backslashes and drive letters
|
||||
in argv[0], remove the .exe suffix, and downcase the prgram name.
|
||||
in argv[0], remove the .exe suffix, and downcase the program name.
|
||||
[O_BINARY]: Pass additional DOS-specific options to getopt_long
|
||||
and handle them. Call stat before attempting to open the file, in
|
||||
case it is a directory (DOS will fail the open call for
|
||||
@ -3497,7 +3497,7 @@
|
||||
regex package. Change the way the tests were done to be more
|
||||
conformant to automake.
|
||||
|
||||
* configure.in: added --disable-regex for folks with their own fuctions.
|
||||
* configure.in: added --disable-regex for folks with their own functions.
|
||||
|
||||
* grep-20d : available for testing
|
||||
|
||||
@ -3551,7 +3551,7 @@
|
||||
|
||||
* check.sh, scriptgen.awk: fix grep paths.
|
||||
|
||||
* change the directory strucure: grep is now in src to comply with
|
||||
* change the directory structure: grep is now in src to comply with
|
||||
gettext.m4.
|
||||
|
||||
* grep.c version.c [VERSION]: got rid of version.c,
|
||||
@ -3648,6 +3648,6 @@
|
||||
|
||||
* Version 2.0 released.
|
||||
|
||||
Copyright (C) 1998-2021 Free Software Foundation, Inc.
|
||||
Copyright (C) 1998-2026 Free Software Foundation, Inc.
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted provided the copyright notice and this notice are preserved.
|
||||
|
||||
8
HACKING
8
HACKING
@ -12,7 +12,7 @@ Use the latest upstream sources
|
||||
Base any changes you make on the latest upstream sources.
|
||||
You can get a copy of the latest with this command:
|
||||
|
||||
git clone git://git.sv.gnu.org/grep
|
||||
git clone https://git.savannah.gnu.org/git/grep
|
||||
|
||||
That downloads the entire repository, including revision control history.
|
||||
Once downloaded, you can get incremental updates by running one of
|
||||
@ -83,7 +83,7 @@ Make your changes on a private "topic" branch
|
||||
=============================================
|
||||
So you checked out grep like this:
|
||||
|
||||
git clone git://git.sv.gnu.org/grep
|
||||
git clone https://git.savannah.gnu.org/git/grep
|
||||
|
||||
Now, cd into the grep/ directory and run:
|
||||
|
||||
@ -468,7 +468,7 @@ you'd use doc/Copyright/request-assign.future:
|
||||
https://www.gnu.org/software/gnulib/Copyright/request-assign.future
|
||||
|
||||
You may make assignments for up to four projects at a time.
|
||||
[
|
||||
|
||||
In case you're wondering why we bother with all of this, read this:
|
||||
|
||||
https://www.gnu.org/licenses/why-assign.html
|
||||
@ -597,7 +597,7 @@ Then just open the index.html file (in the generated lcov-html directory)
|
||||
in your favorite web browser.
|
||||
|
||||
========================================================================
|
||||
Copyright (C) 2009-2021 Free Software Foundation, Inc.
|
||||
Copyright (C) 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
Permission is granted to copy, distribute and/or modify this document
|
||||
under the terms of the GNU Free Documentation License, Version 1.3 or
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# Process this file with automake to create Makefile.in
|
||||
#
|
||||
# Copyright 1997-1998, 2005-2021 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -66,13 +66,10 @@ gen-ChangeLog:
|
||||
# current locale considers to be equal.
|
||||
ASSORT = LC_ALL=C sort
|
||||
|
||||
# Extract all lines up to the first one starting with "##".
|
||||
prologue = perl -ne '/^\#\#/ and exit; print' $(srcdir)/THANKS.in
|
||||
|
||||
THANKS: THANKS.in Makefile.am .mailmap thanks-gen
|
||||
$(AM_V_GEN)rm -f $@-t $@; \
|
||||
{ \
|
||||
$(prologue); echo; \
|
||||
perl -ne '/^\#\#/ and exit; print' $(srcdir)/THANKS.in; echo; \
|
||||
{ perl -ne '/^$$/.../^$$/ and !/^$$/ and s/ +/\0/ and print' \
|
||||
$(srcdir)/THANKS.in; \
|
||||
git log --pretty=format:'%aN%x00%aE' \
|
||||
|
||||
137
NEWS
137
NEWS
@ -1,5 +1,140 @@
|
||||
GNU grep NEWS -*- outline -*-
|
||||
|
||||
* Noteworthy changes in release ?.? (????-??-??) [?]
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.12 (2025-04-10) [stable]
|
||||
|
||||
** Bug fixes
|
||||
|
||||
Searching a directory with at least 100,000 entries no longer fails
|
||||
with "Operation not supported" and exit status 2. Now, this prints 1
|
||||
and no diagnostic, as expected:
|
||||
$ mkdir t && cd t && seq 100000|xargs touch && grep -r x .; echo $?
|
||||
1
|
||||
[bug introduced in grep 3.11]
|
||||
|
||||
-mN where 1 < N no longer mistakenly lseeks to end of input merely
|
||||
because standard output is /dev/null.
|
||||
|
||||
** Changes in behavior
|
||||
|
||||
The --unix-byte-offsets (-u) option is gone. In grep-3.7 (2021-08-14)
|
||||
it became a warning-only no-op. Before then, it was a Windows-only no-op.
|
||||
|
||||
On Windows platforms and on AIX in 32-bit mode, grep in some cases
|
||||
now supports Unicode characters outside the Basic Multilingual Plane.
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.11 (2023-05-13) [stable]
|
||||
|
||||
** Bug fixes
|
||||
|
||||
With -P, patterns like [\d] now work again. Fixing this has caused
|
||||
grep to revert to the behavior of grep 3.8, in that patterns like \w
|
||||
and \b go back to using ASCII rather than Unicode interpretations.
|
||||
However, future versions of GNU grep and/or PCRE2 are likely to fix
|
||||
this and change the behavior of \w and \b back to Unicode again,
|
||||
without breaking [\d] as 3.10 did.
|
||||
[bug introduced in grep 3.10]
|
||||
|
||||
grep no longer fails on files dated after the year 2038,
|
||||
when running on 32-bit x86 and ARM hosts using glibc 2.34+.
|
||||
[bug introduced in grep 3.9]
|
||||
|
||||
grep -P no longer fails to match patterns using negated classes
|
||||
like \D or \W when linked with PCRE2 10.34 or newer.
|
||||
[bug introduced in grep 3.8]
|
||||
|
||||
|
||||
** Changes in behavior
|
||||
|
||||
grep --version now prints a line describing the version of PCRE2 it uses.
|
||||
For example, it prints this when built with the very latest from git:
|
||||
grep -P uses PCRE2 10.43-DEV 2023-04-14
|
||||
or this with what's currently available in Fedora 37:
|
||||
grep -P uses PCRE2 10.40 2022-04-14
|
||||
|
||||
Previous versions of grep wouldn't respect the user-provided settings for
|
||||
PCRE_CFLAGS and PCRE_LIBS when building if a libpcre2-8 pkg-config module
|
||||
was found.
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.10 (2023-03-22) [stable]
|
||||
|
||||
** Bug fixes
|
||||
|
||||
With -P, \d now matches only ASCII digits, regardless of PCRE
|
||||
options/modes. The changes in grep-3.9 to make \b and \w work
|
||||
properly had the undesirable side effect of making \d also match
|
||||
e.g., the Arabic digits: ٠١٢٣٤٥٦٧٨٩. With grep-3.9, -P '\d+'
|
||||
would match that ten-digit (20-byte) string. Now, to match such
|
||||
a digit, you would use \p{Nd}. Similarly, \D is now mapped to [^0-9].
|
||||
[bug introduced in grep 3.9]
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.9 (2023-03-05) [stable]
|
||||
|
||||
** Bug fixes
|
||||
|
||||
With -P, some non-ASCII UTF8 characters were not recognized as
|
||||
word-constituent due to our omission of the PCRE2_UCP flag. E.g.,
|
||||
given f(){ echo Perú|LC_ALL=en_US.UTF-8 grep -Po "$1"; } and
|
||||
this command, echo $(f 'r\w'):$(f '.\b'), before it would print ":r".
|
||||
After the fix, it prints the correct results: "rú:ú".
|
||||
|
||||
When given multiple patterns the last of which has a back-reference,
|
||||
grep no longer mistakenly matches lines in some cases.
|
||||
[Bug#36148#13 introduced in grep 3.4]
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.8 (2022-09-02) [stable]
|
||||
|
||||
** Changes in behavior
|
||||
|
||||
The -P option is now based on PCRE2 instead of the older PCRE,
|
||||
thanks to code contributed by Carlo Arenas.
|
||||
|
||||
The egrep and fgrep commands, which have been deprecated since
|
||||
release 2.5.3 (2007), now warn that they are obsolescent and should
|
||||
be replaced by grep -E and grep -F.
|
||||
|
||||
The confusing GREP_COLOR environment variable is now obsolescent.
|
||||
Instead of GREP_COLOR='xxx', use GREP_COLORS='mt=xxx'. grep now
|
||||
warns if GREP_COLOR is used and is not overridden by GREP_COLORS.
|
||||
Also, grep now treats GREP_COLOR like GREP_COLORS by silently
|
||||
ignoring it if it attempts to inject ANSI terminal escapes.
|
||||
|
||||
Regular expressions with stray backslashes now cause warnings, as
|
||||
their unspecified behavior can lead to unexpected results.
|
||||
For example, '\a' and 'a' are not always equivalent
|
||||
<https://bugs.gnu.org/39678>. Similarly, regular expressions or
|
||||
subexpressions that start with a repetition operator now also cause
|
||||
warnings due to their unspecified behavior; for example, *a(+b|{1}c)
|
||||
now has three reasons to warn. The warnings are intended as a
|
||||
transition aid; they are likely to be errors in future releases.
|
||||
|
||||
Regular expressions like [:space:] are now errors even if
|
||||
POSIXLY_CORRECT is set, since POSIX now allows the GNU behavior.
|
||||
|
||||
** Bug fixes
|
||||
|
||||
In locales using UTF-8 encoding, the regular expression '.' no
|
||||
longer sometimes fails to match Unicode characters U+D400 through
|
||||
U+D7FF (some Hangul Syllables, and Hangul Jamo Extended-B) and
|
||||
Unicode characters U+108000 through U+10FFFF (half of Supplemental
|
||||
Private Use Area plane B).
|
||||
[bug introduced in grep 3.4]
|
||||
|
||||
The -s option no longer suppresses "binary file matches" messages.
|
||||
[Bug#51860 introduced in grep 3.5]
|
||||
|
||||
** Documentation improvements
|
||||
|
||||
The manual now covers unspecified behavior in patterns like \x, (+),
|
||||
and range expressions outside the POSIX locale.
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.7 (2021-08-14) [stable]
|
||||
|
||||
** Changes in behavior
|
||||
@ -1276,7 +1411,7 @@ necessary to track the evolution of the regex package, and since
|
||||
I was changing it anyway I decided to do a general cleanup.
|
||||
|
||||
========================================================================
|
||||
Copyright (C) 1992, 1997-2002, 2004-2021 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
|
||||
16
README
16
README
@ -1,4 +1,4 @@
|
||||
Copyright (C) 1992, 1997-2002, 2004-2021 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
@ -12,13 +12,13 @@ GNU grep is provided "as is" with no warranty. The exact terms
|
||||
under which you may use and (re)distribute this program are detailed
|
||||
in the GNU General Public License, in the file COPYING.
|
||||
|
||||
GNU grep is based on a fast lazy-state deterministic matcher (about
|
||||
twice as fast as stock Unix egrep) hybridized with a Boyer-Moore-Gosper
|
||||
search for a fixed string that eliminates impossible text from being
|
||||
considered by the full regexp matcher without necessarily having to
|
||||
look at every character. The result is typically many times faster
|
||||
than Unix grep or egrep. (Regular expressions containing back-references
|
||||
will run more slowly, however.)
|
||||
GNU grep is based on a fast lazy-state deterministic matcher
|
||||
hybridized with Boyer-Moore and Aho-Corasick searches for fixed
|
||||
strings that eliminate impossible text from being considered by the
|
||||
full regexp matcher without necessarily having to look at every
|
||||
character. The result is typically many times faster than traditional
|
||||
implementations. (Regular expressions containing back-references will
|
||||
run more slowly, however.)
|
||||
|
||||
See the files AUTHORS and THANKS for a list of authors and other contributors.
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
Copyright (C) 1992, 1997-2002, 2004-2021 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,35 +1,47 @@
|
||||
-*- outline -*-
|
||||
Building from a Git repository -*- outline -*-
|
||||
|
||||
These notes intend to help people working on the checked-out sources.
|
||||
These requirements do not apply when building from a distribution tarball.
|
||||
If this package has a file HACKING, please also read that file for
|
||||
more detailed contribution guidelines.
|
||||
|
||||
* Requirements
|
||||
|
||||
We've opted to keep only the highest-level sources in the GIT repository.
|
||||
This eases our maintenance burden, (fewer merges etc.), but imposes more
|
||||
We've opted to keep only the highest-level sources in the Git repository.
|
||||
This eases our maintenance burden (fewer merges etc.), but imposes more
|
||||
requirements on anyone wishing to build from the just-checked-out sources.
|
||||
Note the requirements to build the released archive are much less and
|
||||
are just the requirements of the standard ./configure && make procedure.
|
||||
(The requirements to build from a release are much less and are just
|
||||
the requirements of the standard './configure && make' procedure.)
|
||||
Specific development tools and versions will be checked for and listed by
|
||||
the bootstrap script. See README-prereq for specific notes on obtaining
|
||||
these prerequisite tools.
|
||||
|
||||
Valgrind <http://valgrind.org/> is also highly recommended, if
|
||||
Valgrind supports your architecture. See also README-valgrind.
|
||||
Valgrind supports your architecture. See also README-valgrind
|
||||
(if present).
|
||||
|
||||
While building from a just-cloned source tree may require installing a
|
||||
few prerequisites, later, a plain 'git pull && make' should be sufficient.
|
||||
few prerequisites, later, a plain 'git pull && make' typically suffices.
|
||||
|
||||
* First GIT checkout
|
||||
* First Git checkout
|
||||
|
||||
You can get a copy of the source repository like this:
|
||||
|
||||
$ git clone git://git.sv.gnu.org/grep
|
||||
$ cd grep
|
||||
$ git clone https://git.savannah.gnu.org/git/<packagename>
|
||||
$ cd <packagename>
|
||||
|
||||
As an optional step, if you already have a copy of the gnulib git
|
||||
repository on your hard drive, then you can use it as a reference to
|
||||
reduce download time and disk space requirements:
|
||||
where '<packagename>' stands for 'coreutils' or whatever other package
|
||||
you are building.
|
||||
|
||||
To use the most-recent Gnulib (as opposed to the Gnulib version that
|
||||
the package last synchronized to), do this next:
|
||||
|
||||
$ git submodule foreach git pull origin master
|
||||
$ git commit -m 'build: update gnulib submodule to latest' gnulib
|
||||
|
||||
As an optional step, if you already have a copy of the Gnulib Git
|
||||
repository, then you can use it as a reference to reduce download
|
||||
time and file system space requirements:
|
||||
|
||||
$ export GNULIB_SRCDIR=/path/to/gnulib
|
||||
|
||||
@ -38,20 +50,14 @@ which are extracted from other source packages:
|
||||
|
||||
$ ./bootstrap
|
||||
|
||||
To use the most-recent gnulib (as opposed to the gnulib version that
|
||||
the package last synchronized to), do this next:
|
||||
|
||||
$ git submodule foreach git pull origin master
|
||||
$ git commit -m 'build: update gnulib submodule to latest' gnulib
|
||||
|
||||
And there you are! Just
|
||||
|
||||
$ ./configure --quiet #[--enable-gcc-warnings] [*]
|
||||
$ ./configure --quiet #[--disable-gcc-warnings] [*]
|
||||
$ make
|
||||
$ make check
|
||||
|
||||
At this point, there should be no difference between your local copy,
|
||||
and the GIT master copy:
|
||||
and the Git master copy:
|
||||
|
||||
$ git diff
|
||||
|
||||
@ -59,15 +65,43 @@ should output no difference.
|
||||
|
||||
Enjoy!
|
||||
|
||||
[*] The --enable-gcc-warnings option is useful only with glibc
|
||||
and with a very recent version of gcc. You'll probably also have
|
||||
to use recent system headers. If you configure with this option,
|
||||
and spot a problem, please be sure to send the report to the bug
|
||||
reporting address of this package, and not to that of gnulib, even
|
||||
if the problem seems to originate in a gnulib-provided file.
|
||||
[*] By default GCC warnings are enabled when building from Git.
|
||||
If you get warnings with recent GCC and Glibc with default
|
||||
configure-time options, please report the warnings to the bug
|
||||
reporting address of this package instead of to bug-gnulib,
|
||||
even if the problem seems to originate in a Gnulib-provided file.
|
||||
If you get warnings with other configurations, you can run
|
||||
'./configure --disable-gcc-warnings' or 'make WERROR_CFLAGS='
|
||||
to build quietly or verbosely, respectively.
|
||||
-----
|
||||
|
||||
Copyright (C) 2002-2021 Free Software Foundation, Inc.
|
||||
* Submitting patches
|
||||
|
||||
If you develop a fix or a new feature, please send it to the
|
||||
appropriate bug-reporting address as reported by the --help option of
|
||||
each program. One way to do this is to use vc-dwim
|
||||
(<https://www.gnu.org/software/vc-dwim/>), as follows.
|
||||
|
||||
Run the command "vc-dwim --initialize" from the top-level directory
|
||||
of this package's git-cloned hierarchy.
|
||||
|
||||
Edit the (empty) ChangeLog file that this command creates, creating a
|
||||
properly-formatted entry according to the GNU coding standards
|
||||
<https://www.gnu.org/prep/standards/html_node/Change-Logs.html>.
|
||||
|
||||
Make your changes.
|
||||
|
||||
Run the command "vc-dwim" and make sure its output (the diff of all
|
||||
your changes) looks good.
|
||||
|
||||
Run "vc-dwim --commit".
|
||||
|
||||
Run the command "git format-patch --stdout -1", and email its output
|
||||
in, using the output's subject line.
|
||||
|
||||
-----
|
||||
|
||||
Copyright (C) 2002-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,63 +1,41 @@
|
||||
This gives some notes on obtaining the tools required for development.
|
||||
I.E. the tools checked for by the bootstrap script and include:
|
||||
These tools can be used by the 'bootstrap' and 'configure' scripts,
|
||||
as well as by 'make'. They include:
|
||||
|
||||
- Autoconf <https://www.gnu.org/software/autoconf/>
|
||||
- Automake <https://www.gnu.org/software/automake/>
|
||||
- Bison <https://www.gnu.org/software/bison/>
|
||||
- Gettext <https://www.gnu.org/software/gettext/>
|
||||
- Git <https://git-scm.com/>
|
||||
- Gperf <https://www.gnu.org/software/gperf/>
|
||||
- Gzip <https://www.gnu.org/software/gzip/>
|
||||
- Help2man <https://www.gnu.org/software/help2man/>
|
||||
- M4 <https://www.gnu.org/software/m4/>
|
||||
- Make <https://www.gnu.org/software/make/>
|
||||
- Perl <https://www.cpan.org/>
|
||||
- Pkg-config <https://www.freedesktop.org/wiki/Software/pkg-config/>
|
||||
- Rsync <https://rsync.samba.org/>
|
||||
- Tar <https://www.gnu.org/software/tar/>
|
||||
- Texinfo <https://www.gnu.org/software/texinfo/>
|
||||
- Wget <https://www.gnu.org/software/wget/>
|
||||
- XZ Utils <https://tukaani.org/xz/>
|
||||
|
||||
Note please try to install/build official packages for your system.
|
||||
If these programs are not available use the following instructions
|
||||
to build them and install the results into a directory that you will
|
||||
then use when building this package.
|
||||
It is generally better to use official packages for your system.
|
||||
If a package is not officially available you can build it from source
|
||||
and install it into a directory that you can then use to build this
|
||||
package. If some packages are available but are too old, install the
|
||||
too-old versions first as they may be needed to build newer versions.
|
||||
|
||||
Even if the official version of a package for your system is too old,
|
||||
please install it, as it may be required to build the newer versions.
|
||||
The examples below install into $HOME/grep/deps/, so if you are
|
||||
going to follow these instructions, first ensure that your $PATH is
|
||||
set correctly by running this command:
|
||||
Here is an example of how to build a program from source. This
|
||||
example is for Autoconf; a similar approach should work for the other
|
||||
developer prerequisites. This example assumes Autoconf 2.71; it
|
||||
should be OK to use a later version of Autoconf, if available.
|
||||
|
||||
prefix=$HOME/grep/deps
|
||||
prefix=$HOME/prefix # (or wherever else you choose)
|
||||
export PATH=$prefix/bin:$PATH
|
||||
|
||||
* autoconf *
|
||||
|
||||
# Note Autoconf 2.62 or newer is needed to build automake-1.11.1
|
||||
git clone --depth=1 git://git.sv.gnu.org/autoconf.git
|
||||
git checkout v2.62
|
||||
autoreconf -vi
|
||||
wget https://ftp.gnu.org/pub/gnu/autoconf/autoconf-2.71.tar.gz
|
||||
gzip -d <autoconf-2.71.tar.gz | tar xf -
|
||||
cd autoconf-2.71
|
||||
./configure --prefix=$prefix
|
||||
make install
|
||||
|
||||
* automake *
|
||||
|
||||
# Note help2man is required to build automake fully
|
||||
git clone git://git.sv.gnu.org/automake.git
|
||||
cd automake
|
||||
git checkout v1.11.1
|
||||
./bootstrap
|
||||
./configure --prefix=$prefix
|
||||
make install
|
||||
|
||||
This package uses XZ utils (successor to LZMA) to create
|
||||
a compressed distribution tarball. Using this feature of Automake
|
||||
requires version 1.10a or newer, as well as the xz program itself.
|
||||
|
||||
* xz *
|
||||
|
||||
git clone git://ctrl.tukaani.org/xz.git
|
||||
cd xz
|
||||
./autogen.sh
|
||||
./configure --prefix=$prefix
|
||||
make install
|
||||
|
||||
Now you can build this package as described in README-hacking.
|
||||
Once the prerequisites are installed, you can build this package as
|
||||
described in README-hacking.
|
||||
|
||||
@ -13,6 +13,7 @@ end of e.g., grep --help).
|
||||
Akim Demaille akim@epita.fr
|
||||
Andreas Schwab schwab@suse.de
|
||||
Andreas Ley andy@rz.uni-karlsruhe.de
|
||||
Anton Samokat samokat700@gmail.com
|
||||
Bastiaan "Darquan" Stougie darquan@zonnet.nl
|
||||
Ben Elliston bje@cygnus.com
|
||||
Bernd Strieder strieder@student.uni-kl.de
|
||||
@ -28,6 +29,7 @@ David J MacKenzie djm@catapult.va.pubnix.com
|
||||
David O'Brien obrien@freebsd.org
|
||||
'Drake' Daham Wang drakewang@gmail.com
|
||||
Egmont Koblinger egmont@gmail.com
|
||||
Emanuele Torre torreemanuele6@gmail.com
|
||||
Fernando Basso fernandobasso.br@gmail.com
|
||||
Florian La Roche laroche@redhat.com
|
||||
François Pinard pinard@iro.umontreal.ca
|
||||
@ -35,6 +37,7 @@ Gerald Stoller gerald_stoller@hotmail.com
|
||||
Grant McDorman grant@isgtec.com
|
||||
Greg Boyd gboyd.ccsf@gmail.com
|
||||
Greg Louis glouis@dynamicro.on.ca
|
||||
Gro-Tsen https://twitter.com/gro_tsen
|
||||
Guglielmo 'bond' Bondioni g.bondioni@libero.it
|
||||
H. Merijn Brand h.m.brand@hccnet.nl
|
||||
Harald Hanche-Olsen hanche@math.ntnu.no
|
||||
@ -50,9 +53,11 @@ Joel N. Weber II devnull@gnu.org
|
||||
John Hughes john@nitelite.calvacom.fr
|
||||
Jorge Stolfi stolfi@dcc.unicamp.br
|
||||
Karl Heuer kwzh@gnu.org
|
||||
Karl Pettersson karl.pettersson@klpn.se
|
||||
Kaveh R. Ghazi ghazi@caip.rutgers.edu
|
||||
Kazuro Furukawa furukawa@apricot.kek.jp
|
||||
Keith Bostic bostic@bsdi.com
|
||||
Koen Claessen koen@chalmers.se
|
||||
Krishna Sethuraman krishna@sgihub.corp.sgi.com
|
||||
Kurt D Schwehr kdschweh@insci14.ucsd.edu
|
||||
Ludovic Courtès ludo@gnu.org
|
||||
@ -77,6 +82,7 @@ Rainer Orth ro@cebitec.uni-bielefeld.de
|
||||
Roland Roberts rroberts@muller.com
|
||||
Ruslan Ermilov ru@freebsd.org
|
||||
Santiago Vila sanvila@unex.es
|
||||
Sebastian Carlos sebaaa1754@gmail.com
|
||||
Shannon Hill hill@synnet.com
|
||||
Sotiris Vassilopoulos Sotiris.Vassilopoulos@betatech.gr
|
||||
Standish Parsley adsspamtrap01@yahoo.com
|
||||
|
||||
6
TODO
6
TODO
@ -1,6 +1,6 @@
|
||||
Things to do for GNU grep
|
||||
|
||||
Copyright (C) 1992, 1997-2002, 2004-2021 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
@ -31,13 +31,13 @@ GNU grep originally did 32-bit arithmetic. Although it has moved to
|
||||
64-bit on 64-bit platforms by using types like ptrdiff_t and size_t,
|
||||
this conversion has not been entirely systematic and should be checked.
|
||||
|
||||
Lazy dynamic linking of libpcre. See Debian’s 03-397262-dlopen-pcre.patch.
|
||||
Lazy dynamic linking of the PCRE library.
|
||||
|
||||
Check FreeBSD’s integration of zgrep (-Z) and bzgrep (-J) in one
|
||||
binary. Is there a possibility of doing even better by automatically
|
||||
checking the magic of binary files ourselves (0x1F 0x8B for gzip, 0x1F
|
||||
0x9D for compress, and 0x42 0x5A 0x68 for bzip2)? Once what to do with
|
||||
libpcre is decided, do the same for libz and libbz2.
|
||||
the PCRE library is decided, do the same for libz and libbz2.
|
||||
|
||||
|
||||
===================
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# Bootstrap configuration.
|
||||
|
||||
# Copyright (C) 2006-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2006-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -17,27 +17,31 @@
|
||||
|
||||
avoided_gnulib_modules='
|
||||
--avoid=lock-tests
|
||||
--avoid=mbuiter
|
||||
--avoid=mbuiterf
|
||||
--avoid=mbrlen-tests
|
||||
--avoid=mbrtowc-tests
|
||||
--avoid=update-copyright-tests
|
||||
'
|
||||
|
||||
# gnulib modules used by this package.
|
||||
gnulib_modules='
|
||||
alloca
|
||||
announce-gen
|
||||
argmatch
|
||||
assert-h
|
||||
c-ctype
|
||||
c-stack
|
||||
c-strcase
|
||||
c-strcasecmp
|
||||
c32isalnum
|
||||
c32rtomb
|
||||
closeout
|
||||
configmake
|
||||
dirname-lgpl
|
||||
dfa
|
||||
dirname-lgpl
|
||||
do-release-commit-and-tag
|
||||
error
|
||||
exclude
|
||||
fcntl-h
|
||||
fdl
|
||||
fnmatch
|
||||
fstatat
|
||||
fts
|
||||
@ -50,58 +54,58 @@ gitlog-to-changelog
|
||||
gnu-web-doc-update
|
||||
gnupload
|
||||
hash
|
||||
idx
|
||||
ignore-value
|
||||
intprops
|
||||
inttypes
|
||||
inttypes-h
|
||||
isatty
|
||||
isblank
|
||||
iswctype
|
||||
kwset
|
||||
largefile
|
||||
locale
|
||||
locale-h
|
||||
lseek
|
||||
maintainer-makefile
|
||||
malloc-gnu
|
||||
manywarnings
|
||||
mbrlen
|
||||
mbrtowc
|
||||
mbrtoc32-regular
|
||||
mbszero
|
||||
mcel-prefer
|
||||
memchr
|
||||
memchr2
|
||||
mempcpy
|
||||
minmax
|
||||
nullptr
|
||||
obstack
|
||||
openat-safer
|
||||
perl
|
||||
propername
|
||||
rawmemchr
|
||||
readme-release
|
||||
realloc-gnu
|
||||
realloc-posix
|
||||
regex
|
||||
safe-read
|
||||
same-inode
|
||||
ssize_t
|
||||
stddef
|
||||
stdlib
|
||||
stdckdint-h
|
||||
stddef-h
|
||||
stdlib-h
|
||||
stpcpy
|
||||
strerror
|
||||
string
|
||||
string-h
|
||||
strstr
|
||||
strtoull
|
||||
strtoumax
|
||||
sys_stat
|
||||
unistd
|
||||
sys_stat-h
|
||||
unistd-h
|
||||
unlocked-io
|
||||
update-copyright
|
||||
useless-if-before-free
|
||||
verify
|
||||
version-etc-fsf
|
||||
wchar
|
||||
wcrtomb
|
||||
wctob
|
||||
wctype-h
|
||||
wchar-single
|
||||
windows-stat-inodes
|
||||
xalloc
|
||||
xbinary-io
|
||||
xstrtoimax
|
||||
year2038
|
||||
'
|
||||
gnulib_name=libgreputils
|
||||
|
||||
@ -129,13 +133,16 @@ gnulib_tool_option_extras="--tests-base=gnulib-tests --with-tests --symlink\
|
||||
buildreq="\
|
||||
autoconf 2.62
|
||||
automake 1.11.1
|
||||
autopoint -
|
||||
autopoint 0.19.2
|
||||
gettext -
|
||||
git 1.4.4
|
||||
gzip -
|
||||
m4 -
|
||||
makeinfo -
|
||||
rsync -
|
||||
tar -
|
||||
texi2pdf 6.1
|
||||
wget -
|
||||
xz -
|
||||
"
|
||||
|
||||
bootstrap_post_import_hook ()
|
||||
@ -143,22 +150,27 @@ bootstrap_post_import_hook ()
|
||||
# Automake requires that ChangeLog exist.
|
||||
touch ChangeLog || return 1
|
||||
|
||||
# Copy tests/init.sh from Gnulib.
|
||||
$gnulib_tool --copy-file tests/init.sh
|
||||
|
||||
# Copy pkg-config's pkg.m4 so that our downstream users don't need to.
|
||||
local ac_dir=`aclocal --print-ac-dir`
|
||||
test -s "$ac_dir/dirlist" && ac_dir=$ac_dir:`tr '\n' : < "$ac_dir/dirlist"`
|
||||
oIFS=$IFS
|
||||
IFS=:
|
||||
local found=false
|
||||
for dir in \
|
||||
$ACLOCAL_PATH $ac_dir /usr/share/aclocal ''
|
||||
do
|
||||
IFS=$oIFS
|
||||
if test -n "$dir" && test -r "$dir/pkg.m4"; then
|
||||
cp "$dir/pkg.m4" m4/pkg.m4
|
||||
return
|
||||
found=:
|
||||
break
|
||||
fi
|
||||
done
|
||||
IFS=$oIFS
|
||||
die 'Cannot find pkg.m4; perhaps you need to install pkg-config'
|
||||
$found || die 'Cannot find pkg.m4; perhaps you need to install pkg-config'
|
||||
}
|
||||
|
||||
bootstrap_epilogue()
|
||||
|
||||
22
cfg.mk
22
cfg.mk
@ -1,5 +1,5 @@
|
||||
# Customize maint.mk -*- makefile -*-
|
||||
# Copyright (C) 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -30,7 +30,9 @@ url_dir_list = https://ftp.gnu.org/gnu/$(PACKAGE)
|
||||
|
||||
# Tests not to run as part of "make distcheck".
|
||||
local-checks-to-skip = \
|
||||
sc_texinfo_acronym
|
||||
sc_indent \
|
||||
sc_texinfo_acronym \
|
||||
sc_unportable_grep_q
|
||||
|
||||
# Tools used to bootstrap this package, used for "announcement".
|
||||
bootstrap-tools = autoconf,automake,gnulib
|
||||
@ -40,7 +42,14 @@ announcement_Cc_ = $(translation_project_), $(PACKAGE)-devel@gnu.org
|
||||
|
||||
# The tight_scope test gets confused about inline functions
|
||||
# like 'to_uchar'.
|
||||
_gl_TS_unmarked_extern_functions = main usage mb_clen to_uchar dfaerror dfawarn
|
||||
_gl_TS_unmarked_extern_functions = \
|
||||
main usage mb_clen to_uchar dfaerror dfawarn imbrlen
|
||||
|
||||
# Write base64-encoded (not hex) checksums into the announcement.
|
||||
announce_gen_args = --cksum-checksums
|
||||
|
||||
# Add an exemption for sc_makefile_at_at_check.
|
||||
_makefile_at_at_check_exceptions = ' && !/MAKEINFO/'
|
||||
|
||||
# Now that we have better tests, make this the default.
|
||||
export VERBOSE = yes
|
||||
@ -65,7 +74,7 @@ export VERBOSE = yes
|
||||
# 1127556 9e
|
||||
export XZ_OPT = -6e
|
||||
|
||||
old_NEWS_hash = da867d7903ff4936e2ee6c173e20fb63
|
||||
old_NEWS_hash = 3713245f672c3a9d1b455d6cc410c9ec
|
||||
|
||||
# We prefer to spell it back-reference, as POSIX does.
|
||||
sc_prohibit_backref:
|
||||
@ -95,6 +104,7 @@ LINE_LEN_MAX = 80
|
||||
FILTER_LONG_LINES = \
|
||||
/^[^:]*\.diff:[^:]*:@@ / d; \
|
||||
\|^[^:]*TODO:| d; \
|
||||
\|^[^:]*doc/fdl.texi:| d; \
|
||||
\|^[^:]*man/help2man:| d; \
|
||||
\|^[^:]*tests/misc/sha[0-9]*sum.*\.pl[-:]| d; \
|
||||
\|^[^:]*tests/pr/|{ \|^[^:]*tests/pr/pr-tests:| !d; };
|
||||
@ -176,5 +186,7 @@ sc_timeout_prereq:
|
||||
| xargs grep -lw timeout \
|
||||
| xargs grep -FLw require_timeout_ \
|
||||
| $(GREP) . \
|
||||
&& { echo '$(ME): timeout withtout use of require_timeout_' \
|
||||
&& { echo '$(ME): timeout without use of require_timeout_' \
|
||||
1>&2; exit 1; } || :
|
||||
|
||||
codespell_ignore_words_list = clen,allo,Nd,abd,alph,debbugs,wee,UE,ois,creche
|
||||
|
||||
110
configure.ac
110
configure.ac
@ -1,7 +1,7 @@
|
||||
dnl
|
||||
dnl autoconf input file for GNU grep
|
||||
dnl
|
||||
dnl Copyright (C) 1997-2006, 2009-2021 Free Software Foundation, Inc.
|
||||
dnl Copyright (C) 1997-2006, 2009-2026 Free Software Foundation, Inc.
|
||||
dnl
|
||||
dnl This file is part of GNU grep.
|
||||
dnl
|
||||
@ -51,16 +51,15 @@ AC_PROG_INSTALL
|
||||
AC_PROG_CC
|
||||
gl_EARLY
|
||||
AC_PROG_RANLIB
|
||||
PKG_PROG_PKG_CONFIG([0.9.0])
|
||||
PKG_PROG_PKG_CONFIG([0.9.0], [PKG_CONFIG=false])
|
||||
|
||||
# grep never invokes mbrtowc or mbrlen on empty input,
|
||||
# so don't worry about this common bug,
|
||||
# as working around it would merely slow grep down.
|
||||
gl_cv_func_mbrtowc_empty_input='assume yes'
|
||||
gl_cv_func_mbrlen_empty_input='assume yes'
|
||||
|
||||
dnl Checks for typedefs, structures, and compiler characteristics.
|
||||
AC_TYPE_SIZE_T
|
||||
AC_C_CONST
|
||||
gl_INIT
|
||||
|
||||
# Ensure VLAs are not used.
|
||||
@ -68,60 +67,78 @@ gl_INIT
|
||||
AC_DEFINE([GNULIB_NO_VLA], [1], [Define to 1 to disable use of VLAs])
|
||||
|
||||
# The test suite needs to know if we have a working perl.
|
||||
# FIXME: this is suboptimal. Ideally, we would be able to call gl_PERL
|
||||
# with an ACTION-IF-NOT-FOUND argument ...
|
||||
cu_have_perl=yes
|
||||
case $PERL in *"/missing "*) cu_have_perl=no;; esac
|
||||
AM_CONDITIONAL([HAVE_PERL], [test $cu_have_perl = yes])
|
||||
AM_CONDITIONAL([HAVE_PERL], [test "$gl_cv_prog_perl" != no])
|
||||
|
||||
# gl_GCC_VERSION_IFELSE([major], [minor], [run-if-found], [run-if-not-found])
|
||||
# ------------------------------------------------
|
||||
# If $CPP is gcc-MAJOR.MINOR or newer, then run RUN-IF-FOUND.
|
||||
# Otherwise, run RUN-IF-NOT-FOUND.
|
||||
AC_DEFUN([gl_GCC_VERSION_IFELSE],
|
||||
[AC_PREPROC_IFELSE(
|
||||
[AC_LANG_PROGRAM(
|
||||
[[
|
||||
#if ($1) < __GNUC__ || (($1) == __GNUC__ && ($2) <= __GNUC_MINOR__)
|
||||
/* ok */
|
||||
#else
|
||||
# error "your version of gcc is older than $1.$2"
|
||||
#endif
|
||||
]]),
|
||||
], [$3], [$4])
|
||||
]
|
||||
)
|
||||
|
||||
AC_ARG_ENABLE([gcc-warnings],
|
||||
[AS_HELP_STRING([--enable-gcc-warnings],
|
||||
[turn on lots of GCC warnings (for developers)])],
|
||||
[AS_HELP_STRING([--enable-gcc-warnings@<:@=TYPE@:>@],
|
||||
[control generation of GCC warnings. The TYPE 'no' disables
|
||||
warnings (default for non-developer builds); 'yes' generates
|
||||
cheap warnings if available (default for developer builds);
|
||||
'expensive' in addition generates expensive-to-compute warnings
|
||||
if available.])],
|
||||
[case $enableval in
|
||||
yes|no) ;;
|
||||
no|yes|expensive) ;;
|
||||
*) AC_MSG_ERROR([bad value $enableval for gcc-warnings option]) ;;
|
||||
esac
|
||||
gl_gcc_warnings=$enableval],
|
||||
[gl_gcc_warnings=no
|
||||
if test "$GCC" = yes && test -d "$srcdir"/.git; then
|
||||
AC_COMPILE_IFELSE(
|
||||
[AC_LANG_PROGRAM([[
|
||||
#if ! (6 < __GNUC__ + (2 <= __GNUC_MINOR__))
|
||||
#error "--enable-gcc-warnings defaults to 'no' on older GCC"
|
||||
#endif
|
||||
]])],
|
||||
[gl_gcc_warnings=yes])
|
||||
fi]
|
||||
[
|
||||
# GCC provides fine-grained control over diagnostics which
|
||||
# is used in gnulib for example to suppress warnings from
|
||||
# certain sections of code. So if this is available and
|
||||
# we're running from a git repo, then auto enable the warnings.
|
||||
gl_gcc_warnings=no
|
||||
gl_GCC_VERSION_IFELSE([4], [6],
|
||||
[test -d "$srcdir"/.git \
|
||||
&& ! test -f "$srcdir"/.tarball-version \
|
||||
&& gl_gcc_warnings=yes])]
|
||||
)
|
||||
|
||||
if test "$gl_gcc_warnings" = yes; then
|
||||
if test $gl_gcc_warnings != no; then
|
||||
gl_WARN_ADD([-Werror], [WERROR_CFLAGS])
|
||||
AC_SUBST([WERROR_CFLAGS])
|
||||
|
||||
nw=
|
||||
ew=
|
||||
AS_IF([test $gl_gcc_warnings != expensive],
|
||||
[# -fanalyzer and related options slow GCC considerably.
|
||||
ew="$ew -fanalyzer -Wno-analyzer-double-free -Wno-analyzer-malloc-leak"
|
||||
ew="$ew -Wno-analyzer-null-dereference -Wno-analyzer-use-after-free"])
|
||||
|
||||
nw=$ew
|
||||
# This, $nw, is the list of warnings we disable.
|
||||
nw="$nw -Wdeclaration-after-statement" # too useful to forbid
|
||||
nw="$nw -Waggregate-return" # anachronistic
|
||||
nw="$nw -Wlong-long" # C90 is anachronistic (lib/gethrxtime.h)
|
||||
nw="$nw -Wc++-compat" # We don't care about C++ compilers
|
||||
nw="$nw -Wundef" # Warns on '#if GNULIB_FOO' etc in gnulib
|
||||
nw="$nw -Wvla" # suppress a warning in regexec.h
|
||||
nw="$nw -Winline" # suppress warnings from streq.h's streq5
|
||||
nw="$nw -Wsystem-headers" # Don't let system headers trigger warnings
|
||||
nw="$nw -Wpadded" # Our structs are not padded
|
||||
nw="$nw -Wstack-protector" # generates false alarms for useful code
|
||||
nw="$nw -Wswitch-default" # Too many warnings for now
|
||||
nw="$nw -Wunsafe-loop-optimizations" # OK to suppress unsafe optimizations
|
||||
nw="$nw -Winline" # streq.h's streq4, streq6 and strcaseeq6
|
||||
nw="$nw -Wstrict-overflow" # regexec.c
|
||||
|
||||
gl_MANYWARN_ALL_GCC([ws])
|
||||
gl_MANYWARN_COMPLEMENT([ws], [$ws], [$nw])
|
||||
for w in $ws; do
|
||||
gl_WARN_ADD([$w])
|
||||
done
|
||||
gl_WARN_ADD([-Wtrailing-whitespace]) # This project's coding style
|
||||
gl_WARN_ADD([-Wno-missing-field-initializers]) # We need this one
|
||||
gl_WARN_ADD([-Wno-sign-compare]) # Too many warnings for now
|
||||
gl_WARN_ADD([-Wno-unused-parameter]) # Too many warnings for now
|
||||
gl_WARN_ADD([-Wno-cast-function-type]) # sig-handler.h's sa_handler_t cast
|
||||
gl_WARN_ADD([-Wno-deprecated-declarations]) # clang complains about sprintf
|
||||
|
||||
# In spite of excluding -Wlogical-op above, it is enabled, as of
|
||||
# gcc 4.5.0 20090517, and it provokes warnings in cat.c, dd.c, truncate.c
|
||||
@ -149,14 +166,19 @@ if test "$gl_gcc_warnings" = yes; then
|
||||
AC_SUBST([GNULIB_WARN_CFLAGS])
|
||||
|
||||
# For gnulib-tests, the set is slightly smaller still.
|
||||
nw=
|
||||
nw="$nw -Wstrict-prototypes"
|
||||
# It's not worth being this picky about test programs.
|
||||
nw="$nw -Wsuggest-attribute=const"
|
||||
nw="$nw -Wsuggest-attribute=pure"
|
||||
nw="$nw -Wsuggest-attribute=format"
|
||||
nw=
|
||||
nw="$nw -Wformat-truncation=2" # False alarm in strerror_r.c
|
||||
nw="$nw -Wmissing-declarations"
|
||||
nw="$nw -Wmissing-prototypes"
|
||||
nw="$nw -Wmissing-variable-declarations"
|
||||
nw="$nw -Wnull-dereference"
|
||||
nw="$nw -Wold-style-definition"
|
||||
nw="$nw -Wstrict-prototypes"
|
||||
nw="$nw -Wsuggest-attribute=cold"
|
||||
nw="$nw -Wsuggest-attribute=const"
|
||||
nw="$nw -Wsuggest-attribute=format"
|
||||
nw="$nw -Wsuggest-attribute=pure"
|
||||
|
||||
# Disable to avoid warnings in e.g., test-intprops.c and test-limits-h.c
|
||||
# due to overlong expansions like this:
|
||||
@ -176,13 +198,7 @@ AC_DEFINE([ARGMATCH_DIE], [usage (EXIT_FAILURE)],
|
||||
AC_DEFINE([ARGMATCH_DIE_DECL], [void usage (int _e)],
|
||||
[Define to the declaration of the xargmatch failure function.])
|
||||
|
||||
dnl Checks for header files.
|
||||
AC_HEADER_DIRENT
|
||||
|
||||
dnl Checks for functions.
|
||||
AC_FUNC_CLOSEDIR_VOID
|
||||
|
||||
AC_CHECK_FUNCS_ONCE([isascii setlocale])
|
||||
AC_CHECK_FUNCS_ONCE([setlocale])
|
||||
|
||||
dnl I18N feature
|
||||
AM_GNU_GETTEXT_VERSION([0.18.2])
|
||||
@ -195,6 +211,8 @@ dnl then the installer should configure --with-included-regex.
|
||||
AM_CONDITIONAL([USE_INCLUDED_REGEX], [test "$ac_use_included_regex" = yes])
|
||||
if test "$ac_use_included_regex" = no; then
|
||||
AC_MSG_WARN([Included lib/regex.c not used])
|
||||
else
|
||||
AC_DEFINE([USE_INCLUDED_REGEX], 1, [building with included regex code])
|
||||
fi
|
||||
|
||||
gl_FUNC_PCRE
|
||||
|
||||
3
doc/.gitignore
vendored
3
doc/.gitignore
vendored
@ -1,6 +1,3 @@
|
||||
/egrep.1
|
||||
/fdl.texi
|
||||
/fgrep.1
|
||||
/gendocs_template
|
||||
/gendocs_template_min
|
||||
/grep.info*
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# Process this file with automake to create Makefile.in
|
||||
# Makefile.am for grep/doc.
|
||||
#
|
||||
# Copyright 2008-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2008-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -16,23 +16,20 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# The customization variable CHECK_NORMAL_MENU_STRUCTURE is necessary with
|
||||
# makeinfo versions ≥ 6.8.
|
||||
MAKEINFO = @MAKEINFO@ -c CHECK_NORMAL_MENU_STRUCTURE=1
|
||||
|
||||
info_TEXINFOS = grep.texi
|
||||
grep_TEXINFOS = fdl.texi
|
||||
|
||||
man_MANS = grep.1 fgrep.1 egrep.1
|
||||
man_MANS = grep.1
|
||||
|
||||
EXTRA_DIST = grep.in.1
|
||||
CLEANFILES = grep.1 egrep.1 fgrep.1
|
||||
CLEANFILES = grep.1
|
||||
|
||||
grep.1: grep.in.1
|
||||
$(AM_V_GEN)rm -f $@-t $@
|
||||
$(AM_V_at)sed 's/@''VERSION@/$(VERSION)/' $(srcdir)/grep.in.1 > $@-t
|
||||
$(AM_V_at)chmod a=r $@-t
|
||||
$(AM_V_at)mv -f $@-t $@
|
||||
|
||||
egrep.1 fgrep.1: Makefile.am
|
||||
$(AM_V_GEN)rm -f $@-t $@
|
||||
$(AM_V_at)inst=`echo grep | sed '$(transform)'`.1 \
|
||||
&& echo ".so man1/$$inst" > $@-t
|
||||
$(AM_V_at)chmod a=r $@-t
|
||||
$(AM_V_at)mv -f $@-t $@
|
||||
|
||||
506
doc/fdl.texi
Normal file
506
doc/fdl.texi
Normal file
@ -0,0 +1,506 @@
|
||||
@c The GNU Free Documentation License.
|
||||
@center Version 1.3, 3 November 2008
|
||||
|
||||
@c This file is intended to be included within another document,
|
||||
@c hence no sectioning command or @node.
|
||||
|
||||
@display
|
||||
Copyright @copyright{} 2000--2002, 2007--2008, 2023--2026 Free Software
|
||||
Foundation, Inc.
|
||||
@uref{https://fsf.org/}
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
@end display
|
||||
|
||||
@enumerate 0
|
||||
@item
|
||||
PREAMBLE
|
||||
|
||||
The purpose of this License is to make a manual, textbook, or other
|
||||
functional and useful document @dfn{free} in the sense of freedom: to
|
||||
assure everyone the effective freedom to copy and redistribute it,
|
||||
with or without modifying it, either commercially or noncommercially.
|
||||
Secondarily, this License preserves for the author and publisher a way
|
||||
to get credit for their work, while not being considered responsible
|
||||
for modifications made by others.
|
||||
|
||||
This License is a kind of ``copyleft'', which means that derivative
|
||||
works of the document must themselves be free in the same sense. It
|
||||
complements the GNU General Public License, which is a copyleft
|
||||
license designed for free software.
|
||||
|
||||
We have designed this License in order to use it for manuals for free
|
||||
software, because free software needs free documentation: a free
|
||||
program should come with manuals providing the same freedoms that the
|
||||
software does. But this License is not limited to software manuals;
|
||||
it can be used for any textual work, regardless of subject matter or
|
||||
whether it is published as a printed book. We recommend this License
|
||||
principally for works whose purpose is instruction or reference.
|
||||
|
||||
@item
|
||||
APPLICABILITY AND DEFINITIONS
|
||||
|
||||
This License applies to any manual or other work, in any medium, that
|
||||
contains a notice placed by the copyright holder saying it can be
|
||||
distributed under the terms of this License. Such a notice grants a
|
||||
world-wide, royalty-free license, unlimited in duration, to use that
|
||||
work under the conditions stated herein. The ``Document'', below,
|
||||
refers to any such manual or work. Any member of the public is a
|
||||
licensee, and is addressed as ``you''. You accept the license if you
|
||||
copy, modify or distribute the work in a way requiring permission
|
||||
under copyright law.
|
||||
|
||||
A ``Modified Version'' of the Document means any work containing the
|
||||
Document or a portion of it, either copied verbatim, or with
|
||||
modifications and/or translated into another language.
|
||||
|
||||
A ``Secondary Section'' is a named appendix or a front-matter section
|
||||
of the Document that deals exclusively with the relationship of the
|
||||
publishers or authors of the Document to the Document's overall
|
||||
subject (or to related matters) and contains nothing that could fall
|
||||
directly within that overall subject. (Thus, if the Document is in
|
||||
part a textbook of mathematics, a Secondary Section may not explain
|
||||
any mathematics.) The relationship could be a matter of historical
|
||||
connection with the subject or with related matters, or of legal,
|
||||
commercial, philosophical, ethical or political position regarding
|
||||
them.
|
||||
|
||||
The ``Invariant Sections'' are certain Secondary Sections whose titles
|
||||
are designated, as being those of Invariant Sections, in the notice
|
||||
that says that the Document is released under this License. If a
|
||||
section does not fit the above definition of Secondary then it is not
|
||||
allowed to be designated as Invariant. The Document may contain zero
|
||||
Invariant Sections. If the Document does not identify any Invariant
|
||||
Sections then there are none.
|
||||
|
||||
The ``Cover Texts'' are certain short passages of text that are listed,
|
||||
as Front-Cover Texts or Back-Cover Texts, in the notice that says that
|
||||
the Document is released under this License. A Front-Cover Text may
|
||||
be at most 5 words, and a Back-Cover Text may be at most 25 words.
|
||||
|
||||
A ``Transparent'' copy of the Document means a machine-readable copy,
|
||||
represented in a format whose specification is available to the
|
||||
general public, that is suitable for revising the document
|
||||
straightforwardly with generic text editors or (for images composed of
|
||||
pixels) generic paint programs or (for drawings) some widely available
|
||||
drawing editor, and that is suitable for input to text formatters or
|
||||
for automatic translation to a variety of formats suitable for input
|
||||
to text formatters. A copy made in an otherwise Transparent file
|
||||
format whose markup, or absence of markup, has been arranged to thwart
|
||||
or discourage subsequent modification by readers is not Transparent.
|
||||
An image format is not Transparent if used for any substantial amount
|
||||
of text. A copy that is not ``Transparent'' is called ``Opaque''.
|
||||
|
||||
Examples of suitable formats for Transparent copies include plain
|
||||
ASCII without markup, Texinfo input format, La@TeX{} input
|
||||
format, SGML or XML using a publicly available
|
||||
DTD, and standard-conforming simple HTML,
|
||||
PostScript or PDF designed for human modification. Examples
|
||||
of transparent image formats include PNG, XCF and
|
||||
JPG@. Opaque formats include proprietary formats that can be
|
||||
read and edited only by proprietary word processors, SGML or
|
||||
XML for which the DTD and/or processing tools are
|
||||
not generally available, and the machine-generated HTML,
|
||||
PostScript or PDF produced by some word processors for
|
||||
output purposes only.
|
||||
|
||||
The ``Title Page'' means, for a printed book, the title page itself,
|
||||
plus such following pages as are needed to hold, legibly, the material
|
||||
this License requires to appear in the title page. For works in
|
||||
formats which do not have any title page as such, ``Title Page'' means
|
||||
the text near the most prominent appearance of the work's title,
|
||||
preceding the beginning of the body of the text.
|
||||
|
||||
The ``publisher'' means any person or entity that distributes copies
|
||||
of the Document to the public.
|
||||
|
||||
A section ``Entitled XYZ'' means a named subunit of the Document whose
|
||||
title either is precisely XYZ or contains XYZ in parentheses following
|
||||
text that translates XYZ in another language. (Here XYZ stands for a
|
||||
specific section name mentioned below, such as ``Acknowledgements'',
|
||||
``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title''
|
||||
of such a section when you modify the Document means that it remains a
|
||||
section ``Entitled XYZ'' according to this definition.
|
||||
|
||||
The Document may include Warranty Disclaimers next to the notice which
|
||||
states that this License applies to the Document. These Warranty
|
||||
Disclaimers are considered to be included by reference in this
|
||||
License, but only as regards disclaiming warranties: any other
|
||||
implication that these Warranty Disclaimers may have is void and has
|
||||
no effect on the meaning of this License.
|
||||
|
||||
@item
|
||||
VERBATIM COPYING
|
||||
|
||||
You may copy and distribute the Document in any medium, either
|
||||
commercially or noncommercially, provided that this License, the
|
||||
copyright notices, and the license notice saying this License applies
|
||||
to the Document are reproduced in all copies, and that you add no other
|
||||
conditions whatsoever to those of this License. You may not use
|
||||
technical measures to obstruct or control the reading or further
|
||||
copying of the copies you make or distribute. However, you may accept
|
||||
compensation in exchange for copies. If you distribute a large enough
|
||||
number of copies you must also follow the conditions in section 3.
|
||||
|
||||
You may also lend copies, under the same conditions stated above, and
|
||||
you may publicly display copies.
|
||||
|
||||
@item
|
||||
COPYING IN QUANTITY
|
||||
|
||||
If you publish printed copies (or copies in media that commonly have
|
||||
printed covers) of the Document, numbering more than 100, and the
|
||||
Document's license notice requires Cover Texts, you must enclose the
|
||||
copies in covers that carry, clearly and legibly, all these Cover
|
||||
Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
|
||||
the back cover. Both covers must also clearly and legibly identify
|
||||
you as the publisher of these copies. The front cover must present
|
||||
the full title with all words of the title equally prominent and
|
||||
visible. You may add other material on the covers in addition.
|
||||
Copying with changes limited to the covers, as long as they preserve
|
||||
the title of the Document and satisfy these conditions, can be treated
|
||||
as verbatim copying in other respects.
|
||||
|
||||
If the required texts for either cover are too voluminous to fit
|
||||
legibly, you should put the first ones listed (as many as fit
|
||||
reasonably) on the actual cover, and continue the rest onto adjacent
|
||||
pages.
|
||||
|
||||
If you publish or distribute Opaque copies of the Document numbering
|
||||
more than 100, you must either include a machine-readable Transparent
|
||||
copy along with each Opaque copy, or state in or with each Opaque copy
|
||||
a computer-network location from which the general network-using
|
||||
public has access to download using public-standard network protocols
|
||||
a complete Transparent copy of the Document, free of added material.
|
||||
If you use the latter option, you must take reasonably prudent steps,
|
||||
when you begin distribution of Opaque copies in quantity, to ensure
|
||||
that this Transparent copy will remain thus accessible at the stated
|
||||
location until at least one year after the last time you distribute an
|
||||
Opaque copy (directly or through your agents or retailers) of that
|
||||
edition to the public.
|
||||
|
||||
It is requested, but not required, that you contact the authors of the
|
||||
Document well before redistributing any large number of copies, to give
|
||||
them a chance to provide you with an updated version of the Document.
|
||||
|
||||
@item
|
||||
MODIFICATIONS
|
||||
|
||||
You may copy and distribute a Modified Version of the Document under
|
||||
the conditions of sections 2 and 3 above, provided that you release
|
||||
the Modified Version under precisely this License, with the Modified
|
||||
Version filling the role of the Document, thus licensing distribution
|
||||
and modification of the Modified Version to whoever possesses a copy
|
||||
of it. In addition, you must do these things in the Modified Version:
|
||||
|
||||
@enumerate A
|
||||
@item
|
||||
Use in the Title Page (and on the covers, if any) a title distinct
|
||||
from that of the Document, and from those of previous versions
|
||||
(which should, if there were any, be listed in the History section
|
||||
of the Document). You may use the same title as a previous version
|
||||
if the original publisher of that version gives permission.
|
||||
|
||||
@item
|
||||
List on the Title Page, as authors, one or more persons or entities
|
||||
responsible for authorship of the modifications in the Modified
|
||||
Version, together with at least five of the principal authors of the
|
||||
Document (all of its principal authors, if it has fewer than five),
|
||||
unless they release you from this requirement.
|
||||
|
||||
@item
|
||||
State on the Title page the name of the publisher of the
|
||||
Modified Version, as the publisher.
|
||||
|
||||
@item
|
||||
Preserve all the copyright notices of the Document.
|
||||
|
||||
@item
|
||||
Add an appropriate copyright notice for your modifications
|
||||
adjacent to the other copyright notices.
|
||||
|
||||
@item
|
||||
Include, immediately after the copyright notices, a license notice
|
||||
giving the public permission to use the Modified Version under the
|
||||
terms of this License, in the form shown in the Addendum below.
|
||||
|
||||
@item
|
||||
Preserve in that license notice the full lists of Invariant Sections
|
||||
and required Cover Texts given in the Document's license notice.
|
||||
|
||||
@item
|
||||
Include an unaltered copy of this License.
|
||||
|
||||
@item
|
||||
Preserve the section Entitled ``History'', Preserve its Title, and add
|
||||
to it an item stating at least the title, year, new authors, and
|
||||
publisher of the Modified Version as given on the Title Page. If
|
||||
there is no section Entitled ``History'' in the Document, create one
|
||||
stating the title, year, authors, and publisher of the Document as
|
||||
given on its Title Page, then add an item describing the Modified
|
||||
Version as stated in the previous sentence.
|
||||
|
||||
@item
|
||||
Preserve the network location, if any, given in the Document for
|
||||
public access to a Transparent copy of the Document, and likewise
|
||||
the network locations given in the Document for previous versions
|
||||
it was based on. These may be placed in the ``History'' section.
|
||||
You may omit a network location for a work that was published at
|
||||
least four years before the Document itself, or if the original
|
||||
publisher of the version it refers to gives permission.
|
||||
|
||||
@item
|
||||
For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve
|
||||
the Title of the section, and preserve in the section all the
|
||||
substance and tone of each of the contributor acknowledgements and/or
|
||||
dedications given therein.
|
||||
|
||||
@item
|
||||
Preserve all the Invariant Sections of the Document,
|
||||
unaltered in their text and in their titles. Section numbers
|
||||
or the equivalent are not considered part of the section titles.
|
||||
|
||||
@item
|
||||
Delete any section Entitled ``Endorsements''. Such a section
|
||||
may not be included in the Modified Version.
|
||||
|
||||
@item
|
||||
Do not retitle any existing section to be Entitled ``Endorsements'' or
|
||||
to conflict in title with any Invariant Section.
|
||||
|
||||
@item
|
||||
Preserve any Warranty Disclaimers.
|
||||
@end enumerate
|
||||
|
||||
If the Modified Version includes new front-matter sections or
|
||||
appendices that qualify as Secondary Sections and contain no material
|
||||
copied from the Document, you may at your option designate some or all
|
||||
of these sections as invariant. To do this, add their titles to the
|
||||
list of Invariant Sections in the Modified Version's license notice.
|
||||
These titles must be distinct from any other section titles.
|
||||
|
||||
You may add a section Entitled ``Endorsements'', provided it contains
|
||||
nothing but endorsements of your Modified Version by various
|
||||
parties---for example, statements of peer review or that the text has
|
||||
been approved by an organization as the authoritative definition of a
|
||||
standard.
|
||||
|
||||
You may add a passage of up to five words as a Front-Cover Text, and a
|
||||
passage of up to 25 words as a Back-Cover Text, to the end of the list
|
||||
of Cover Texts in the Modified Version. Only one passage of
|
||||
Front-Cover Text and one of Back-Cover Text may be added by (or
|
||||
through arrangements made by) any one entity. If the Document already
|
||||
includes a cover text for the same cover, previously added by you or
|
||||
by arrangement made by the same entity you are acting on behalf of,
|
||||
you may not add another; but you may replace the old one, on explicit
|
||||
permission from the previous publisher that added the old one.
|
||||
|
||||
The author(s) and publisher(s) of the Document do not by this License
|
||||
give permission to use their names for publicity for or to assert or
|
||||
imply endorsement of any Modified Version.
|
||||
|
||||
@item
|
||||
COMBINING DOCUMENTS
|
||||
|
||||
You may combine the Document with other documents released under this
|
||||
License, under the terms defined in section 4 above for modified
|
||||
versions, provided that you include in the combination all of the
|
||||
Invariant Sections of all of the original documents, unmodified, and
|
||||
list them all as Invariant Sections of your combined work in its
|
||||
license notice, and that you preserve all their Warranty Disclaimers.
|
||||
|
||||
The combined work need only contain one copy of this License, and
|
||||
multiple identical Invariant Sections may be replaced with a single
|
||||
copy. If there are multiple Invariant Sections with the same name but
|
||||
different contents, make the title of each such section unique by
|
||||
adding at the end of it, in parentheses, the name of the original
|
||||
author or publisher of that section if known, or else a unique number.
|
||||
Make the same adjustment to the section titles in the list of
|
||||
Invariant Sections in the license notice of the combined work.
|
||||
|
||||
In the combination, you must combine any sections Entitled ``History''
|
||||
in the various original documents, forming one section Entitled
|
||||
``History''; likewise combine any sections Entitled ``Acknowledgements'',
|
||||
and any sections Entitled ``Dedications''. You must delete all
|
||||
sections Entitled ``Endorsements.''
|
||||
|
||||
@item
|
||||
COLLECTIONS OF DOCUMENTS
|
||||
|
||||
You may make a collection consisting of the Document and other documents
|
||||
released under this License, and replace the individual copies of this
|
||||
License in the various documents with a single copy that is included in
|
||||
the collection, provided that you follow the rules of this License for
|
||||
verbatim copying of each of the documents in all other respects.
|
||||
|
||||
You may extract a single document from such a collection, and distribute
|
||||
it individually under this License, provided you insert a copy of this
|
||||
License into the extracted document, and follow this License in all
|
||||
other respects regarding verbatim copying of that document.
|
||||
|
||||
@item
|
||||
AGGREGATION WITH INDEPENDENT WORKS
|
||||
|
||||
A compilation of the Document or its derivatives with other separate
|
||||
and independent documents or works, in or on a volume of a storage or
|
||||
distribution medium, is called an ``aggregate'' if the copyright
|
||||
resulting from the compilation is not used to limit the legal rights
|
||||
of the compilation's users beyond what the individual works permit.
|
||||
When the Document is included in an aggregate, this License does not
|
||||
apply to the other works in the aggregate which are not themselves
|
||||
derivative works of the Document.
|
||||
|
||||
If the Cover Text requirement of section 3 is applicable to these
|
||||
copies of the Document, then if the Document is less than one half of
|
||||
the entire aggregate, the Document's Cover Texts may be placed on
|
||||
covers that bracket the Document within the aggregate, or the
|
||||
electronic equivalent of covers if the Document is in electronic form.
|
||||
Otherwise they must appear on printed covers that bracket the whole
|
||||
aggregate.
|
||||
|
||||
@item
|
||||
TRANSLATION
|
||||
|
||||
Translation is considered a kind of modification, so you may
|
||||
distribute translations of the Document under the terms of section 4.
|
||||
Replacing Invariant Sections with translations requires special
|
||||
permission from their copyright holders, but you may include
|
||||
translations of some or all Invariant Sections in addition to the
|
||||
original versions of these Invariant Sections. You may include a
|
||||
translation of this License, and all the license notices in the
|
||||
Document, and any Warranty Disclaimers, provided that you also include
|
||||
the original English version of this License and the original versions
|
||||
of those notices and disclaimers. In case of a disagreement between
|
||||
the translation and the original version of this License or a notice
|
||||
or disclaimer, the original version will prevail.
|
||||
|
||||
If a section in the Document is Entitled ``Acknowledgements'',
|
||||
``Dedications'', or ``History'', the requirement (section 4) to Preserve
|
||||
its Title (section 1) will typically require changing the actual
|
||||
title.
|
||||
|
||||
@item
|
||||
TERMINATION
|
||||
|
||||
You may not copy, modify, sublicense, or distribute the Document
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense, or distribute it is void, and
|
||||
will automatically terminate your rights under this License.
|
||||
|
||||
However, if you cease all violation of this License, then your license
|
||||
from a particular copyright holder is reinstated (a) provisionally,
|
||||
unless and until the copyright holder explicitly and finally
|
||||
terminates your license, and (b) permanently, if the copyright holder
|
||||
fails to notify you of the violation by some reasonable means prior to
|
||||
60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, receipt of a copy of some or all of the same material does
|
||||
not give you any rights to use it.
|
||||
|
||||
@item
|
||||
FUTURE REVISIONS OF THIS LICENSE
|
||||
|
||||
The Free Software Foundation may publish new, revised versions
|
||||
of the GNU Free Documentation License from time to time. Such new
|
||||
versions will be similar in spirit to the present version, but may
|
||||
differ in detail to address new problems or concerns. See
|
||||
@uref{https://www.gnu.org/licenses/}.
|
||||
|
||||
Each version of the License is given a distinguishing version number.
|
||||
If the Document specifies that a particular numbered version of this
|
||||
License ``or any later version'' applies to it, you have the option of
|
||||
following the terms and conditions either of that specified version or
|
||||
of any later version that has been published (not as a draft) by the
|
||||
Free Software Foundation. If the Document does not specify a version
|
||||
number of this License, you may choose any version ever published (not
|
||||
as a draft) by the Free Software Foundation. If the Document
|
||||
specifies that a proxy can decide which future versions of this
|
||||
License can be used, that proxy's public statement of acceptance of a
|
||||
version permanently authorizes you to choose that version for the
|
||||
Document.
|
||||
|
||||
@item
|
||||
RELICENSING
|
||||
|
||||
``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any
|
||||
World Wide Web server that publishes copyrightable works and also
|
||||
provides prominent facilities for anybody to edit those works. A
|
||||
public wiki that anybody can edit is an example of such a server. A
|
||||
``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the
|
||||
site means any set of copyrightable works thus published on the MMC
|
||||
site.
|
||||
|
||||
``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0
|
||||
license published by Creative Commons Corporation, a not-for-profit
|
||||
corporation with a principal place of business in San Francisco,
|
||||
California, as well as future copyleft versions of that license
|
||||
published by that same organization.
|
||||
|
||||
``Incorporate'' means to publish or republish a Document, in whole or
|
||||
in part, as part of another Document.
|
||||
|
||||
An MMC is ``eligible for relicensing'' if it is licensed under this
|
||||
License, and if all works that were first published under this License
|
||||
somewhere other than this MMC, and subsequently incorporated in whole
|
||||
or in part into the MMC, (1) had no cover texts or invariant sections,
|
||||
and (2) were thus incorporated prior to November 1, 2008.
|
||||
|
||||
The operator of an MMC Site may republish an MMC contained in the site
|
||||
under CC-BY-SA on the same site at any time before August 1, 2009,
|
||||
provided the MMC is eligible for relicensing.
|
||||
|
||||
@end enumerate
|
||||
|
||||
@page
|
||||
@heading ADDENDUM: How to use this License for your documents
|
||||
|
||||
To use this License in a document you have written, include a copy of
|
||||
the License in the document and put the following copyright and
|
||||
license notices just after the title page:
|
||||
|
||||
@smallexample
|
||||
@group
|
||||
Copyright (C) @var{year} @var{your name}.
|
||||
Permission is granted to copy, distribute and/or modify this document
|
||||
under the terms of the GNU Free Documentation License, Version 1.3
|
||||
or any later version published by the Free Software Foundation;
|
||||
with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
|
||||
Texts. A copy of the license is included in the section entitled ``GNU
|
||||
Free Documentation License''.
|
||||
@end group
|
||||
@end smallexample
|
||||
|
||||
If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,
|
||||
replace the ``with@dots{}Texts.''@: line with this:
|
||||
|
||||
@smallexample
|
||||
@group
|
||||
with the Invariant Sections being @var{list their titles}, with
|
||||
the Front-Cover Texts being @var{list}, and with the Back-Cover Texts
|
||||
being @var{list}.
|
||||
@end group
|
||||
@end smallexample
|
||||
|
||||
If you have Invariant Sections without Cover Texts, or some other
|
||||
combination of the three, merge those two alternatives to suit the
|
||||
situation.
|
||||
|
||||
If your document contains nontrivial examples of program code, we
|
||||
recommend releasing these examples in parallel under your choice of
|
||||
free software license, such as the GNU General Public License,
|
||||
to permit their use in free software.
|
||||
|
||||
@c Local Variables:
|
||||
@c ispell-local-pdict: "ispell-dict"
|
||||
@c End:
|
||||
359
doc/grep.in.1
359
doc/grep.in.1
@ -2,7 +2,7 @@
|
||||
.de dT
|
||||
.ds Dt \\$2
|
||||
..
|
||||
.dT Time-stamp: "2019-12-29"
|
||||
.dT Time-stamp: "2025-03-21"
|
||||
.\" Update the above date whenever a change to either this file or
|
||||
.\" grep.c's 'usage' function results in a nontrivial change to the man page.
|
||||
.\" In Emacs, you can update the date by running 'M-x time-stamp'
|
||||
@ -11,8 +11,10 @@
|
||||
.
|
||||
.TH GREP 1 \*(Dt "GNU grep @VERSION@" "User Commands"
|
||||
.
|
||||
.if !\w|\*(lq| \{\
|
||||
.\" groff an-old.tmac does not seem to be in use, so define lq and rq.
|
||||
.ie \n(.g .ds ' \(aq
|
||||
.el .ds ' '
|
||||
.if !\w@\*(lq@ \{\
|
||||
.\" The implementation lacks \*(lq and presumably \*(rq.
|
||||
. ie \n(.g \{\
|
||||
. ds lq \(lq\"
|
||||
. ds rq \(rq\"
|
||||
@ -23,148 +25,147 @@
|
||||
. \}
|
||||
.\}
|
||||
.
|
||||
.if !\w|\*(la| \{\
|
||||
.as mC
|
||||
.if !\w@\*(mC@ \{\
|
||||
.\" groff an-ext.tmac does not seem to be in use, so define the parts of
|
||||
.\" it that are used below. For a copy of groff an-ext.tmac, please see:
|
||||
.\" https://git.savannah.gnu.org/cgit/groff.git/plain/tmac/an-ext.tmac
|
||||
.\" --- Start of lines taken from groff an-ext.tmac
|
||||
.\" it that are used below, taken from groff 1.23.0. For a copy, please see:
|
||||
.\" https://git.savannah.gnu.org/cgit/groff.git/plain/tmac/an-ext.tmac?id=1.23.0
|
||||
.nr mG \n(.g-1
|
||||
.\" --- Start of lines taken from groff an-ext.tmac,
|
||||
.\" except with "nr mH 14" replaced by "nr mH 0"
|
||||
.\" and with mS, SY, YS definitions omitted.
|
||||
.
|
||||
.\" Check whether we are using grohtml.
|
||||
.nr mH 0
|
||||
.if \n(.g \
|
||||
. if '\*(.T'html' \
|
||||
. nr mH 1
|
||||
.\" Define this to your implementation's constant-width typeface.
|
||||
.ds mC CW
|
||||
.if n .ds mC R
|
||||
.
|
||||
.\" Save the automatic hyphenation mode.
|
||||
.\"
|
||||
.\" In AT&T troff, there was no register exposing the hyphenation mode,
|
||||
.\" and no way to save and restore it. Set `mH` to a reasonable value
|
||||
.\" for your implementation and preference.
|
||||
.de mY
|
||||
. ie !\\n(.g \
|
||||
. nr mH 0
|
||||
. el \
|
||||
. do nr mH \\n[.hy] \" groff extension register
|
||||
..
|
||||
.
|
||||
.nr mE 0 \" in an example (EX/EE)?
|
||||
.
|
||||
.\" Prepare link text for mail/web hyperlinks. `MT` and `UR` call this.
|
||||
.de mV
|
||||
. ds m1 \\$1\"
|
||||
..
|
||||
.
|
||||
.
|
||||
.\" Map mono-width fonts to standard fonts for groff's TTY device.
|
||||
.if n \{\
|
||||
. do ftr CR R
|
||||
. do ftr CI I
|
||||
. do ftr CB B
|
||||
.\}
|
||||
.\" Emit hyperlink. The optional argument supplies trailing punctuation
|
||||
.\" after link text. `ME` and `UE` call this.
|
||||
.de mQ
|
||||
. mY
|
||||
. nh
|
||||
<\\*(m1>\\$1
|
||||
. hy \\n(mH
|
||||
..
|
||||
.
|
||||
.\" groff has glyph entities for angle brackets.
|
||||
.ie \n(.g \{\
|
||||
. ds la \(la\"
|
||||
. ds ra \(ra\"
|
||||
.\}
|
||||
.el \{\
|
||||
. ds la <\"
|
||||
. ds ra >\"
|
||||
. \" groff's man macros control hyphenation with this register.
|
||||
. nr HY 1
|
||||
.\}
|
||||
.
|
||||
.\" Start URL.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de UR
|
||||
. ds m1 \\$1\"
|
||||
. nh
|
||||
. if \\n(mH \{\
|
||||
. \" Start diversion in a new environment.
|
||||
. do ev URL-div
|
||||
. do di URL-div
|
||||
. \}
|
||||
. mV \\$1
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" End URL.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de UE
|
||||
. ie \\n(mH \{\
|
||||
. br
|
||||
. di
|
||||
. ev
|
||||
.
|
||||
. \" Has there been one or more input lines for the link text?
|
||||
. ie \\n(dn \{\
|
||||
. do HTML-NS "<a href=""\\*(m1"">"
|
||||
. \" Yes, strip off final newline of diversion and emit it.
|
||||
. do chop URL-div
|
||||
. do URL-div
|
||||
\c
|
||||
. do HTML-NS </a>
|
||||
. \}
|
||||
. el \
|
||||
. do HTML-NS "<a href=""\\*(m1"">\\*(m1</a>"
|
||||
\&\\$*\"
|
||||
. \}
|
||||
. el \
|
||||
\\*(la\\*(m1\\*(ra\\$*\"
|
||||
.
|
||||
. hy \\n(HY
|
||||
. mQ \\$1
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" Start email address.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de MT
|
||||
. ds m1 \\$1\"
|
||||
. nh
|
||||
. if \\n(mH \{\
|
||||
. \" Start diversion in a new environment.
|
||||
. do ev URL-div
|
||||
. do di URL-div
|
||||
. \}
|
||||
. mV \\$1
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" End email address.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de ME
|
||||
. ie \\n(mH \{\
|
||||
. br
|
||||
. di
|
||||
. ev
|
||||
.
|
||||
. \" Has there been one or more input lines for the link text?
|
||||
. ie \\n(dn \{\
|
||||
. do HTML-NS "<a href=""mailto:\\*(m1"">"
|
||||
. \" Yes, strip off final newline of diversion and emit it.
|
||||
. do chop URL-div
|
||||
. do URL-div
|
||||
\c
|
||||
. do HTML-NS </a>
|
||||
. \}
|
||||
. el \
|
||||
. do HTML-NS "<a href=""mailto:\\*(m1"">\\*(m1</a>"
|
||||
\&\\$*\"
|
||||
. \}
|
||||
. el \
|
||||
\\*(la\\*(m1\\*(ra\\$*\"
|
||||
.
|
||||
. hy \\n(HY
|
||||
. mQ \\$1
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" Start example.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de EX
|
||||
. br
|
||||
. if !\\n(mE \{\
|
||||
. nr mF \\n(.f
|
||||
. nr mP \\n(PD
|
||||
. nr PD 1v
|
||||
. nf
|
||||
. ft \\*(mC
|
||||
. nr mE 1
|
||||
. \}
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" End example.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de EE
|
||||
. br
|
||||
. if \\n(mE \{\
|
||||
. ft \\n(mF
|
||||
. nr PD \\n(mP
|
||||
. fi
|
||||
. nr mE 0
|
||||
. \}
|
||||
..
|
||||
.\}
|
||||
.\" --- End of lines taken from groff an-ext.tmac
|
||||
.\}
|
||||
.
|
||||
.hy 0
|
||||
.
|
||||
.SH NAME
|
||||
grep, egrep, fgrep \- print lines that match patterns
|
||||
grep \- print lines that match patterns
|
||||
.
|
||||
.SH SYNOPSIS
|
||||
.B grep
|
||||
.RI [ OPTION .\|.\|.]\&
|
||||
.RI [ OPTION ].\|.\|.\&
|
||||
.I PATTERNS
|
||||
.RI [ FILE .\|.\|.]
|
||||
.RI [ FILE ].\|.\|.
|
||||
.br
|
||||
.B grep
|
||||
.RI [ OPTION .\|.\|.]\&
|
||||
.RI [ OPTION ].\|.\|.\&
|
||||
.B \-e
|
||||
.I PATTERNS
|
||||
\&.\|.\|.\&
|
||||
.RI [ FILE .\|.\|.]
|
||||
.RI [ FILE ].\|.\|.
|
||||
.br
|
||||
.B grep
|
||||
.RI [ OPTION .\|.\|.]\&
|
||||
.RI [ OPTION ].\|.\|.\&
|
||||
.B \-f
|
||||
.I PATTERN_FILE
|
||||
\&.\|.\|.\&
|
||||
.RI [ FILE .\|.\|.]
|
||||
.RI [ FILE ].\|.\|.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.B grep
|
||||
searches for
|
||||
.I PATTERNS
|
||||
in each
|
||||
searches for patterns in each
|
||||
.IR FILE .
|
||||
In the synopsis's first form, which is used if no
|
||||
.B \-e
|
||||
or
|
||||
.B \-f
|
||||
options are present, the first operand
|
||||
.I PATTERNS
|
||||
is one or more patterns separated by newline characters, and
|
||||
.B grep
|
||||
@ -184,17 +185,6 @@ If no
|
||||
.I FILE
|
||||
is given, recursive searches examine the working directory,
|
||||
and nonrecursive searches read standard input.
|
||||
.PP
|
||||
In addition, the variant programs
|
||||
.B egrep
|
||||
and
|
||||
.B fgrep
|
||||
are the same as
|
||||
.B "grep\ \-E"
|
||||
and
|
||||
.BR "grep\ \-F" ,
|
||||
respectively.
|
||||
These variants are deprecated, but are provided for backward compatibility.
|
||||
.
|
||||
.SH OPTIONS
|
||||
.SS "Generic Program Information"
|
||||
@ -225,7 +215,9 @@ as basic regular expressions (BREs, see below).
|
||||
This is the default.
|
||||
.TP
|
||||
.BR \-P ", " \-\^\-perl\-regexp
|
||||
Interpret I<PATTERNS> as Perl-compatible regular expressions (PCREs).
|
||||
Interpret
|
||||
.I PATTERNS
|
||||
as Perl-compatible regular expressions (PCREs).
|
||||
This option is experimental when combined with the
|
||||
.B \-z
|
||||
.RB ( \-\^\-null\-data )
|
||||
@ -253,6 +245,11 @@ If this option is used multiple times or is combined with the
|
||||
.RB ( \-\^\-regexp )
|
||||
option, search for all patterns given.
|
||||
The empty file contains zero patterns, and therefore matches nothing.
|
||||
If
|
||||
.I FILE
|
||||
is
|
||||
.BR \- ,
|
||||
read patterns from standard input.
|
||||
.TP
|
||||
.BR \-i ", " \-\^\-ignore\-case
|
||||
Ignore case distinctions in patterns and input data,
|
||||
@ -288,10 +285,6 @@ pattern and then surrounding it with
|
||||
.B ^
|
||||
and
|
||||
.BR $ .
|
||||
.TP
|
||||
.B \-y
|
||||
Obsolete synonym for
|
||||
.BR \-i .
|
||||
.SS "General Output Control"
|
||||
.TP
|
||||
.BR \-c ", " \-\^\-count
|
||||
@ -299,7 +292,7 @@ Suppress normal output; instead print a count of
|
||||
matching lines for each input file.
|
||||
With the
|
||||
.BR \-v ", " \-\^\-invert\-match
|
||||
option (see below), count non-matching lines.
|
||||
option (see above), count non-matching lines.
|
||||
.TP
|
||||
.BR \-\^\-color [ =\fIWHEN\fP "], " \-\^\-colour [ =\fIWHEN\fP ]
|
||||
Surround the matched (non-empty) strings, matching lines, context lines,
|
||||
@ -308,9 +301,6 @@ groups of context lines) with escape sequences to display them in color
|
||||
on the terminal.
|
||||
The colors are defined by the environment variable
|
||||
.BR GREP_COLORS .
|
||||
The deprecated environment variable
|
||||
.B GREP_COLOR
|
||||
is still supported, but its setting does not have priority.
|
||||
.I WHEN
|
||||
is
|
||||
.BR never ", " always ", or " auto .
|
||||
@ -330,6 +320,16 @@ Scanning each input file stops upon first match.
|
||||
Stop reading a file after
|
||||
.I NUM
|
||||
matching lines.
|
||||
If
|
||||
.I NUM
|
||||
is zero,
|
||||
.B grep
|
||||
stops right away without reading input.
|
||||
A
|
||||
.I NUM
|
||||
of \-1 is treated as infinity and
|
||||
.B grep
|
||||
does not stop; this is the default.
|
||||
If the input is standard input from a regular file,
|
||||
and
|
||||
.I NUM
|
||||
@ -405,7 +405,7 @@ Display input actually coming from standard input as input coming from file
|
||||
This can be useful for commands that transform a file's contents
|
||||
before searching,
|
||||
e.g.,
|
||||
.BR "gzip \-cd foo.gz | grep \-\^\-label=foo \-H 'some pattern'" .
|
||||
.BR "gzip \-cd foo.gz | grep \-\^\-label=foo \-H \*'some pattern\*'" .
|
||||
See also the
|
||||
.B \-H
|
||||
option.
|
||||
@ -531,7 +531,7 @@ and suppresses output lines that contain improperly encoded data.
|
||||
When some output is suppressed,
|
||||
.B grep
|
||||
follows any output
|
||||
with a one-line message saying that a binary file matches.
|
||||
with a message to standard error saying that a binary file matches.
|
||||
.IP
|
||||
If
|
||||
.I TYPE
|
||||
@ -596,7 +596,7 @@ On the other hand, when reading files whose text encodings are
|
||||
unknown, it can be helpful to use
|
||||
.B \-a
|
||||
or to set
|
||||
.B LC_ALL='C'
|
||||
.B LC_ALL=\*'C\*'
|
||||
in the environment, in order to find more matches even if the matches
|
||||
are unsafe for direct display.
|
||||
.TP
|
||||
@ -675,7 +675,7 @@ whose base name matches
|
||||
Ignore any redundant trailing slashes in
|
||||
.IR GLOB .
|
||||
.TP
|
||||
.BR \-I
|
||||
.B \-I
|
||||
Process a binary file as if it did not contain matching data; this is
|
||||
equivalent to the
|
||||
.B \-\^\-binary\-files=without\-match
|
||||
@ -701,7 +701,9 @@ options match, a file is included unless the first such option is
|
||||
.BR \-r ", " \-\^\-recursive
|
||||
Read all files under each directory, recursively,
|
||||
following symbolic links only if they are on the command line.
|
||||
Note that if no file operand is given, B<grep> searches the working directory.
|
||||
Note that if no file operand is given,
|
||||
.B grep
|
||||
searches the working directory.
|
||||
This is equivalent to the
|
||||
.B "\-d recurse"
|
||||
option.
|
||||
@ -719,12 +721,12 @@ This can cause a performance penalty.
|
||||
.BR \-U ", " \-\^\-binary
|
||||
Treat the file(s) as binary.
|
||||
By default, under MS-DOS and MS-Windows,
|
||||
.BR grep
|
||||
.B grep
|
||||
guesses whether a file is text or binary as described for the
|
||||
.B \-\^\-binary\-files
|
||||
option.
|
||||
If
|
||||
.BR grep
|
||||
.B grep
|
||||
decides the file is a text file, it strips the CR characters from the
|
||||
original file contents (to make regular expressions with
|
||||
.B ^
|
||||
@ -748,7 +750,7 @@ Like the
|
||||
or
|
||||
.B \-\^\-null
|
||||
option, this option can be used with commands like
|
||||
.B sort -z
|
||||
.B "sort \-z"
|
||||
to process arbitrary file names.
|
||||
.
|
||||
.SH "REGULAR EXPRESSIONS"
|
||||
@ -760,15 +762,19 @@ expressions, by using various operators to combine smaller expressions.
|
||||
understands three different versions of regular expression syntax:
|
||||
\*(lqbasic\*(rq (BRE), \*(lqextended\*(rq (ERE) and \*(lqperl\*(rq (PCRE).
|
||||
In GNU
|
||||
.B grep
|
||||
there is no difference in available functionality between basic and
|
||||
extended syntaxes.
|
||||
In other implementations, basic regular expressions are less powerful.
|
||||
.BR grep ,
|
||||
basic and extended regular expressions are merely different notations
|
||||
for the same pattern-matching functionality.
|
||||
In other implementations, basic regular expressions are ordinarily
|
||||
less powerful than extended, though occasionally it is the other way around.
|
||||
The following description applies to extended regular expressions;
|
||||
differences for basic regular expressions are summarized afterwards.
|
||||
Perl-compatible regular expressions give additional functionality, and are
|
||||
documented in B<pcresyntax>(3) and B<pcrepattern>(3), but work only if
|
||||
PCRE support is enabled.
|
||||
Perl-compatible regular expressions have different functionality, and are
|
||||
documented in
|
||||
.BR pcre2syntax (3)
|
||||
and
|
||||
.BR pcre2pattern (3),
|
||||
but work only if PCRE support is enabled.
|
||||
.PP
|
||||
The fundamental building blocks are the regular expressions
|
||||
that match a single character.
|
||||
@ -803,19 +809,21 @@ matches any single digit.
|
||||
Within a bracket expression, a
|
||||
.I "range expression"
|
||||
consists of two characters separated by a hyphen.
|
||||
It matches any single character that sorts between the two characters,
|
||||
inclusive, using the locale's collating sequence and character set.
|
||||
For example, in the default C locale,
|
||||
In the default C locale, it matches any single character that appears
|
||||
between the two characters in ASCII order, inclusive.
|
||||
For example,
|
||||
.B [a\-d]
|
||||
is equivalent to
|
||||
.BR [abcd] .
|
||||
Many locales sort characters in dictionary order, and in these locales
|
||||
In other locales the behavior is unspecified:
|
||||
.B [a\-d]
|
||||
is typically not equivalent to
|
||||
.BR [abcd] ;
|
||||
it might be equivalent to
|
||||
.BR [aBbCcDd] ,
|
||||
for example.
|
||||
might be equivalent to
|
||||
.B [abcd]
|
||||
or
|
||||
.B [aBbCcDd]
|
||||
or some other bracket expression,
|
||||
or it might fail to match any character, or the set of
|
||||
characters that it matches might be erratic, or it might be invalid.
|
||||
To obtain the traditional interpretation of bracket expressions,
|
||||
you can use the C locale by setting the
|
||||
.B LC_ALL
|
||||
@ -955,7 +963,7 @@ In basic regular expressions the meta-characters
|
||||
.BR | ,
|
||||
.BR ( ,
|
||||
and
|
||||
.BR )
|
||||
.B )
|
||||
lose their special meaning; instead use the backslashed
|
||||
versions
|
||||
.BR \e? ,
|
||||
@ -1007,34 +1015,10 @@ The shell command
|
||||
.B "locale \-a"
|
||||
lists locales that are currently available.
|
||||
.TP
|
||||
.B GREP_COLOR
|
||||
This variable specifies the color used to highlight matched (non-empty) text.
|
||||
It is deprecated in favor of
|
||||
.BR GREP_COLORS ,
|
||||
but still supported.
|
||||
The
|
||||
.BR mt ,
|
||||
.BR ms ,
|
||||
and
|
||||
.B mc
|
||||
capabilities of
|
||||
.B GREP_COLORS
|
||||
have priority over it.
|
||||
It can only specify the color used to highlight
|
||||
the matching non-empty text in any matching line
|
||||
(a selected line when the
|
||||
.B \-v
|
||||
command-line option is omitted,
|
||||
or a context line when
|
||||
.B \-v
|
||||
is specified).
|
||||
The default is
|
||||
.BR 01;31 ,
|
||||
which means a bold red foreground text on the terminal's default background.
|
||||
.TP
|
||||
.B GREP_COLORS
|
||||
Specifies the colors and other attributes
|
||||
used to highlight various parts of the output.
|
||||
Controls how the
|
||||
.B \-\^\-color
|
||||
option highlights output.
|
||||
Its value is a colon-separated list of capabilities
|
||||
that defaults to
|
||||
.B ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36
|
||||
@ -1268,38 +1252,13 @@ front of the operand list and are treated as options.
|
||||
Also, POSIX requires that unrecognized options be diagnosed as
|
||||
\*(lqillegal\*(rq, but since they are not really against the law the default
|
||||
is to diagnose them as \*(lqinvalid\*(rq.
|
||||
.B POSIXLY_CORRECT
|
||||
also disables \fB_\fP\fIN\fP\fB_GNU_nonoption_argv_flags_\fP,
|
||||
described below.
|
||||
.TP
|
||||
\fB_\fP\fIN\fP\fB_GNU_nonoption_argv_flags_\fP
|
||||
(Here
|
||||
.I N
|
||||
is
|
||||
.BR grep 's
|
||||
numeric process ID.) If the
|
||||
.IR i th
|
||||
character of this environment variable's value is
|
||||
.BR 1 ,
|
||||
do not consider the
|
||||
.IR i th
|
||||
operand of
|
||||
.B grep
|
||||
to be an option, even if it appears to be one.
|
||||
A shell can put this variable in the environment for each command it runs,
|
||||
specifying which operands are the results of file name wildcard
|
||||
expansion and therefore should not be treated as options.
|
||||
This behavior is available only with the GNU C library, and only
|
||||
when
|
||||
.B POSIXLY_CORRECT
|
||||
is not set.
|
||||
.
|
||||
.SH NOTES
|
||||
This man page is maintained only fitfully;
|
||||
the full documentation is often more up-to-date.
|
||||
.
|
||||
.SH COPYRIGHT
|
||||
Copyright 1998-2000, 2002, 2005-2021 Free Software Foundation, Inc.
|
||||
Copyright 1998\(en2000, 2002, 2005\(en2026 Free Software Foundation, Inc.
|
||||
.PP
|
||||
This is free software;
|
||||
see the source for copying conditions.
|
||||
@ -1343,7 +1302,7 @@ contain \*(lqg\*(rq and end in \*(lq.h\*(rq.
|
||||
The
|
||||
.B \-n
|
||||
option outputs line numbers, the
|
||||
.B \-\-
|
||||
.B \-\^\-
|
||||
argument treats expansions of \*(lq*g*.h\*(rq starting with \*(lq\-\*(rq
|
||||
as file names not options,
|
||||
and the empty file /dev/null causes file names to be output
|
||||
@ -1351,7 +1310,7 @@ even if only one file name happens to be of the form \*(lq*g*.h\*(rq.
|
||||
.PP
|
||||
.in +2n
|
||||
.EX
|
||||
$ \fBgrep\fP \-n \-\- 'f.*\e.c$' *g*.h /dev/null
|
||||
$ \fBgrep\fP \-n \-\^\- \*'f.*\e.c$\*' *g*.h /dev/null
|
||||
argmatch.h:1:/* definitions and prototypes for argmatch.c
|
||||
.EE
|
||||
.in
|
||||
@ -1371,9 +1330,9 @@ from the globbing syntax that the shell uses to match file names.
|
||||
.BR sort (1),
|
||||
.BR xargs (1),
|
||||
.BR read (2),
|
||||
.BR pcre (3),
|
||||
.BR pcresyntax (3),
|
||||
.BR pcrepattern (3),
|
||||
.BR pcre2 (3),
|
||||
.BR pcre2syntax (3),
|
||||
.BR pcre2pattern (3),
|
||||
.BR terminfo (5),
|
||||
.BR glob (7),
|
||||
.BR regex (7)
|
||||
|
||||
658
doc/grep.texi
658
doc/grep.texi
@ -30,7 +30,7 @@
|
||||
@copying
|
||||
This manual is for @command{grep}, a pattern matching engine.
|
||||
|
||||
Copyright @copyright{} 1999--2002, 2005, 2008--2021 Free Software Foundation,
|
||||
Copyright @copyright{} 1999--2002, 2005, 2008--2026 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
@quotation
|
||||
@ -109,7 +109,7 @@ there is no way to match newline characters in a text.
|
||||
The general synopsis of the @command{grep} command line is
|
||||
|
||||
@example
|
||||
grep [@var{option}...] [@var{patterns}] [@var{file}...]
|
||||
grep [@var{option}]@dots{} [@var{patterns}] [@var{file}]@dots{}
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
@ -202,6 +202,7 @@ in a shell command.
|
||||
Obtain patterns from @var{file}, one per line.
|
||||
If this option is used multiple times or is combined with the
|
||||
@option{-e} (@option{--regexp}) option, search for all patterns given.
|
||||
When @var{file} is @samp{-}, read patterns from standard input.
|
||||
The empty file contains zero patterns, and therefore matches nothing.
|
||||
(@option{-f} is specified by POSIX.)
|
||||
|
||||
@ -217,13 +218,25 @@ so that characters that differ only in case
|
||||
match each other. Although this is straightforward when letters
|
||||
differ in case only via lowercase-uppercase pairs, the behavior is
|
||||
unspecified in other situations. For example, uppercase ``S'' has an
|
||||
unusual lowercase counterpart ``ſ'' (Unicode character U+017F, LATIN
|
||||
unusual lowercase counterpart
|
||||
@c This does not display correctly in PDF with texinfo 7.1
|
||||
@c and pdfTeX 3.141592653-2.6-1.40.25 (TeX Live 2023/Fedora 40).
|
||||
@ifnottex
|
||||
``ſ''
|
||||
@end ifnottex
|
||||
(Unicode character U+017F LATIN
|
||||
SMALL LETTER LONG S) in many locales, and it is unspecified whether
|
||||
this unusual character matches ``S'' or ``s'' even though uppercasing
|
||||
it yields ``S''. Another example: the lowercase German letter ``ß''
|
||||
(U+00DF, LATIN SMALL LETTER SHARP S) is normally capitalized as the
|
||||
(U+00DF LATIN SMALL LETTER SHARP S) is normally capitalized as the
|
||||
two-character string ``SS'' but it does not match ``SS'', and it might
|
||||
not match the uppercase letter ``ẞ'' (U+1E9E, LATIN CAPITAL LETTER
|
||||
not match the uppercase letter
|
||||
@c This does not display correctly in PDF with texinfo 7.1
|
||||
@c and pdfTeX 3.141592653-2.6-1.40.25 (TeX Live 2023/Fedora 40).
|
||||
@ifnottex
|
||||
``ẞ''
|
||||
@end ifnottex
|
||||
(U+1E9E LATIN CAPITAL LETTER
|
||||
SHARP S) even though lowercasing the latter yields the former.
|
||||
|
||||
@option{-y} is an obsolete synonym that is provided for compatibility.
|
||||
@ -265,8 +278,7 @@ begin and end with word constituents, it differs from surrounding a
|
||||
regular expression with @samp{\<} and @samp{\>}. For example, although
|
||||
@samp{grep -w @@} matches a line containing only @samp{@@}, @samp{grep
|
||||
'\<@@\>'} cannot match any line because @samp{@@} is not a
|
||||
word constituent. @xref{The Backslash Character and Special
|
||||
Expressions}.
|
||||
word constituent. @xref{Special Backslash Expressions}.
|
||||
|
||||
@item -x
|
||||
@itemx --line-regexp
|
||||
@ -301,7 +313,7 @@ count non-matching lines.
|
||||
@opindex --color
|
||||
@opindex --colour
|
||||
@cindex highlight, color, colour
|
||||
Surround the matched (non-empty) strings, matching lines, context lines,
|
||||
Surround matched non-empty strings, matching lines, context lines,
|
||||
file names, line numbers, byte offsets, and separators (for fields and
|
||||
groups of context lines) with escape sequences to display them in color
|
||||
on the terminal.
|
||||
@ -309,11 +321,14 @@ The colors are defined by the environment variable @env{GREP_COLORS}
|
||||
and default to @samp{ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36}
|
||||
for bold red matched text, magenta file names, green line numbers,
|
||||
green byte offsets, cyan separators, and default terminal colors otherwise.
|
||||
The deprecated environment variable @env{GREP_COLOR} is still supported,
|
||||
but its setting does not have priority;
|
||||
it defaults to @samp{01;31} (bold red)
|
||||
which only covers the color for matched text.
|
||||
@var{WHEN} is @samp{never}, @samp{always}, or @samp{auto}.
|
||||
@xref{Environment Variables}.
|
||||
|
||||
@var{WHEN} is @samp{always} to use colors, @samp{never} to not use
|
||||
colors, or @samp{auto} to use colors if standard output is associated
|
||||
with a terminal device and the @env{TERM} environment variable's value
|
||||
suggests that the terminal supports colors.
|
||||
Plain @option{--color} is treated like @option{--color=auto};
|
||||
if no @option{--color} option is given, the default is @option{--color=never}.
|
||||
|
||||
@item -L
|
||||
@itemx --files-without-match
|
||||
@ -341,6 +356,10 @@ Scanning each input file stops upon first match.
|
||||
@opindex --max-count
|
||||
@cindex max-count
|
||||
Stop after the first @var{num} selected lines.
|
||||
If @var{num} is zero, @command{grep} stops right away without reading input.
|
||||
A @var{num} of @minus{}1 is treated as infinity and @command{grep}
|
||||
does not stop; this is the default.
|
||||
|
||||
If the input is standard input from a regular file,
|
||||
and @var{num} selected lines are output,
|
||||
@command{grep} ensures that the standard input is positioned
|
||||
@ -381,7 +400,7 @@ When the @option{-v} or @option{--invert-match} option is also used,
|
||||
@opindex -o
|
||||
@opindex --only-matching
|
||||
@cindex only matching
|
||||
Print only the matched (non-empty) parts of matching lines,
|
||||
Print only the matched non-empty parts of matching lines,
|
||||
with each such part on a separate output line.
|
||||
Output lines use the same delimiters as input, and delimiters are null
|
||||
bytes if @option{-z} (@option{--null-data}) is also used (@pxref{Other
|
||||
@ -398,6 +417,9 @@ Quiet; do not write anything to standard output.
|
||||
Exit immediately with zero status if any match is found,
|
||||
even if an error was detected.
|
||||
Also see the @option{-s} or @option{--no-messages} option.
|
||||
Portability note: Solaris 10 @command{grep} lacks @option{-q};
|
||||
portable shell scripts typically can redirect standard output to
|
||||
@file{/dev/null} instead of using @option{-q}.
|
||||
(@option{-q} is specified by POSIX.)
|
||||
|
||||
@item -s
|
||||
@ -406,18 +428,6 @@ Also see the @option{-s} or @option{--no-messages} option.
|
||||
@opindex --no-messages
|
||||
@cindex suppress error messages
|
||||
Suppress error messages about nonexistent or unreadable files.
|
||||
Portability note:
|
||||
unlike GNU @command{grep},
|
||||
7th Edition Unix @command{grep} did not conform to POSIX,
|
||||
because it lacked @option{-q}
|
||||
and its @option{-s} option behaved like
|
||||
GNU @command{grep}'s @option{-q} option.@footnote{Of course, 7th Edition
|
||||
Unix predated POSIX by several years!}
|
||||
USG-style @command{grep} also lacked @option{-q}
|
||||
but its @option{-s} option behaved like GNU @command{grep}'s.
|
||||
Portable shell scripts should avoid both
|
||||
@option{-q} and @option{-s} and should redirect
|
||||
standard and error output to @file{/dev/null} instead.
|
||||
(@option{-s} is specified by POSIX.)
|
||||
|
||||
@end table
|
||||
@ -551,7 +561,7 @@ Print @var{num} lines of leading and trailing output context.
|
||||
@opindex --group-separator
|
||||
@cindex group separator
|
||||
When @option{-A}, @option{-B} or @option{-C} are in use,
|
||||
print @var{string} instead of @option{--} between groups of lines.
|
||||
print @var{string} instead of @samp{--} between groups of lines.
|
||||
|
||||
@item --no-group-separator
|
||||
@opindex --no-group-separator
|
||||
@ -624,12 +634,14 @@ By default, @var{type} is @samp{binary}, and @command{grep}
|
||||
suppresses output after null input binary data is discovered,
|
||||
and suppresses output lines that contain improperly encoded data.
|
||||
When some output is suppressed, @command{grep} follows any output
|
||||
with a one-line message saying that a binary file matches.
|
||||
with a message to standard error saying that a binary file matches.
|
||||
|
||||
If @var{type} is @samp{without-match},
|
||||
when @command{grep} discovers null input binary data
|
||||
it assumes that the rest of the file does not match;
|
||||
when @command{grep} discovers null binary data in an input file
|
||||
it assumes that any unprocessed input does not match;
|
||||
this is equivalent to the @option{-I} option.
|
||||
In this case the region of unprocessed input starts no later than the
|
||||
null binary data, and continues to end of file.
|
||||
|
||||
If @var{type} is @samp{text},
|
||||
@command{grep} processes binary data as if it were text;
|
||||
@ -645,6 +657,16 @@ is not matched when @var{type} is @samp{text}. Conversely, when
|
||||
@var{type} is @samp{binary} the pattern @samp{.} (period) might not
|
||||
match a null byte.
|
||||
|
||||
The heuristic that @command{grep} uses to intuit whether input is
|
||||
binary is specific to @command{grep} and may well be unsuitable for
|
||||
other applications, as it depends on command-line options, on locale,
|
||||
and on hardware and operating system characteristics such as system
|
||||
page size and input buffering. For example, if the input consists of
|
||||
a matching text line followed by nonmatching data that contains a null
|
||||
byte, @command{grep} might either output the matching line or treat
|
||||
the file as binary, depending on whether the unprocessed input happens
|
||||
to include the matching text line.
|
||||
|
||||
@emph{Warning:} The @option{-a} (@option{--binary-files=text}) option
|
||||
might output binary garbage, which can have nasty side effects if the
|
||||
output is a terminal and if the terminal driver interprets some of it
|
||||
@ -698,7 +720,7 @@ immediately after a slash (@samp{/}) in the name.
|
||||
When searching recursively, skip any subfile whose base
|
||||
name matches @var{glob}; the base name is the part after the last
|
||||
slash. A pattern can use
|
||||
@samp{*}, @samp{?}, and @samp{[}...@samp{]} as wildcards,
|
||||
@samp{*}, @samp{?}, and @samp{[}@dots{}@samp{]} as wildcards,
|
||||
and @code{\} to quote a wildcard or backslash character literally.
|
||||
|
||||
@item --exclude-from=@var{file}
|
||||
@ -766,10 +788,10 @@ directory, recursively, following all symbolic links.
|
||||
@item --
|
||||
@opindex --
|
||||
@cindex option delimiter
|
||||
Delimit the option list. Later arguments, if any, are treated as
|
||||
operands even if they begin with @samp{-}. For example, @samp{grep PAT --
|
||||
-file1 file2} searches for the pattern PAT in the files named @file{-file1}
|
||||
and @file{file2}.
|
||||
Delimit the option list. Any later argument is not treated as an
|
||||
option even if it begins with @samp{-}. For example,
|
||||
@samp{grep -- -PAT -file1 file2} searches for the pattern @samp{-PAT}
|
||||
in the files named @file{-file1} and @file{file2}.
|
||||
|
||||
@item --line-buffered
|
||||
@opindex --line-buffered
|
||||
@ -822,8 +844,10 @@ this option can be used with commands like
|
||||
@node Environment Variables
|
||||
@section Environment Variables
|
||||
|
||||
The behavior of @command{grep} is affected
|
||||
by the following environment variables.
|
||||
The behavior of @command{grep} is affected by several environment
|
||||
variables, the most important of which control the locale, which
|
||||
specifies how @command{grep} interprets characters in its patterns and
|
||||
data.
|
||||
|
||||
@vindex LANGUAGE @r{environment variable}
|
||||
@vindex LC_ALL @r{environment variable}
|
||||
@ -835,8 +859,8 @@ is specified by examining the three environment variables
|
||||
in that order.
|
||||
The first of these variables that is set specifies the locale.
|
||||
For example, if @env{LC_ALL} is not set,
|
||||
but @env{LC_COLLATE} is set to @samp{pt_BR},
|
||||
then the Brazilian Portuguese locale is used
|
||||
but @env{LC_COLLATE} is set to @samp{pt_BR.UTF-8},
|
||||
then a Brazilian Portuguese locale is used
|
||||
for the @env{LC_COLLATE} category.
|
||||
As a special case for @env{LC_MESSAGES} only, the environment variable
|
||||
@env{LANGUAGE} can contain a colon-separated list of languages that
|
||||
@ -848,7 +872,33 @@ or if @command{grep} was not compiled
|
||||
with national language support (NLS).
|
||||
The shell command @code{locale -a} lists locales that are currently available.
|
||||
|
||||
Many of the environment variables in the following list let you
|
||||
@cindex environment variables
|
||||
The following environment variables affect the behavior of @command{grep}.
|
||||
|
||||
@table @env
|
||||
|
||||
@item GREP_COLOR
|
||||
@vindex GREP_COLOR @r{environment variable}
|
||||
@cindex highlight markers
|
||||
This obsolescent variable interacts with @env{GREP_COLORS}
|
||||
confusingly, and @command{grep} warns if it is set and is not
|
||||
overridden by @env{GREP_COLORS}. Instead of
|
||||
@samp{GREP_COLOR='@var{color}'}, you can use
|
||||
@samp{GREP_COLORS='mt=@var{color}'}.
|
||||
|
||||
@item GREP_COLORS
|
||||
@vindex GREP_COLORS @r{environment variable}
|
||||
@cindex highlight markers
|
||||
This variable controls how the @option{--color} option highlights output.
|
||||
Its value is a colon-separated list of @code{terminfo} capabilities
|
||||
that defaults to @samp{ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36}
|
||||
with the @samp{rv} and @samp{ne} boolean capabilities omitted (i.e., false).
|
||||
The two-letter capability names
|
||||
refer to terminal ``capabilities,'' the ability
|
||||
of a terminal to highlight text, or change its color, and so on.
|
||||
These capabilities are stored in an online database and accessed by
|
||||
the @code{terminfo} library.
|
||||
Non-empty capability values
|
||||
control highlighting using
|
||||
Select Graphic Rendition (SGR)
|
||||
commands interpreted by the terminal or terminal emulator.
|
||||
@ -859,7 +909,7 @@ for permitted values and their meanings as character attributes.)
|
||||
These substring values are integers in decimal representation
|
||||
and can be concatenated with semicolons.
|
||||
@command{grep} takes care of assembling the result
|
||||
into a complete SGR sequence (@samp{\33[}...@samp{m}).
|
||||
into a complete SGR sequence (@samp{\33[}@dots{}@samp{m}).
|
||||
Common values to concatenate include
|
||||
@samp{1} for bold,
|
||||
@samp{4} for underline,
|
||||
@ -876,38 +926,6 @@ for 88-color and 256-color modes foreground colors,
|
||||
and @samp{48;5;0} to @samp{48;5;255}
|
||||
for 88-color and 256-color modes background colors.
|
||||
|
||||
The two-letter names used in the @env{GREP_COLORS} environment variable
|
||||
(and some of the others) refer to terminal ``capabilities,'' the ability
|
||||
of a terminal to highlight text, or change its color, and so on.
|
||||
These capabilities are stored in an online database and accessed by
|
||||
the @code{terminfo} library.
|
||||
|
||||
@cindex environment variables
|
||||
|
||||
@table @env
|
||||
|
||||
@item GREP_COLOR
|
||||
@vindex GREP_COLOR @r{environment variable}
|
||||
@cindex highlight markers
|
||||
This variable specifies the color used to highlight matched (non-empty) text.
|
||||
It is deprecated in favor of @env{GREP_COLORS}, but still supported.
|
||||
The @samp{mt}, @samp{ms}, and @samp{mc} capabilities of @env{GREP_COLORS}
|
||||
have priority over it.
|
||||
It can only specify the color used to highlight
|
||||
the matching non-empty text in any matching line
|
||||
(a selected line when the @option{-v} command-line option is omitted,
|
||||
or a context line when @option{-v} is specified).
|
||||
The default is @samp{01;31},
|
||||
which means a bold red foreground text on the terminal's default background.
|
||||
|
||||
@item GREP_COLORS
|
||||
@vindex GREP_COLORS @r{environment variable}
|
||||
@cindex highlight markers
|
||||
This variable specifies the colors and other attributes
|
||||
used to highlight various parts of the output.
|
||||
Its value is a colon-separated list of @code{terminfo} capabilities
|
||||
that defaults to @samp{ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36}
|
||||
with the @samp{rv} and @samp{ne} boolean capabilities omitted (i.e., false).
|
||||
Supported capabilities are as follows.
|
||||
|
||||
@table @code
|
||||
@ -1004,7 +1022,7 @@ or when EL is too slow or causes too much flicker.
|
||||
The default is false (i.e., the capability is omitted).
|
||||
@end table
|
||||
|
||||
Note that boolean capabilities have no @samp{=}... part.
|
||||
Note that boolean capabilities have no @samp{=}@dots{} part.
|
||||
They are omitted (i.e., false) by default and become true when specified.
|
||||
|
||||
|
||||
@ -1018,7 +1036,7 @@ They are omitted (i.e., false) by default and become true when specified.
|
||||
@cindex national language support
|
||||
@cindex NLS
|
||||
These variables specify the locale for the @env{LC_COLLATE} category,
|
||||
which might affect how range expressions like @samp{[a-z]} are
|
||||
which might affect how range expressions like @samp{a-z} are
|
||||
interpreted.
|
||||
|
||||
@item LC_ALL
|
||||
@ -1061,20 +1079,11 @@ follow file names must be treated as file names;
|
||||
by default,
|
||||
such options are permuted to the front of the operand list
|
||||
and are treated as options.
|
||||
Also, @env{POSIXLY_CORRECT} disables special handling of an
|
||||
invalid bracket expression. @xref{invalid-bracket-expr}.
|
||||
|
||||
@item _@var{N}_GNU_nonoption_argv_flags_
|
||||
@vindex _@var{N}_GNU_nonoption_argv_flags_ @r{environment variable}
|
||||
(Here @code{@var{N}} is @command{grep}'s numeric process ID.)
|
||||
If the @var{i}th character of this environment variable's value is @samp{1},
|
||||
do not consider the @var{i}th operand of @command{grep} to be an option,
|
||||
even if it appears to be one.
|
||||
A shell can put this variable in the environment for each command it runs,
|
||||
specifying which operands are the results of file name wildcard expansion
|
||||
and therefore should not be treated as options.
|
||||
This behavior is available only with the GNU C library,
|
||||
and only when @env{POSIXLY_CORRECT} is not set.
|
||||
@item TERM
|
||||
@vindex TERM @r{environment variable}
|
||||
This variable specifies the output terminal type, which can affect
|
||||
what the @option{--color} option does. @xref{General Output Control}.
|
||||
|
||||
@end table
|
||||
|
||||
@ -1152,22 +1161,77 @@ Interpret patterns as fixed strings, not regular expressions.
|
||||
@opindex --perl-regexp
|
||||
@cindex matching Perl-compatible regular expressions
|
||||
Interpret patterns as Perl-compatible regular expressions (PCREs).
|
||||
PCRE support is here to stay, but consider this option experimental when
|
||||
combined with the @option{-z} (@option{--null-data}) option, and note that
|
||||
@samp{grep@ -P} may warn of unimplemented features.
|
||||
@xref{Other Options}.
|
||||
|
||||
For documentation, refer to @url{https://www.pcre.org/}, with these caveats:
|
||||
@itemize
|
||||
@item
|
||||
In a UTF-8 locale, Perl treats data as UTF-8 only under certain
|
||||
conditions, e.g., if @command{perl} is invoked with the @option{-C}
|
||||
option or the @env{PERL_UNICODE} environment variable set appropriately.
|
||||
Similarly, @command{pcre2grep} treats data as UTF-8 only if
|
||||
invoked with @option{-u} or @option{-U}.
|
||||
In contrast, in a UTF-8 locale @command{grep} and @command{git grep}
|
||||
always treat data as UTF-8.
|
||||
|
||||
@item
|
||||
In Perl and @command{git grep -P}, @samp{\d} matches all Unicode digits,
|
||||
even if they are not ASCII.
|
||||
For example, @samp{\d} matches
|
||||
@ifnottex
|
||||
``٣''
|
||||
@end ifnottex
|
||||
(U+0663 ARABIC-INDIC DIGIT THREE).
|
||||
In contrast, in @samp{grep -P}, @samp{\d} matches only
|
||||
the ten ASCII digits, regardless of locale.
|
||||
In @command{pcre2grep}, @samp{\d} ordinarily behaves like Perl and
|
||||
@command{git grep -P}, but when given the @option{--posix-digit} option
|
||||
it behaves like @samp{grep -P}.
|
||||
(On all platforms, @samp{\D} matches the complement of @samp{\d}.)
|
||||
|
||||
@item
|
||||
The pattern @samp{[[:digit:]]} matches all Unicode digits
|
||||
in Perl, @samp{grep -P}, @command{git grep -P}, and @command{pcre2grep},
|
||||
so you can use it
|
||||
to get the effect of Perl's @samp{\d} on all these platforms.
|
||||
In other words, in Perl and @command{git grep -P},
|
||||
@samp{\d} is equivalent to @samp{[[:digit:]]},
|
||||
whereas in @samp{grep -P}, @samp{\d} is equivalent to @samp{[0-9]},
|
||||
and @command{pcre2grep} ordinarily follows Perl but
|
||||
when given @option{--posix-digit} it follows @samp{grep -P}.
|
||||
|
||||
(On all these platforms, @samp{[[:digit:]]} is equivalent to @samp{\p@{Nd@}}
|
||||
and to @samp{\p@{General_Category: Decimal_Number@}}.)
|
||||
|
||||
@item
|
||||
If @command{grep} is built with PCRE2 version 10.43 (2024) or later,
|
||||
@samp{(?aD)} causes @samp{\d} to behave like @samp{[0-9]} and
|
||||
@samp{(?-aD)} causes it to behave like @samp{[[:digit:]]}.
|
||||
|
||||
@item
|
||||
Although PCRE tracks the syntax and semantics of Perl's regular
|
||||
expressions, the match is not always exact. Perl
|
||||
evolves and a Perl installation may predate or postdate the PCRE2
|
||||
installation on the same host, or their Unicode versions may differ,
|
||||
or Perl and PCRE2 may disagree about an obscure construct.
|
||||
|
||||
@item
|
||||
By default, @command{grep} applies each regexp to a line at a time,
|
||||
so the @samp{(?s)} directive (making @samp{.} match line breaks)
|
||||
is generally ineffective.
|
||||
However, with @option{-z} (@option{--null-data}) it can work:
|
||||
@example
|
||||
$ printf 'a\nb\n' |grep -zP '(?s)a.b'
|
||||
a
|
||||
b
|
||||
@end example
|
||||
But beware: with the @option{-z} (@option{--null-data}) option and a file
|
||||
containing no NUL byte, @command{grep} must read the entire file into memory
|
||||
before processing any of it.
|
||||
Thus, it will exhaust memory and fail for some large files.
|
||||
@end itemize
|
||||
|
||||
@end table
|
||||
|
||||
In addition,
|
||||
two variant programs @command{egrep} and @command{fgrep} are available.
|
||||
@command{egrep} is the same as @samp{grep@ -E}.
|
||||
@command{fgrep} is the same as @samp{grep@ -F}.
|
||||
Direct invocation as either
|
||||
@command{egrep} or @command{fgrep} is deprecated,
|
||||
but is provided to allow historical applications
|
||||
that rely on them to run unmodified.
|
||||
|
||||
|
||||
@node Regular Expressions
|
||||
@chapter Regular Expressions
|
||||
@ -1180,22 +1244,24 @@ by using various operators to combine smaller expressions.
|
||||
three different versions of regular expression syntax:
|
||||
basic (BRE), extended (ERE), and Perl-compatible (PCRE).
|
||||
In GNU @command{grep},
|
||||
there is no difference in available functionality between the basic and
|
||||
extended syntaxes.
|
||||
In other implementations, basic regular expressions are less powerful.
|
||||
basic and extended regular expressions are merely different notations
|
||||
for the same pattern-matching functionality.
|
||||
In other implementations, basic regular expressions are ordinarily
|
||||
less powerful than extended, though occasionally it is the other way around.
|
||||
The following description applies to extended regular expressions;
|
||||
differences for basic regular expressions are summarized afterwards.
|
||||
Perl-compatible regular expressions give additional functionality, and
|
||||
are documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual
|
||||
Perl-compatible regular expressions have different functionality, and
|
||||
are documented in the @i{pcre2syntax}(3) and @i{pcre2pattern}(3) manual
|
||||
pages, but work only if PCRE is available in the system.
|
||||
|
||||
@menu
|
||||
* Fundamental Structure::
|
||||
* Character Classes and Bracket Expressions::
|
||||
* The Backslash Character and Special Expressions::
|
||||
* Special Backslash Expressions::
|
||||
* Anchoring::
|
||||
* Back-references and Subexpressions::
|
||||
* Basic vs Extended::
|
||||
* Problematic Expressions::
|
||||
* Character Encoding::
|
||||
* Matching Non-ASCII::
|
||||
@end menu
|
||||
@ -1275,9 +1341,10 @@ the resulting regular expression
|
||||
matches any string formed by concatenating two substrings
|
||||
that respectively match the concatenated expressions.
|
||||
|
||||
Two regular expressions may be joined by the infix operator @samp{|};
|
||||
the resulting regular expression
|
||||
matches any string matching either alternate expression.
|
||||
@cindex alternatives in regular expressions
|
||||
Two regular expressions may be joined by the infix operator @samp{|}.
|
||||
The resulting regular expression matches any string matching either of
|
||||
the two expressions, which are called @dfn{alternatives}.
|
||||
|
||||
Repetition takes precedence over concatenation,
|
||||
which in turn takes precedence over alternation.
|
||||
@ -1285,6 +1352,9 @@ A whole expression may be enclosed in parentheses
|
||||
to override these precedence rules and form a subexpression.
|
||||
An unmatched @samp{)} matches just itself.
|
||||
|
||||
Not every character string is a valid regular expression.
|
||||
@xref{Problematic Expressions}.
|
||||
|
||||
@node Character Classes and Bracket Expressions
|
||||
@section Character Classes and Bracket Expressions
|
||||
|
||||
@ -1305,14 +1375,14 @@ encoding error.
|
||||
@cindex range expression
|
||||
Within a bracket expression, a @dfn{range expression} consists of two
|
||||
characters separated by a hyphen.
|
||||
It matches any single character that
|
||||
sorts between the two characters, inclusive.
|
||||
In the default C locale, the sorting sequence is the native character
|
||||
order; for example, @samp{[a-d]} is equivalent to @samp{[abcd]}.
|
||||
In other locales, the sorting sequence is not specified, and
|
||||
@samp{[a-d]} might be equivalent to @samp{[abcd]} or to
|
||||
@samp{[aBbCcDd]}, or it might fail to match any character, or the set of
|
||||
characters that it matches might even be erratic.
|
||||
In the default C locale, it matches any single character that appears
|
||||
between the two characters in ASCII order, inclusive.
|
||||
For example, @samp{[a-d]} is equivalent to @samp{[abcd]}.
|
||||
In other locales the behavior is unspecified:
|
||||
@samp{[a-d]} might be equivalent to
|
||||
@samp{[abcd]} or @samp{[aBbCcDd]} or some other bracket expression,
|
||||
or it might fail to match any character, or the set of
|
||||
characters that it matches might be erratic, or it might be invalid.
|
||||
To obtain the traditional interpretation
|
||||
of bracket expressions, you can use the @samp{C} locale by setting the
|
||||
@env{LC_ALL} environment variable to the value @samp{C}.
|
||||
@ -1415,12 +1485,10 @@ Note that the brackets in these class names are
|
||||
part of the symbolic names, and must be included in addition to
|
||||
the brackets delimiting the bracket expression.
|
||||
|
||||
@anchor{invalid-bracket-expr}
|
||||
If you mistakenly omit the outer brackets, and search for, say, @samp{[:upper:]},
|
||||
GNU @command{grep} prints a diagnostic and exits with status 2, on
|
||||
the assumption that you did not intend to search for the nominally
|
||||
equivalent regular expression: @samp{[:epru]}.
|
||||
Set the @env{POSIXLY_CORRECT} environment variable to disable this feature.
|
||||
the assumption that you did not intend to search for the
|
||||
regular expression @samp{[:epru]}.
|
||||
|
||||
Special characters lose their special meaning inside bracket expressions.
|
||||
|
||||
@ -1451,7 +1519,7 @@ represents the close character class symbol.
|
||||
|
||||
@item -
|
||||
represents the range if it's not first or last in a list or the ending point
|
||||
of a range.
|
||||
of a range. To make the @samp{-} a list item, it is best to put it last.
|
||||
|
||||
@item ^
|
||||
represents the characters not in the list.
|
||||
@ -1460,8 +1528,8 @@ character a list item, place it anywhere but first.
|
||||
|
||||
@end table
|
||||
|
||||
@node The Backslash Character and Special Expressions
|
||||
@section The Backslash Character and Special Expressions
|
||||
@node Special Backslash Expressions
|
||||
@section Special Backslash Expressions
|
||||
@cindex backslash
|
||||
|
||||
The @samp{\} character followed by a special character is a regular
|
||||
@ -1496,11 +1564,24 @@ Match whitespace, it is a synonym for @samp{[[:space:]]}.
|
||||
@item \S
|
||||
Match non-whitespace, it is a synonym for @samp{[^[:space:]]}.
|
||||
|
||||
@item \]
|
||||
Match @samp{]}.
|
||||
|
||||
@item \@}
|
||||
Match @samp{@}}.
|
||||
|
||||
@end table
|
||||
|
||||
For example, @samp{\brat\b} matches the separate word @samp{rat},
|
||||
@samp{\Brat\B} matches @samp{crate} but not @samp{furry rat}.
|
||||
|
||||
The behavior of @command{grep} is unspecified if an unescaped backslash
|
||||
is not followed by a special character, a nonzero digit, or a
|
||||
character in the above list. Although @command{grep} might issue a
|
||||
diagnostic and/or give the backslash an interpretation now, its
|
||||
behavior may change if the syntax of regular expressions is extended
|
||||
in future versions.
|
||||
|
||||
@node Anchoring
|
||||
@section Anchoring
|
||||
@cindex anchoring
|
||||
@ -1536,52 +1617,182 @@ back-references are local to each expression.
|
||||
@section Basic vs Extended Regular Expressions
|
||||
@cindex basic regular expressions
|
||||
|
||||
In basic regular expressions the characters @samp{?}, @samp{+},
|
||||
Basic regular expressions differ from extended regular expressions
|
||||
in the following ways:
|
||||
|
||||
@itemize
|
||||
@item
|
||||
The characters @samp{?}, @samp{+},
|
||||
@samp{@{}, @samp{|}, @samp{(}, and @samp{)} lose their special meaning;
|
||||
instead use the backslashed versions @samp{\?}, @samp{\+}, @samp{\@{},
|
||||
@samp{\|}, @samp{\(}, and @samp{\)}. Also, a backslash is needed
|
||||
before an interval expression's closing @samp{@}}, and an unmatched
|
||||
@code{\)} is invalid.
|
||||
before an interval expression's closing @samp{@}}.
|
||||
|
||||
Portable scripts should avoid the following constructs, as
|
||||
POSIX says they produce undefined results:
|
||||
@item
|
||||
An unmatched @samp{\)} is invalid.
|
||||
|
||||
@item
|
||||
If an unescaped @samp{^} appears neither first, nor directly after
|
||||
@samp{\(} or @samp{\|}, it is treated like an ordinary character and
|
||||
is not an anchor.
|
||||
|
||||
@item
|
||||
If an unescaped @samp{$} appears neither last, nor directly before
|
||||
@samp{\|} or @samp{\)}, it is treated like an ordinary character and
|
||||
is not an anchor.
|
||||
|
||||
@item
|
||||
If an unescaped @samp{*} appears first, or appears directly after
|
||||
@samp{\(} or @samp{\|} or anchoring @samp{^}, it is treated like an
|
||||
ordinary character and is not a repetition operator.
|
||||
@end itemize
|
||||
|
||||
@node Problematic Expressions
|
||||
@section Problematic Regular Expressions
|
||||
|
||||
@cindex invalid regular expressions
|
||||
@cindex unspecified behavior in regular expressions
|
||||
Some strings are @dfn{invalid regular expressions} and cause
|
||||
@command{grep} to issue a diagnostic and fail. For example, @samp{xy\1}
|
||||
is invalid because there is no parenthesized subexpression for the
|
||||
back-reference @samp{\1} to refer to.
|
||||
|
||||
Also, some regular expressions have @dfn{unspecified behavior} and
|
||||
should be avoided even if @command{grep} does not currently diagnose
|
||||
them. For example, @samp{xy\0} has unspecified behavior because
|
||||
@samp{0} is not a special character and @samp{\0} is not a special
|
||||
backslash expression (@pxref{Special Backslash Expressions}).
|
||||
Unspecified behavior can be particularly problematic because the set
|
||||
of matched strings might be only partially specified, or not be
|
||||
specified at all, or the expression might even be invalid.
|
||||
|
||||
The following regular expression constructs are invalid on all
|
||||
platforms conforming to POSIX, so portable scripts can assume that
|
||||
@command{grep} rejects these constructs:
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
Extended regular expressions that use back-references.
|
||||
A basic regular expression containing a back-reference @samp{\@var{n}}
|
||||
preceded by fewer than @var{n} closing parentheses. For example,
|
||||
@samp{\(a\)\2} is invalid.
|
||||
|
||||
@item
|
||||
Basic regular expressions that use @samp{\?}, @samp{\+}, or @samp{\|}.
|
||||
@item
|
||||
Empty parenthesized regular expressions like @samp{()}.
|
||||
@item
|
||||
Empty alternatives (as in, e.g., @samp{a|}).
|
||||
@item
|
||||
Repetition operators that immediately follow empty expressions,
|
||||
unescaped @samp{$}, or other repetition operators.
|
||||
@item
|
||||
A backslash escaping an ordinary character (e.g., @samp{\S}),
|
||||
unless it is a back-reference.
|
||||
@item
|
||||
An unescaped @samp{[} that is not part of a bracket expression.
|
||||
@item
|
||||
In extended regular expressions, an unescaped @samp{@{} that is not
|
||||
part of an interval expression.
|
||||
A bracket expression containing @samp{[:} that does not start a
|
||||
character class; and similarly for @samp{[=} and @samp{[.}. For
|
||||
example, @samp{[a[:b]} and @samp{[a[:ouch:]b]} are invalid.
|
||||
@end itemize
|
||||
|
||||
@cindex interval expressions
|
||||
Traditional @command{egrep} did not support interval expressions and
|
||||
some @command{egrep} implementations use @samp{\@{} and @samp{\@}} instead, so
|
||||
portable scripts should avoid interval expressions in @samp{grep@ -E} patterns
|
||||
and should use @samp{[@{]} to match a literal @samp{@{}.
|
||||
GNU @command{grep} treats the following constructs as invalid.
|
||||
However, other @command{grep} implementations might allow them, so
|
||||
portable scripts should not rely on their being invalid:
|
||||
|
||||
GNU @command{grep@ -E} attempts to support traditional usage by
|
||||
assuming that @samp{@{} is not special if it would be the start of an
|
||||
invalid interval expression.
|
||||
For example, the command
|
||||
@samp{grep@ -E@ '@{1'} searches for the two-character string @samp{@{1}
|
||||
instead of reporting a syntax error in the regular expression.
|
||||
POSIX allows this behavior as an extension, but portable scripts
|
||||
should avoid it.
|
||||
@itemize @bullet
|
||||
@item
|
||||
Unescaped @samp{\} at the end of a regular expression.
|
||||
|
||||
@item
|
||||
Unescaped @samp{[} that does not start a bracket expression.
|
||||
|
||||
@item
|
||||
A @samp{\@{} in a basic regular expression that does not start an
|
||||
interval expression.
|
||||
|
||||
@item
|
||||
A basic regular expression with unbalanced @samp{\(} or @samp{\)},
|
||||
or an extended regular expression with unbalanced @samp{(}.
|
||||
|
||||
@item
|
||||
In the POSIX locale, a range expression like @samp{z-a} that
|
||||
represents zero elements. A non-GNU @command{grep} might treat it as
|
||||
a valid range that never matches.
|
||||
|
||||
@item
|
||||
An interval expression with a repetition count greater than 32767.
|
||||
(The portable POSIX limit is 255, and even interval expressions with
|
||||
smaller counts can be impractically slow on all known implementations.)
|
||||
|
||||
@item
|
||||
A bracket expression that contains at least three elements, the first
|
||||
and last of which are both @samp{:}, or both @samp{.}, or both
|
||||
@samp{=}. For example, a non-GNU @command{grep} might treat
|
||||
@samp{[:alpha:]} like @samp{[[:alpha:]]}, or like @samp{[:ahlp]}.
|
||||
@end itemize
|
||||
|
||||
The following constructs have well-defined behavior in GNU
|
||||
@command{grep}. However, they have unspecified behavior elsewhere, so
|
||||
portable scripts should avoid them:
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
Special backslash expressions like @samp{\b}, @samp{\<}, and @samp{\]}.
|
||||
@xref{Special Backslash Expressions}.
|
||||
|
||||
@item
|
||||
A basic regular expression that uses @samp{\?}, @samp{\+}, or @samp{\|}.
|
||||
|
||||
@item
|
||||
An extended regular expression that uses back-references.
|
||||
|
||||
@item
|
||||
An empty regular expression, subexpression, or alternative. For
|
||||
example, @samp{(a|bc|)} is not portable; a portable equivalent is
|
||||
@samp{(a|bc)?}.
|
||||
|
||||
@item
|
||||
In a basic regular expression, an anchoring @samp{^} that appears
|
||||
directly after @samp{\(}, or an anchoring @samp{$} that appears
|
||||
directly before @samp{\)}.
|
||||
|
||||
@item
|
||||
In a basic regular expression, a repetition operator that
|
||||
directly follows another repetition operator.
|
||||
|
||||
@item
|
||||
In an extended regular expression, unescaped @samp{@{}
|
||||
that does not begin a valid interval expression.
|
||||
GNU @command{grep} treats the @samp{@{} as an ordinary character.
|
||||
|
||||
@item
|
||||
A null character or an encoding error in either pattern or input data.
|
||||
@xref{Character Encoding}.
|
||||
|
||||
@item
|
||||
An input file that ends in a non-newline character,
|
||||
where GNU @command{grep} silently supplies a newline.
|
||||
@end itemize
|
||||
|
||||
The following constructs have unspecified behavior, in both GNU
|
||||
and other @command{grep} implementations. Scripts should avoid
|
||||
them whenever possible.
|
||||
|
||||
@itemize
|
||||
@item
|
||||
A backslash escaping an ordinary character, unless it is a
|
||||
back-reference like @samp{\1} or a special backslash expression like
|
||||
@samp{\<} or @samp{\b}. @xref{Special Backslash Expressions}. For
|
||||
example, @samp{\x} has unspecified behavior now, and a future version
|
||||
of @command{grep} might specify @samp{\x} to have a new behavior.
|
||||
|
||||
@item
|
||||
A repetition operator that appears directly after an anchor, or at the
|
||||
start of a complete regular expression, parenthesized subexpression,
|
||||
or alternative. For example, @samp{+|^*(+a|?-b)} has unspecified
|
||||
behavior, whereas @samp{\+|^\*(\+a|\?-b)} is portable.
|
||||
|
||||
@item
|
||||
A range expression outside the POSIX locale. For example, in some
|
||||
locales @samp{[a-z]} might match some characters that are not
|
||||
lowercase letters, or might not match some lowercase letters, or might
|
||||
be invalid. With GNU @command{grep} it is not documented whether
|
||||
these range expressions use native code points, or use the collation
|
||||
sequence specified by the @env{LC_COLLATE} category, or use the
|
||||
collation ordering used by @command{sort} and @code{strcoll}, or have some
|
||||
other interpretation. Outside the POSIX locale, it is portable to use
|
||||
@samp{[[:lower:]]} to match a lower-case letter, or
|
||||
@samp{[abcdefghijklmnopqrstuvwxyz]} to match an ASCII lower-case
|
||||
letter.
|
||||
|
||||
@end itemize
|
||||
|
||||
@node Character Encoding
|
||||
@section Character Encoding
|
||||
@ -1741,25 +1952,39 @@ grep -r --include='*.c' 'hello' /home/gigi
|
||||
|
||||
@item
|
||||
What if a pattern or file has a leading @samp{-}?
|
||||
For example:
|
||||
|
||||
@example
|
||||
grep -- '--cut here--' *
|
||||
grep "$pattern" *
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
searches for all lines matching @samp{--cut here--}.
|
||||
Without @option{--},
|
||||
@command{grep} would attempt to parse @samp{--cut here--} as a list of
|
||||
options, and there would be similar problems with any file names
|
||||
beginning with @samp{-}.
|
||||
|
||||
Alternatively, you can prevent misinterpretation of leading @samp{-}
|
||||
by using @option{-e} for patterns and leading @samp{./} for files:
|
||||
can behave unexpectedly if the value of @samp{pattern} begins with @samp{-},
|
||||
or if the @samp{*} expands to a file name with leading @samp{-}.
|
||||
To avoid the problem, you can use @option{-e} for patterns and leading
|
||||
@samp{./} for files:
|
||||
|
||||
@example
|
||||
grep -e '--cut here--' ./*
|
||||
grep -e "$pattern" ./*
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
searches for all lines matching the pattern in all the working
|
||||
directory's files whose names do not begin with @samp{.}.
|
||||
Without the @option{-e}, @command{grep} might treat the pattern as an
|
||||
option if it begins with @samp{-}. Without the @samp{./}, there might
|
||||
be similar problems with file names beginning with @samp{-}.
|
||||
|
||||
Alternatively, you can use @samp{--} before the pattern and file names:
|
||||
|
||||
@example
|
||||
grep -- "$pattern" *
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
This also fixes the problem, except that if there is a file named @samp{-},
|
||||
@command{grep} misinterprets the @samp{-} as standard input.
|
||||
|
||||
@item
|
||||
Suppose I want to search for a whole word, not a part of a word?
|
||||
|
||||
@ -1829,7 +2054,7 @@ Note that on some platforms,
|
||||
except the available memory.
|
||||
|
||||
@item
|
||||
Why does @command{grep} report ``Binary file matches''?
|
||||
Why does @command{grep} report ``binary file matches''?
|
||||
|
||||
If @command{grep} listed all matching ``lines'' from a binary file, it
|
||||
would probably generate output that is not useful, and it might even
|
||||
@ -1841,8 +2066,7 @@ to output lines even from files that appear to be binary, use the
|
||||
@option{-a} or @samp{--binary-files=text} option.
|
||||
To eliminate the
|
||||
``Binary file matches'' messages, use the @option{-I} or
|
||||
@samp{--binary-files=without-match} option,
|
||||
or the @option{-s} or @option{--no-messages} option.
|
||||
@samp{--binary-files=without-match} option.
|
||||
|
||||
@item
|
||||
Why doesn't @samp{grep -lv} print non-matching file names?
|
||||
@ -1874,7 +2098,10 @@ other patterns cause @command{grep} to match every line.
|
||||
|
||||
To match empty lines, use the pattern @samp{^$}. To match blank
|
||||
lines, use the pattern @samp{^[[:blank:]]*$}. To match no lines at
|
||||
all, use the command @samp{grep -f /dev/null}.
|
||||
all, use an extended regular expression like @samp{a^} or @samp{$a}.
|
||||
To match every line, a portable script should use a pattern like
|
||||
@samp{^} instead of the empty pattern, as POSIX does not specify the
|
||||
behavior of the empty pattern.
|
||||
|
||||
@item
|
||||
How can I search in both standard input and in files?
|
||||
@ -1885,6 +2112,22 @@ Use the special file name @samp{-}:
|
||||
cat /etc/passwd | grep 'alain' - /etc/motd
|
||||
@end example
|
||||
|
||||
@item
|
||||
Why can't I combine the shell's @samp{set -e} with @command{grep}?
|
||||
|
||||
The @command{grep} command follows the convention of programs like
|
||||
@command{cmp} and @command{diff} where an exit status of 1 is not an
|
||||
error. The shell command @samp{set -e} causes the shell to exit if
|
||||
any subcommand exits with nonzero status, and this will cause the
|
||||
shell to exit merely because @command{grep} selected no lines,
|
||||
which is ordinarily not what you want.
|
||||
|
||||
There is a related problem with Bash's @command{set -e -o pipefail}.
|
||||
Since @command{grep} does not always read all its input, a command
|
||||
outputting to a pipe read by @command{grep} can fail when
|
||||
@command{grep} exits before reading all its input, and the command's
|
||||
failure can cause Bash to exit.
|
||||
|
||||
@item
|
||||
Why is this back-reference failing?
|
||||
|
||||
@ -1918,7 +2161,7 @@ before giving it to @command{grep}, or turn to @command{awk},
|
||||
designed to operate across lines.
|
||||
|
||||
@item
|
||||
What do @command{grep}, @command{fgrep}, and @command{egrep} stand for?
|
||||
What do @command{grep}, @option{-E}, and @option{-F} stand for?
|
||||
|
||||
The name @command{grep} comes from the way line editing was done on Unix.
|
||||
For example,
|
||||
@ -1930,8 +2173,29 @@ global/regular expression/print
|
||||
g/re/p
|
||||
@end example
|
||||
|
||||
@command{fgrep} stands for Fixed @command{grep};
|
||||
@command{egrep} stands for Extended @command{grep}.
|
||||
The @option{-E} option stands for Extended @command{grep}.
|
||||
The @option{-F} option stands for Fixed @command{grep}.
|
||||
|
||||
@item
|
||||
What happened to @command{egrep} and @command{fgrep}?
|
||||
|
||||
7th Edition Unix had commands @command{egrep} and @command{fgrep}
|
||||
that were the counterparts of the modern @samp{grep -E} and @samp{grep -F}.
|
||||
Although breaking up @command{grep} into three programs was perhaps
|
||||
useful on the small computers of the 1970s, @command{egrep} and
|
||||
@command{fgrep} were deemed obsolescent by POSIX in 1992,
|
||||
removed from POSIX in 2001, deprecated by GNU Grep 2.5.3 in 2007,
|
||||
and changed to issue obsolescence warnings by GNU Grep 3.8 in 2022;
|
||||
eventually, they are planned to be removed entirely.
|
||||
|
||||
If you prefer the old names, you can use your own substitutes,
|
||||
such as a shell script named @command{egrep} with the following
|
||||
contents:
|
||||
|
||||
@example
|
||||
#!/bin/sh
|
||||
exec grep -E "$@@"
|
||||
@end example
|
||||
|
||||
@end enumerate
|
||||
|
||||
@ -1968,6 +2232,17 @@ bracket expressions like @samp{[a-z]} and @samp{[[=a=]b]}, can be
|
||||
surprisingly inefficient due to difficulties in fast portable access to
|
||||
concepts like multi-character collating elements.
|
||||
|
||||
@cindex interval expressions
|
||||
Interval expressions may be implemented internally via repetition.
|
||||
For example, @samp{^(a|bc)@{2,4@}$} might be implemented as
|
||||
@samp{^(a|bc)(a|bc)((a|bc)(a|bc)?)?$}. A large repetition count may
|
||||
exhaust memory or greatly slow matching. Even small counts can cause
|
||||
problems if cascaded; for example, @samp{grep -E
|
||||
".*@{10,@}@{10,@}@{10,@}@{10,@}@{10,@}"} is likely to overflow a
|
||||
stack. Fortunately, regular expressions like these are typically
|
||||
artificial, and cascaded repetitions do not conform to POSIX so cannot
|
||||
be used in portable programs anyway.
|
||||
|
||||
@cindex back-references
|
||||
A back-reference such as @samp{\1} can hurt performance significantly
|
||||
in some cases, since back-references cannot in general be implemented
|
||||
@ -1989,6 +2264,14 @@ needing to read the zeros. This optimization is not available if the
|
||||
Directory Selection}), unless the @option{-z} (@option{--null-data})
|
||||
option is also used (@pxref{Other Options}).
|
||||
|
||||
@cindex pipelines and reading
|
||||
For efficiency @command{grep} does not always read all its input.
|
||||
For example, the shell command @samp{sed '/^...$/d' | grep -q X} can
|
||||
cause @command{grep} to exit immediately after reading a line
|
||||
containing @samp{X}, without bothering to read the rest of its input data.
|
||||
This in turn can cause @command{sed} to exit with a nonzero status because
|
||||
@command{sed} cannot write to its output pipe after @command{grep} exits.
|
||||
|
||||
For more about the algorithms used by @command{grep} and about
|
||||
related string matching algorithms, see:
|
||||
|
||||
@ -2004,22 +2287,37 @@ used by @command{grep}.
|
||||
@item
|
||||
Aho AV, Corasick MJ. Efficient string matching: an aid to bibliographic search.
|
||||
@emph{CACM}. 1975;18(6):333--40.
|
||||
@url{https://dx.doi.org/10.1145/360825.360855}.
|
||||
@url{https://doi.org/10.1145/360825.360855}.
|
||||
This introduces the Aho--Corasick algorithm.
|
||||
|
||||
@item
|
||||
Boyer RS, Moore JS. A fast string searching algorithm.
|
||||
@emph{CACM}. 1977;20(10):762--72.
|
||||
@url{https://dx.doi.org/10.1145/359842.359859}.
|
||||
@url{https://doi.org/10.1145/359842.359859}.
|
||||
This introduces the Boyer--Moore algorithm.
|
||||
|
||||
@item
|
||||
Faro S, Lecroq T. The exact online string matching problem: a review
|
||||
of the most recent results.
|
||||
@emph{ACM Comput Surv}. 2013;45(2):13.
|
||||
@url{https://dx.doi.org/10.1145/2431211.2431212}.
|
||||
@url{https://doi.org/10.1145/2431211.2431212}.
|
||||
This surveys string matching algorithms that might help improve the
|
||||
performance of @command{grep} in the future.
|
||||
|
||||
@item
|
||||
Hakak SI, Kamsin A, Shivakumara P, Gilkar GA, Khan WZ, Imran M.
|
||||
Exact string matching algorithms: survey, issues, and future research directions.
|
||||
@emph{IEEE Access}. 2019;7:69614--37.
|
||||
@url{https://doi.org/10.1109/ACCESS.2019.2914071}.
|
||||
This survey is more recent than Faro & Lecroq,
|
||||
and focuses on taxonomy instead of performance.
|
||||
|
||||
@item
|
||||
Hume A, Sunday D. Fast string search.
|
||||
@emph{Software Pract Exper}. 1991;21(11):1221--48.
|
||||
@url{https://doi.org/10.1002/spe.4380211105}.
|
||||
This excellent albeit now-dated survey aided the initial development
|
||||
of @command{grep}.
|
||||
@end itemize
|
||||
@frenchspacing off
|
||||
|
||||
|
||||
2
gnulib
2
gnulib
@ -1 +1 @@
|
||||
Subproject commit 1cb09be022ba08d3794fbff6e0e49816c9a4056e
|
||||
Subproject commit 4f6ac2c3c689cd7312b5f9da97791b14bbc2ee53
|
||||
@ -1,4 +1,3 @@
|
||||
AM_CFLAGS =
|
||||
include gnulib.mk
|
||||
AM_CFLAGS = $(GNULIB_TEST_WARN_CFLAGS) $(WERROR_CFLAGS)
|
||||
|
||||
AM_CFLAGS += $(GNULIB_TEST_WARN_CFLAGS) $(WERROR_CFLAGS)
|
||||
include gnulib.mk
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright 1997-1998, 2005-2021 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Output colorization.
|
||||
Copyright 2011-2021 Free Software Foundation, Inc.
|
||||
Copyright 2011-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Without this pragma, gcc 4.7.0 20120102 suggests that the
|
||||
init_colorize function might be a candidate for attribute 'const' */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Output colorization on MS-Windows.
|
||||
Copyright 2011-2021 Free Software Foundation, Inc.
|
||||
Copyright 2011-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Eli Zaretskii. */
|
||||
|
||||
@ -96,7 +94,7 @@ w32_sgr2attr (const char *sgr_seq)
|
||||
{
|
||||
if (*p == ';' || *p == '\0')
|
||||
{
|
||||
code = strtol (s, NULL, 10);
|
||||
code = strtol (s, nullptr, 10);
|
||||
s = p + (*p != '\0');
|
||||
|
||||
switch (code)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/* Output colorization.
|
||||
|
||||
Copyright 2011-2021 Free Software Foundation, Inc.
|
||||
Copyright 2011-2026 Free Software Foundation, Inc.
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
extern int should_colorize (void);
|
||||
extern void init_colorize (void);
|
||||
|
||||
54
m4/pcre.m4
54
m4/pcre.m4
@ -1,6 +1,6 @@
|
||||
# pcre.m4 - check for libpcre support
|
||||
# pcre.m4 - check for PCRE library support
|
||||
|
||||
# Copyright (C) 2010-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2010-2026 Free Software Foundation, Inc.
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
@ -9,7 +9,7 @@ AC_DEFUN([gl_FUNC_PCRE],
|
||||
[
|
||||
AC_ARG_ENABLE([perl-regexp],
|
||||
AS_HELP_STRING([--disable-perl-regexp],
|
||||
[disable perl-regexp (pcre) support]),
|
||||
[disable perl-regexp (PCRE) support]),
|
||||
[case $enableval in
|
||||
yes|no) test_pcre=$enableval;;
|
||||
*) AC_MSG_ERROR([invalid value $enableval for --disable-perl-regexp]);;
|
||||
@ -21,36 +21,54 @@ AC_DEFUN([gl_FUNC_PCRE],
|
||||
use_pcre=no
|
||||
|
||||
if test $test_pcre != no; then
|
||||
PKG_CHECK_MODULES([PCRE], [libpcre], [], [: ${PCRE_LIBS=-lpcre}])
|
||||
|
||||
AC_CACHE_CHECK([for pcre_compile], [pcre_cv_have_pcre_compile],
|
||||
AS_CASE([${PCRE_CFLAGS+set}@${PCRE_LIBS+set}@$PKG_CONFIG],
|
||||
[@@false], [],
|
||||
[@@*], [PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [:])])
|
||||
|
||||
AC_CACHE_CHECK([for pcre2_compile], [pcre_cv_have_pcre2_compile],
|
||||
[pcre_saved_CFLAGS=$CFLAGS
|
||||
pcre_saved_LIBS=$LIBS
|
||||
CFLAGS="$CFLAGS $PCRE_CFLAGS"
|
||||
LIBS="$PCRE_LIBS $LIBS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#include <pcre.h>
|
||||
]],
|
||||
[[pcre *p = pcre_compile (0, 0, 0, 0, 0);
|
||||
return !p;]])],
|
||||
[pcre_cv_have_pcre_compile=yes],
|
||||
[pcre_cv_have_pcre_compile=no])
|
||||
pcre_cv_have_pcre2_compile=no
|
||||
|
||||
while
|
||||
CFLAGS="$pcre_saved_CFLAGS $PCRE_CFLAGS"
|
||||
LIBS="$pcre_saved_LIBS $PCRE_LIBS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include <pcre2.h>
|
||||
]],
|
||||
[[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0);
|
||||
return !p;]])],
|
||||
[pcre_cv_have_pcre2_compile=yes])
|
||||
test $pcre_cv_have_pcre2_compile = no
|
||||
do
|
||||
AS_CASE([$PCRE_CFLAGS@$PCRE_LIBS],
|
||||
[@-lpcre2-8],
|
||||
[# Even the fallback setting fails; give up.
|
||||
PCRE_LIBS=
|
||||
break])
|
||||
# Fallback setting.
|
||||
PCRE_CFLAGS=
|
||||
PCRE_LIBS=-lpcre2-8
|
||||
done
|
||||
|
||||
CFLAGS=$pcre_saved_CFLAGS
|
||||
LIBS=$pcre_saved_LIBS])
|
||||
|
||||
if test "$pcre_cv_have_pcre_compile" = yes; then
|
||||
if test "$pcre_cv_have_pcre2_compile" = yes; then
|
||||
use_pcre=yes
|
||||
elif test $test_pcre = maybe; then
|
||||
AC_MSG_WARN([AC_PACKAGE_NAME will be built without pcre support.])
|
||||
AC_MSG_WARN([AC_PACKAGE_NAME will be built without PCRE support.])
|
||||
else
|
||||
AC_MSG_ERROR([pcre support not available])
|
||||
AC_MSG_ERROR([PCRE support not available])
|
||||
fi
|
||||
fi
|
||||
|
||||
if test $use_pcre = yes; then
|
||||
AC_DEFINE([HAVE_LIBPCRE], [1],
|
||||
[Define to 1 if you have the Perl Compatible Regular Expressions
|
||||
library (-lpcre).])
|
||||
library.])
|
||||
else
|
||||
PCRE_CFLAGS=
|
||||
PCRE_LIBS=
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# List of files that contain translatable strings.
|
||||
#
|
||||
# Copyright 1997-1998, 2005-2021 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -29,5 +29,6 @@ lib/quotearg.c
|
||||
lib/regcomp.c
|
||||
lib/version-etc.c
|
||||
lib/xalloc-die.c
|
||||
src/dfasearch.c
|
||||
src/grep.c
|
||||
src/pcresearch.c
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
## Process this file with automake to create Makefile.in
|
||||
# Copyright 1997-1998, 2005-2021 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -28,13 +28,12 @@ grep_SOURCES = \
|
||||
die.h \
|
||||
grep.c \
|
||||
kwsearch.c \
|
||||
kwset.c \
|
||||
searchutils.c
|
||||
if USE_PCRE
|
||||
grep_SOURCES += pcresearch.c
|
||||
endif
|
||||
|
||||
noinst_HEADERS = grep.h kwset.h search.h system.h
|
||||
noinst_HEADERS = grep.h search.h system.h
|
||||
|
||||
# Sometimes, the expansion of $(LIBINTL) includes -lc which may
|
||||
# include modules defining variables like 'optind', so libgreputils.a
|
||||
@ -42,7 +41,9 @@ noinst_HEADERS = grep.h kwset.h search.h system.h
|
||||
# But libgreputils.a must also follow $(LIBINTL), since libintl uses
|
||||
# replacement functions defined in libgreputils.a.
|
||||
LDADD = \
|
||||
../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a $(LIBICONV) \
|
||||
../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a \
|
||||
$(HARD_LOCALE_LIB) $(LIBC32CONV) \
|
||||
$(LIBSIGSEGV) $(LIBUNISTRING) $(MBRTOWC_LIB) $(SETLOCALE_NULL_LIB) \
|
||||
$(LIBTHREAD)
|
||||
|
||||
grep_LDADD = $(LDADD) $(PCRE_LIBS) $(LIBCSTACK)
|
||||
@ -54,11 +55,11 @@ EXTRA_DIST = egrep.sh
|
||||
egrep fgrep: egrep.sh Makefile
|
||||
$(AM_V_GEN)grep=`echo grep | sed -e '$(transform)'` && \
|
||||
case $@ in egrep) option=-E;; fgrep) option=-F;; esac && \
|
||||
shell_does_substrings='set x/y && d=$${1%/*} && test "$$d" = x' && \
|
||||
shell_does_substrings='set x/y && d=$${1##*/} && test "$$d" = y' && \
|
||||
if $(SHELL) -c "$$shell_does_substrings" 2>/dev/null; then \
|
||||
edit_substring='s,X,X,'; \
|
||||
else \
|
||||
edit_substring='s,\$${0%/\*},`expr "X$$0" : '\''X\\(.*\\)/'\''`,g'; \
|
||||
edit_substring='s,\$${0##\*/},`expr "X$$0" : '\''X\\(.*\\)/'\''`,g'; \
|
||||
fi && \
|
||||
sed -e 's|[@]SHELL@|$(SHELL)|g' \
|
||||
-e "$$edit_substring" \
|
||||
|
||||
155
src/dfasearch.c
155
src/dfasearch.c
@ -1,5 +1,5 @@
|
||||
/* dfasearch.c - searching subroutines using dfa and regex for grep.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2021 Free Software Foundation, Inc.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,15 +12,13 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written August 1992 by Mike Haertel. */
|
||||
|
||||
#include <config.h>
|
||||
#include "intprops.h"
|
||||
#include "search.h"
|
||||
#include <search.h>
|
||||
#include "die.h"
|
||||
#include <error.h>
|
||||
|
||||
@ -36,13 +34,13 @@ struct dfa_comp
|
||||
|
||||
/* Regex compiled regexps. */
|
||||
struct re_pattern_buffer *patterns;
|
||||
size_t pcount;
|
||||
idx_t pcount;
|
||||
struct re_registers regs;
|
||||
|
||||
/* Number of compiled fixed strings known to exactly match the regexp.
|
||||
If kwsexec returns < kwset_exact_matches, then we don't need to
|
||||
call the regexp matcher at all. */
|
||||
ptrdiff_t kwset_exact_matches;
|
||||
idx_t kwset_exact_matches;
|
||||
|
||||
bool begline;
|
||||
};
|
||||
@ -53,14 +51,10 @@ dfaerror (char const *mesg)
|
||||
die (EXIT_TROUBLE, 0, "%s", mesg);
|
||||
}
|
||||
|
||||
/* For now, the sole dfawarn-eliciting condition (use of a regexp
|
||||
like '[:lower:]') is unequivocally an error, so treat it as such,
|
||||
when possible. */
|
||||
void
|
||||
dfawarn (char const *mesg)
|
||||
{
|
||||
if (!getenv ("POSIXLY_CORRECT"))
|
||||
dfaerror (mesg);
|
||||
error (0, 0, _("warning: %s"), mesg);
|
||||
}
|
||||
|
||||
/* If the DFA turns out to have some set of fixed strings one of
|
||||
@ -80,9 +74,9 @@ kwsmusts (struct dfa_comp *dc)
|
||||
The kwset matcher will return the index of the matching
|
||||
string that it chooses. */
|
||||
++dc->kwset_exact_matches;
|
||||
ptrdiff_t old_len = strlen (dm->must);
|
||||
ptrdiff_t new_len = old_len + dm->begline + dm->endline;
|
||||
char *must = xmalloc (new_len);
|
||||
idx_t old_len = strlen (dm->must);
|
||||
idx_t new_len = old_len + dm->begline + dm->endline;
|
||||
char *must = ximalloc (new_len);
|
||||
char *mp = must;
|
||||
*mp = eolbyte;
|
||||
mp += dm->begline;
|
||||
@ -108,7 +102,7 @@ kwsmusts (struct dfa_comp *dc)
|
||||
BS_SAFE is true of encodings where a backslash cannot appear as the
|
||||
last byte of a multibyte character. */
|
||||
static bool _GL_ATTRIBUTE_PURE
|
||||
possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
|
||||
possible_backrefs_in_pattern (char const *keys, idx_t len, bool bs_safe)
|
||||
{
|
||||
/* Normally a backslash, but in an unsafe encoding this is a non-char
|
||||
value so that the comparison below always fails, because if there
|
||||
@ -144,38 +138,51 @@ possible_backrefs_in_pattern (char const *keys, ptrdiff_t len, bool bs_safe)
|
||||
}
|
||||
|
||||
static bool
|
||||
regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
|
||||
ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits,
|
||||
regex_compile (struct dfa_comp *dc, char const *p, idx_t len,
|
||||
idx_t pcount, idx_t lineno, reg_syntax_t syntax_bits,
|
||||
bool syntax_only)
|
||||
{
|
||||
struct re_pattern_buffer pat0;
|
||||
struct re_pattern_buffer *pat = syntax_only ? &pat0 : &dc->patterns[pcount];
|
||||
pat->buffer = NULL;
|
||||
pat->allocated = 0;
|
||||
struct re_pattern_buffer pat;
|
||||
pat.buffer = nullptr;
|
||||
pat.allocated = 0;
|
||||
|
||||
/* Do not use a fastmap with -i, to work around glibc Bug#20381. */
|
||||
pat->fastmap = (syntax_only | match_icase) ? NULL : xmalloc (UCHAR_MAX + 1);
|
||||
static_assert (UCHAR_MAX < IDX_MAX);
|
||||
idx_t uchar_max = UCHAR_MAX;
|
||||
pat.fastmap = syntax_only | match_icase ? nullptr : ximalloc (uchar_max + 1);
|
||||
|
||||
pat->translate = NULL;
|
||||
pat.translate = nullptr;
|
||||
|
||||
if (syntax_only)
|
||||
re_set_syntax (syntax_bits | RE_NO_SUB);
|
||||
else
|
||||
re_set_syntax (syntax_bits);
|
||||
|
||||
char const *err = re_compile_pattern (p, len, pat);
|
||||
char const *err = re_compile_pattern (p, len, &pat);
|
||||
if (!err)
|
||||
return true;
|
||||
{
|
||||
if (syntax_only)
|
||||
regfree (&pat);
|
||||
else
|
||||
dc->patterns[pcount] = pat;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
free (pat.fastmap);
|
||||
|
||||
/* Emit a filename:lineno: prefix for patterns taken from files. */
|
||||
size_t pat_lineno;
|
||||
idx_t pat_lineno;
|
||||
char const *pat_filename
|
||||
= lineno < 0 ? "" : pattern_file_name (lineno, &pat_lineno);
|
||||
|
||||
if (*pat_filename == '\0')
|
||||
error (0, 0, "%s", err);
|
||||
else
|
||||
error (0, 0, "%s:%zu: %s", pat_filename, pat_lineno, err);
|
||||
{
|
||||
ptrdiff_t n = pat_lineno;
|
||||
error (0, 0, "%s:%td: %s", pat_filename, n, err);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -185,7 +192,7 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
|
||||
Return a description of the compiled pattern. */
|
||||
|
||||
void *
|
||||
GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
|
||||
GEAcompile (char *pattern, idx_t size, reg_syntax_t syntax_bits,
|
||||
bool exact)
|
||||
{
|
||||
char *motif;
|
||||
@ -195,7 +202,10 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
|
||||
|
||||
if (match_icase)
|
||||
syntax_bits |= RE_ICASE;
|
||||
int dfaopts = eolbyte ? 0 : DFA_EOL_NUL;
|
||||
int dfaopts = (DFA_CONFUSING_BRACKETS_ERROR | DFA_STRAY_BACKSLASH_WARN
|
||||
| DFA_PLUS_WARN
|
||||
| (syntax_bits & RE_CONTEXT_INDEP_OPS ? DFA_STAR_WARN : 0)
|
||||
| (eolbyte ? 0 : DFA_EOL_NUL));
|
||||
dfasyntax (dc->dfa, &localeinfo, syntax_bits, dfaopts);
|
||||
bool bs_safe = !localeinfo.multibyte | localeinfo.using_utf8;
|
||||
|
||||
@ -210,29 +220,30 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
|
||||
dc->patterns = xmalloc (sizeof *dc->patterns);
|
||||
dc->patterns++;
|
||||
dc->pcount = 0;
|
||||
size_t palloc = 1;
|
||||
idx_t palloc = 1;
|
||||
|
||||
char const *prev = pattern;
|
||||
|
||||
/* Buffer containing back-reference-free patterns. */
|
||||
char *buf = NULL;
|
||||
ptrdiff_t buflen = 0;
|
||||
size_t bufalloc = 0;
|
||||
char *buf = nullptr;
|
||||
idx_t buflen = 0;
|
||||
idx_t bufalloc = 0;
|
||||
|
||||
ptrdiff_t lineno = 0;
|
||||
idx_t lineno = 0;
|
||||
|
||||
do
|
||||
{
|
||||
char const *sep = rawmemchr (p, '\n');
|
||||
ptrdiff_t len = sep - p;
|
||||
idx_t len = sep - p;
|
||||
|
||||
bool backref = possible_backrefs_in_pattern (p, len, bs_safe);
|
||||
|
||||
if (backref && prev < p)
|
||||
{
|
||||
ptrdiff_t prevlen = p - prev;
|
||||
while (bufalloc < buflen + prevlen)
|
||||
buf = x2realloc (buf, &bufalloc);
|
||||
idx_t prevlen = p - prev;
|
||||
ptrdiff_t bufshortage = buflen - bufalloc + prevlen;
|
||||
if (0 < bufshortage)
|
||||
buf = xpalloc (buf, &bufalloc, bufshortage, -1, 1);
|
||||
memcpy (buf + buflen, prev, prevlen);
|
||||
buflen += prevlen;
|
||||
}
|
||||
@ -240,15 +251,14 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
|
||||
/* Ensure room for at least two more patterns. The extra one is
|
||||
for the regex_compile that may be executed after this loop
|
||||
exits, and its (unused) slot is patterns[-1] until then. */
|
||||
while (palloc <= dc->pcount + 1)
|
||||
ptrdiff_t shortage = dc->pcount - palloc + 2;
|
||||
if (0 < shortage)
|
||||
{
|
||||
dc->patterns = x2nrealloc (dc->patterns - 1, &palloc,
|
||||
sizeof *dc->patterns);
|
||||
dc->patterns = xpalloc (dc->patterns - 1, &palloc, shortage, -1,
|
||||
sizeof *dc->patterns);
|
||||
dc->patterns++;
|
||||
}
|
||||
|
||||
re_set_syntax (syntax_bits);
|
||||
|
||||
if (!regex_compile (dc, p, len, dc->pcount, lineno, syntax_bits,
|
||||
!backref))
|
||||
compilation_failed = true;
|
||||
@ -267,20 +277,19 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
|
||||
if (compilation_failed)
|
||||
exit (EXIT_TROUBLE);
|
||||
|
||||
if (prev <= patlim)
|
||||
if (patlim < prev)
|
||||
buflen--;
|
||||
else if (pattern < prev)
|
||||
{
|
||||
if (pattern < prev)
|
||||
{
|
||||
ptrdiff_t prevlen = patlim - prev;
|
||||
buf = xrealloc (buf, buflen + prevlen);
|
||||
memcpy (buf + buflen, prev, prevlen);
|
||||
buflen += prevlen;
|
||||
}
|
||||
else
|
||||
{
|
||||
buf = pattern;
|
||||
buflen = size;
|
||||
}
|
||||
idx_t prevlen = patlim - prev;
|
||||
buf = xirealloc (buf, buflen + prevlen);
|
||||
memcpy (buf + buflen, prev, prevlen);
|
||||
buflen += prevlen;
|
||||
}
|
||||
else
|
||||
{
|
||||
buf = pattern;
|
||||
buflen = size;
|
||||
}
|
||||
|
||||
/* In the match_words and match_lines cases, we use a different pattern
|
||||
@ -298,11 +307,12 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
|
||||
static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
|
||||
static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)";
|
||||
int bk = !(syntax_bits & RE_NO_BK_PARENS);
|
||||
char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk);
|
||||
idx_t bracket_bytes = sizeof word_beg_bk - 1 + sizeof word_end_bk;
|
||||
char *n = ximalloc (size + bracket_bytes);
|
||||
|
||||
strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk)
|
||||
: (bk ? word_beg_bk : word_beg_no_bk));
|
||||
size_t total = strlen (n);
|
||||
idx_t total = strlen (n);
|
||||
memcpy (n + total, pattern, size);
|
||||
total += size;
|
||||
strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk)
|
||||
@ -312,13 +322,13 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
|
||||
size = total;
|
||||
}
|
||||
else
|
||||
motif = NULL;
|
||||
motif = nullptr;
|
||||
|
||||
dfaparse (pattern, size, dc->dfa);
|
||||
kwsmusts (dc);
|
||||
dfacomp (NULL, 0, dc->dfa, 1);
|
||||
dfacomp (nullptr, 0, dc->dfa, 1);
|
||||
|
||||
if (buf != NULL)
|
||||
if (buf)
|
||||
{
|
||||
if (exact || !dfasupported (dc->dfa))
|
||||
{
|
||||
@ -338,16 +348,16 @@ GEAcompile (char *pattern, size_t size, reg_syntax_t syntax_bits,
|
||||
return dc;
|
||||
}
|
||||
|
||||
size_t
|
||||
EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
|
||||
ptrdiff_t
|
||||
EGexecute (void *vdc, char const *buf, idx_t size, idx_t *match_size,
|
||||
char const *start_ptr)
|
||||
{
|
||||
char const *buflim, *beg, *end, *ptr, *match, *best_match, *mb_start;
|
||||
char eol = eolbyte;
|
||||
regoff_t start;
|
||||
size_t len, best_len;
|
||||
idx_t len, best_len;
|
||||
struct kwsmatch kwsm;
|
||||
size_t i;
|
||||
idx_t i;
|
||||
struct dfa_comp *dc = vdc;
|
||||
struct dfa *superset = dfasuperset (dc->dfa);
|
||||
bool dfafast = dfaisfast (dc->dfa);
|
||||
@ -362,7 +372,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
|
||||
if (!start_ptr)
|
||||
{
|
||||
char const *next_beg, *dfa_beg = beg;
|
||||
ptrdiff_t count = 0;
|
||||
idx_t count = 0;
|
||||
bool exact_kwset_match = false;
|
||||
bool backref = false;
|
||||
|
||||
@ -412,7 +422,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
|
||||
goto success;
|
||||
if (mb_start < beg)
|
||||
mb_start = beg;
|
||||
if (mb_goback (&mb_start, NULL, match, buflim) == 0)
|
||||
if (mb_goback (&mb_start, nullptr, match, buflim) == 0)
|
||||
goto success;
|
||||
/* The matched line starts in the middle of a multibyte
|
||||
character. Perform the DFA search starting from the
|
||||
@ -428,8 +438,8 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
|
||||
potential matches; this is more likely to be fast
|
||||
than falling back to KWset would be. */
|
||||
next_beg = dfaexec (superset, dfa_beg, (char *) end, 0,
|
||||
&count, NULL);
|
||||
if (next_beg == NULL || next_beg == end)
|
||||
&count, nullptr);
|
||||
if (!next_beg || next_beg == end)
|
||||
continue;
|
||||
|
||||
/* Narrow down to the line we've found. */
|
||||
@ -451,7 +461,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
|
||||
|
||||
/* If there's no match, or if we've matched the sentinel,
|
||||
we're done. */
|
||||
if (next_beg == NULL || next_beg == end)
|
||||
if (!next_beg || next_beg == end)
|
||||
continue;
|
||||
|
||||
/* Narrow down to the line we've found. */
|
||||
@ -584,7 +594,6 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
|
||||
success:
|
||||
len = end - beg;
|
||||
success_in_len:;
|
||||
size_t off = beg - buf;
|
||||
*match_size = len;
|
||||
return off;
|
||||
return beg - buf;
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Report an error and exit.
|
||||
Copyright 2016-2021 Free Software Foundation, Inc.
|
||||
Copyright 2016-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,15 +12,12 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef DIE_H
|
||||
#define DIE_H
|
||||
|
||||
#include <error.h>
|
||||
#include <stdbool.h>
|
||||
#include <verify.h>
|
||||
|
||||
/* Like 'error (STATUS, ...)', except STATUS must be a nonzero constant.
|
||||
|
||||
@ -1,2 +1,4 @@
|
||||
#!@SHELL@
|
||||
cmd=${0##*/}
|
||||
echo "$cmd: warning: $cmd is obsolescent; using @grep@ @option@" >&2
|
||||
exec @grep@ @option@ "$@"
|
||||
|
||||
632
src/grep.c
632
src/grep.c
File diff suppressed because it is too large
Load Diff
10
src/grep.h
10
src/grep.h
@ -1,5 +1,5 @@
|
||||
/* grep.h - interface to grep driver for searching subroutines.
|
||||
Copyright (C) 1992, 1998, 2001, 2007, 2009-2021 Free Software Foundation,
|
||||
Copyright (C) 1992, 1998, 2001, 2007, 2009-2026 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -13,14 +13,12 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GREP_GREP_H
|
||||
#define GREP_GREP_H 1
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <idx.h>
|
||||
|
||||
/* The following flags are exported from grep for the matchers
|
||||
to look at. */
|
||||
@ -29,6 +27,6 @@ extern bool match_words; /* -w */
|
||||
extern bool match_lines; /* -x */
|
||||
extern char eolbyte; /* -z */
|
||||
|
||||
extern char const *pattern_file_name (size_t, size_t *);
|
||||
extern char const *pattern_file_name (idx_t, idx_t *);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* kwsearch.c - searching subroutines using kwset for grep.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2021 Free Software Foundation, Inc.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,14 +12,12 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written August 1992 by Mike Haertel. */
|
||||
|
||||
#include <config.h>
|
||||
#include "search.h"
|
||||
#include <search.h>
|
||||
|
||||
/* A compiled -F pattern list. */
|
||||
|
||||
@ -32,11 +30,11 @@ struct kwsearch
|
||||
'kwswords (kwset)' when some extra one-character words have been
|
||||
appended, one for each troublesome character that will require a
|
||||
DFA search. */
|
||||
ptrdiff_t words;
|
||||
idx_t words;
|
||||
|
||||
/* The user's pattern and its size in bytes. */
|
||||
char *pattern;
|
||||
size_t size;
|
||||
idx_t size;
|
||||
|
||||
/* The user's pattern compiled as a regular expression,
|
||||
or null if it has not been compiled. */
|
||||
@ -47,11 +45,11 @@ struct kwsearch
|
||||
followed by '\n'. Return a description of the compiled pattern. */
|
||||
|
||||
void *
|
||||
Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
||||
Fcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
{
|
||||
kwset_t kwset;
|
||||
char *buf = NULL;
|
||||
size_t bufalloc = 0;
|
||||
char *buf = nullptr;
|
||||
idx_t bufalloc = 0;
|
||||
|
||||
kwset = kwsinit (true);
|
||||
|
||||
@ -59,7 +57,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
||||
do
|
||||
{
|
||||
char const *sep = rawmemchr (p, '\n');
|
||||
ptrdiff_t len = sep - p;
|
||||
idx_t len = sep - p;
|
||||
|
||||
if (match_lines)
|
||||
{
|
||||
@ -70,8 +68,8 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
||||
if (bufalloc < len + 2)
|
||||
{
|
||||
free (buf);
|
||||
bufalloc = len + 2;
|
||||
buf = x2realloc (NULL, &bufalloc);
|
||||
bufalloc = len;
|
||||
buf = xpalloc (nullptr, &bufalloc, 2, -1, 1);
|
||||
buf[0] = eolbyte;
|
||||
}
|
||||
memcpy (buf + 1, p, len);
|
||||
@ -88,7 +86,7 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
||||
|
||||
free (buf);
|
||||
|
||||
ptrdiff_t words = kwswords (kwset);
|
||||
idx_t words = kwswords (kwset);
|
||||
kwsprep (kwset);
|
||||
|
||||
struct kwsearch *kwsearch = xmalloc (sizeof *kwsearch);
|
||||
@ -96,20 +94,20 @@ Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
||||
kwsearch->words = words;
|
||||
kwsearch->pattern = pattern;
|
||||
kwsearch->size = size;
|
||||
kwsearch->re = NULL;
|
||||
kwsearch->re = nullptr;
|
||||
return kwsearch;
|
||||
}
|
||||
|
||||
/* Use the compiled pattern VCP to search the buffer BUF of size SIZE.
|
||||
If found, return the offset of the first match and store its
|
||||
size into *MATCH_SIZE. If not found, return SIZE_MAX.
|
||||
size into *MATCH_SIZE. If not found, return -1.
|
||||
If START_PTR is nonnull, start searching there. */
|
||||
size_t
|
||||
Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
ptrdiff_t
|
||||
Fexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
|
||||
char const *start_ptr)
|
||||
{
|
||||
char const *beg, *end, *mb_start;
|
||||
ptrdiff_t len;
|
||||
idx_t len;
|
||||
char eol = eolbyte;
|
||||
struct kwsearch *kwsearch = vcp;
|
||||
kwset_t kwset = kwsearch->kwset;
|
||||
@ -126,7 +124,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
break;
|
||||
len = kwsmatch.size - 2 * match_lines;
|
||||
|
||||
size_t mbclen = 0;
|
||||
idx_t mbclen = 0;
|
||||
if (mb_check
|
||||
&& mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0)
|
||||
{
|
||||
@ -151,7 +149,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
goto success_in_beg_and_len;
|
||||
if (match_lines)
|
||||
{
|
||||
len += start_ptr == NULL;
|
||||
len += !start_ptr;
|
||||
goto success_in_beg_and_len;
|
||||
}
|
||||
if (! match_words)
|
||||
@ -198,8 +196,8 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
else
|
||||
end = buf + size;
|
||||
|
||||
if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
|
||||
!= (size_t) -1)
|
||||
if (0 <= EGexecute (kwsearch->re, beg, end - beg,
|
||||
match_size, nullptr))
|
||||
goto success_match_words;
|
||||
beg = end - 1;
|
||||
break;
|
||||
|
||||
929
src/kwset.c
929
src/kwset.c
@ -1,929 +0,0 @@
|
||||
/* kwset.c - search for any of a set of keywords.
|
||||
Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009-2021 Free Software
|
||||
Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
|
||||
/* Written August 1989 by Mike Haertel. */
|
||||
|
||||
/* For the Aho-Corasick algorithm, see:
|
||||
Aho AV, Corasick MJ. Efficient string matching: an aid to
|
||||
bibliographic search. CACM 18, 6 (1975), 333-40
|
||||
<https://dx.doi.org/10.1145/360825.360855>, which describes the
|
||||
failure function used below.
|
||||
|
||||
For the Boyer-Moore algorithm, see: Boyer RS, Moore JS.
|
||||
A fast string searching algorithm. CACM 20, 10 (1977), 762-72
|
||||
<https://dx.doi.org/10.1145/359842.359859>.
|
||||
|
||||
For a survey of more-recent string matching algorithms that might
|
||||
help improve performance, see: Faro S, Lecroq T. The exact online
|
||||
string matching problem: a review of the most recent results.
|
||||
ACM Computing Surveys 45, 2 (2013), 13
|
||||
<https://dx.doi.org/10.1145/2431211.2431212>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include "kwset.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
#include "system.h"
|
||||
#include "intprops.h"
|
||||
#include "memchr2.h"
|
||||
#include "obstack.h"
|
||||
#include "xalloc.h"
|
||||
#include "verify.h"
|
||||
|
||||
#define obstack_chunk_alloc xmalloc
|
||||
#define obstack_chunk_free free
|
||||
|
||||
static unsigned char
|
||||
U (char ch)
|
||||
{
|
||||
return to_uchar (ch);
|
||||
}
|
||||
|
||||
/* Balanced tree of edges and labels leaving a given trie node. */
|
||||
struct tree
|
||||
{
|
||||
struct tree *llink; /* Left link; MUST be first field. */
|
||||
struct tree *rlink; /* Right link (to larger labels). */
|
||||
struct trie *trie; /* Trie node pointed to by this edge. */
|
||||
unsigned char label; /* Label on this edge. */
|
||||
char balance; /* Difference in depths of subtrees. */
|
||||
};
|
||||
|
||||
/* Node of a trie representing a set of keywords. */
|
||||
struct trie
|
||||
{
|
||||
/* If an accepting node, this is either 2*W + 1 where W is the word
|
||||
index, or is SIZE_MAX if Aho-Corasick is in use and FAIL
|
||||
specifies where to look for more info. If not an accepting node,
|
||||
this is zero. */
|
||||
size_t accepting;
|
||||
|
||||
struct tree *links; /* Tree of edges leaving this node. */
|
||||
struct trie *parent; /* Parent of this node. */
|
||||
struct trie *next; /* List of all trie nodes in level order. */
|
||||
struct trie *fail; /* Aho-Corasick failure function. */
|
||||
ptrdiff_t depth; /* Depth of this node from the root. */
|
||||
ptrdiff_t shift; /* Shift function for search failures. */
|
||||
ptrdiff_t maxshift; /* Max shift of self and descendants. */
|
||||
};
|
||||
|
||||
/* Structure returned opaquely to the caller, containing everything. */
|
||||
struct kwset
|
||||
{
|
||||
struct obstack obstack; /* Obstack for node allocation. */
|
||||
ptrdiff_t words; /* Number of words in the trie. */
|
||||
struct trie *trie; /* The trie itself. */
|
||||
ptrdiff_t mind; /* Minimum depth of an accepting node. */
|
||||
ptrdiff_t maxd; /* Maximum depth of any node. */
|
||||
unsigned char delta[NCHAR]; /* Delta table for rapid search. */
|
||||
struct trie *next[NCHAR]; /* Table of children of the root. */
|
||||
char *target; /* Target string if there's only one. */
|
||||
ptrdiff_t *shift; /* Used in Boyer-Moore search for one
|
||||
string. */
|
||||
char const *trans; /* Character translation table. */
|
||||
|
||||
/* This helps to match a terminal byte, which is the first byte
|
||||
for Aho-Corasick, and the last byte for Boyer-More. If all the
|
||||
patterns have the same terminal byte (after translation via TRANS
|
||||
if TRANS is nonnull), then this is that byte as an unsigned char.
|
||||
Otherwise this is -1 if there is disagreement among the strings
|
||||
about terminal bytes, and -2 if there are no terminal bytes and
|
||||
no disagreement because all the patterns are empty. */
|
||||
int gc1;
|
||||
|
||||
/* This helps to match a terminal byte. If 0 <= GC1HELP, B is
|
||||
terminal when B == GC1 || B == GC1HELP (note that GC1 == GCHELP
|
||||
is common here). This is typically faster than evaluating
|
||||
to_uchar (TRANS[B]) == GC1. */
|
||||
int gc1help;
|
||||
|
||||
/* If the string has two or more bytes, this is the penultimate byte,
|
||||
after translation via TRANS if TRANS is nonnull. This variable
|
||||
is used only by Boyer-Moore. */
|
||||
char gc2;
|
||||
|
||||
/* kwsexec implementation. */
|
||||
ptrdiff_t (*kwsexec) (kwset_t, char const *, ptrdiff_t,
|
||||
struct kwsmatch *, bool);
|
||||
};
|
||||
|
||||
/* Use TRANS to transliterate C. A null TRANS does no transliteration. */
|
||||
static inline char
|
||||
tr (char const *trans, char c)
|
||||
{
|
||||
return trans ? trans[U(c)] : c;
|
||||
}
|
||||
|
||||
static ptrdiff_t acexec (kwset_t, char const *, ptrdiff_t,
|
||||
struct kwsmatch *, bool);
|
||||
static ptrdiff_t bmexec (kwset_t, char const *, ptrdiff_t,
|
||||
struct kwsmatch *, bool);
|
||||
|
||||
/* Return a newly allocated keyword set. A nonnull TRANS specifies a
|
||||
table of character translations to be applied to all pattern and
|
||||
search text. */
|
||||
kwset_t
|
||||
kwsalloc (char const *trans)
|
||||
{
|
||||
struct kwset *kwset = xmalloc (sizeof *kwset);
|
||||
|
||||
obstack_init (&kwset->obstack);
|
||||
kwset->words = 0;
|
||||
kwset->trie = obstack_alloc (&kwset->obstack, sizeof *kwset->trie);
|
||||
kwset->trie->accepting = 0;
|
||||
kwset->trie->links = NULL;
|
||||
kwset->trie->parent = NULL;
|
||||
kwset->trie->next = NULL;
|
||||
kwset->trie->fail = NULL;
|
||||
kwset->trie->depth = 0;
|
||||
kwset->trie->shift = 0;
|
||||
kwset->mind = PTRDIFF_MAX;
|
||||
kwset->maxd = -1;
|
||||
kwset->target = NULL;
|
||||
kwset->trans = trans;
|
||||
kwset->kwsexec = acexec;
|
||||
|
||||
return kwset;
|
||||
}
|
||||
|
||||
/* This upper bound is valid for CHAR_BIT >= 4 and
|
||||
exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */
|
||||
enum { DEPTH_SIZE = CHAR_BIT + CHAR_BIT / 2 };
|
||||
|
||||
/* Add the given string to the contents of the keyword set. */
|
||||
void
|
||||
kwsincr (kwset_t kwset, char const *text, ptrdiff_t len)
|
||||
{
|
||||
assume (0 <= len);
|
||||
struct trie *trie = kwset->trie;
|
||||
char const *trans = kwset->trans;
|
||||
bool reverse = kwset->kwsexec == bmexec;
|
||||
|
||||
if (reverse)
|
||||
text += len;
|
||||
|
||||
/* Descend the trie (built of keywords) character-by-character,
|
||||
installing new nodes when necessary. */
|
||||
while (len--)
|
||||
{
|
||||
unsigned char uc = reverse ? *--text : *text++;
|
||||
unsigned char label = trans ? trans[uc] : uc;
|
||||
|
||||
/* Descend the tree of outgoing links for this trie node,
|
||||
looking for the current character and keeping track
|
||||
of the path followed. */
|
||||
struct tree *cur = trie->links;
|
||||
struct tree *links[DEPTH_SIZE];
|
||||
enum { L, R } dirs[DEPTH_SIZE];
|
||||
links[0] = (struct tree *) &trie->links;
|
||||
dirs[0] = L;
|
||||
ptrdiff_t depth = 1;
|
||||
|
||||
while (cur && label != cur->label)
|
||||
{
|
||||
links[depth] = cur;
|
||||
if (label < cur->label)
|
||||
dirs[depth++] = L, cur = cur->llink;
|
||||
else
|
||||
dirs[depth++] = R, cur = cur->rlink;
|
||||
}
|
||||
|
||||
/* The current character doesn't have an outgoing link at
|
||||
this trie node, so build a new trie node and install
|
||||
a link in the current trie node's tree. */
|
||||
if (!cur)
|
||||
{
|
||||
cur = obstack_alloc (&kwset->obstack, sizeof *cur);
|
||||
cur->llink = NULL;
|
||||
cur->rlink = NULL;
|
||||
cur->trie = obstack_alloc (&kwset->obstack, sizeof *cur->trie);
|
||||
cur->trie->accepting = 0;
|
||||
cur->trie->links = NULL;
|
||||
cur->trie->parent = trie;
|
||||
cur->trie->next = NULL;
|
||||
cur->trie->fail = NULL;
|
||||
cur->trie->depth = trie->depth + 1;
|
||||
cur->trie->shift = 0;
|
||||
cur->label = label;
|
||||
cur->balance = 0;
|
||||
|
||||
/* Install the new tree node in its parent. */
|
||||
if (dirs[--depth] == L)
|
||||
links[depth]->llink = cur;
|
||||
else
|
||||
links[depth]->rlink = cur;
|
||||
|
||||
/* Back up the tree fixing the balance flags. */
|
||||
while (depth && !links[depth]->balance)
|
||||
{
|
||||
if (dirs[depth] == L)
|
||||
--links[depth]->balance;
|
||||
else
|
||||
++links[depth]->balance;
|
||||
--depth;
|
||||
}
|
||||
|
||||
/* Rebalance the tree by pointer rotations if necessary. */
|
||||
if (depth && ((dirs[depth] == L && --links[depth]->balance)
|
||||
|| (dirs[depth] == R && ++links[depth]->balance)))
|
||||
{
|
||||
struct tree *t, *r, *l, *rl, *lr;
|
||||
|
||||
switch (links[depth]->balance)
|
||||
{
|
||||
case (char) -2:
|
||||
switch (dirs[depth + 1])
|
||||
{
|
||||
case L:
|
||||
r = links[depth], t = r->llink, rl = t->rlink;
|
||||
t->rlink = r, r->llink = rl;
|
||||
t->balance = r->balance = 0;
|
||||
break;
|
||||
case R:
|
||||
r = links[depth], l = r->llink, t = l->rlink;
|
||||
rl = t->rlink, lr = t->llink;
|
||||
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
|
||||
l->balance = t->balance != 1 ? 0 : -1;
|
||||
r->balance = t->balance != (char) -1 ? 0 : 1;
|
||||
t->balance = 0;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (dirs[depth + 1])
|
||||
{
|
||||
case R:
|
||||
l = links[depth], t = l->rlink, lr = t->llink;
|
||||
t->llink = l, l->rlink = lr;
|
||||
t->balance = l->balance = 0;
|
||||
break;
|
||||
case L:
|
||||
l = links[depth], r = l->rlink, t = r->llink;
|
||||
lr = t->llink, rl = t->rlink;
|
||||
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
|
||||
l->balance = t->balance != 1 ? 0 : -1;
|
||||
r->balance = t->balance != (char) -1 ? 0 : 1;
|
||||
t->balance = 0;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (dirs[depth - 1] == L)
|
||||
links[depth - 1]->llink = t;
|
||||
else
|
||||
links[depth - 1]->rlink = t;
|
||||
}
|
||||
}
|
||||
|
||||
trie = cur->trie;
|
||||
}
|
||||
|
||||
/* Mark the node finally reached as accepting, encoding the
|
||||
index number of this word in the keyword set so far. */
|
||||
if (!trie->accepting)
|
||||
{
|
||||
size_t words = kwset->words;
|
||||
trie->accepting = 2 * words + 1;
|
||||
}
|
||||
++kwset->words;
|
||||
|
||||
/* Keep track of the longest and shortest string of the keyword set. */
|
||||
if (trie->depth < kwset->mind)
|
||||
kwset->mind = trie->depth;
|
||||
if (trie->depth > kwset->maxd)
|
||||
kwset->maxd = trie->depth;
|
||||
}
|
||||
|
||||
ptrdiff_t
|
||||
kwswords (kwset_t kwset)
|
||||
{
|
||||
return kwset->words;
|
||||
}
|
||||
|
||||
/* Enqueue the trie nodes referenced from the given tree in the
|
||||
given queue. */
|
||||
static void
|
||||
enqueue (struct tree *tree, struct trie **last)
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
enqueue (tree->llink, last);
|
||||
enqueue (tree->rlink, last);
|
||||
(*last) = (*last)->next = tree->trie;
|
||||
}
|
||||
|
||||
/* Compute the Aho-Corasick failure function for the trie nodes referenced
|
||||
from the given tree, given the failure function for their parent as
|
||||
well as a last resort failure node. */
|
||||
static void
|
||||
treefails (struct tree const *tree, struct trie const *fail,
|
||||
struct trie *recourse, bool reverse)
|
||||
{
|
||||
struct tree *cur;
|
||||
|
||||
if (!tree)
|
||||
return;
|
||||
|
||||
treefails (tree->llink, fail, recourse, reverse);
|
||||
treefails (tree->rlink, fail, recourse, reverse);
|
||||
|
||||
/* Find, in the chain of fails going back to the root, the first
|
||||
node that has a descendant on the current label. */
|
||||
while (fail)
|
||||
{
|
||||
cur = fail->links;
|
||||
while (cur && tree->label != cur->label)
|
||||
if (tree->label < cur->label)
|
||||
cur = cur->llink;
|
||||
else
|
||||
cur = cur->rlink;
|
||||
if (cur)
|
||||
{
|
||||
tree->trie->fail = cur->trie;
|
||||
if (!reverse && cur->trie->accepting && !tree->trie->accepting)
|
||||
tree->trie->accepting = SIZE_MAX;
|
||||
return;
|
||||
}
|
||||
fail = fail->fail;
|
||||
}
|
||||
|
||||
tree->trie->fail = recourse;
|
||||
}
|
||||
|
||||
/* Set delta entries for the links of the given tree such that
|
||||
the preexisting delta value is larger than the current depth. */
|
||||
static void
|
||||
treedelta (struct tree const *tree, ptrdiff_t depth, unsigned char delta[])
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
treedelta (tree->llink, depth, delta);
|
||||
treedelta (tree->rlink, depth, delta);
|
||||
if (depth < delta[tree->label])
|
||||
delta[tree->label] = depth;
|
||||
}
|
||||
|
||||
/* Return true if A has every label in B. */
|
||||
static bool _GL_ATTRIBUTE_PURE
|
||||
hasevery (struct tree const *a, struct tree const *b)
|
||||
{
|
||||
if (!b)
|
||||
return true;
|
||||
if (!hasevery (a, b->llink))
|
||||
return false;
|
||||
if (!hasevery (a, b->rlink))
|
||||
return false;
|
||||
while (a && b->label != a->label)
|
||||
if (b->label < a->label)
|
||||
a = a->llink;
|
||||
else
|
||||
a = a->rlink;
|
||||
return !!a;
|
||||
}
|
||||
|
||||
/* Compute a vector, indexed by character code, of the trie nodes
|
||||
referenced from the given tree. */
|
||||
static void
|
||||
treenext (struct tree const *tree, struct trie *next[])
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
treenext (tree->llink, next);
|
||||
treenext (tree->rlink, next);
|
||||
next[tree->label] = tree->trie;
|
||||
}
|
||||
|
||||
/* Prepare a built keyword set for use. */
|
||||
void
|
||||
kwsprep (kwset_t kwset)
|
||||
{
|
||||
char const *trans = kwset->trans;
|
||||
ptrdiff_t i;
|
||||
unsigned char deltabuf[NCHAR];
|
||||
unsigned char *delta = trans ? deltabuf : kwset->delta;
|
||||
struct trie *curr, *last;
|
||||
|
||||
/* Use Boyer-Moore if just one pattern, Aho-Corasick otherwise. */
|
||||
bool reverse = kwset->words == 1;
|
||||
|
||||
if (reverse)
|
||||
{
|
||||
kwset_t new_kwset;
|
||||
|
||||
/* Enqueue the immediate descendants in the level order queue. */
|
||||
for (curr = last = kwset->trie; curr; curr = curr->next)
|
||||
enqueue (curr->links, &last);
|
||||
|
||||
/* Looking for just one string. Extract it from the trie. */
|
||||
kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
|
||||
for (i = 0, curr = kwset->trie; i < kwset->mind; ++i)
|
||||
{
|
||||
kwset->target[i] = curr->links->label;
|
||||
curr = curr->next;
|
||||
}
|
||||
|
||||
new_kwset = kwsalloc (kwset->trans);
|
||||
new_kwset->kwsexec = bmexec;
|
||||
kwsincr (new_kwset, kwset->target, kwset->mind);
|
||||
obstack_free (&kwset->obstack, NULL);
|
||||
*kwset = *new_kwset;
|
||||
free (new_kwset);
|
||||
}
|
||||
|
||||
/* Initial values for the delta table; will be changed later. The
|
||||
delta entry for a given character is the smallest depth of any
|
||||
node at which an outgoing edge is labeled by that character. */
|
||||
memset (delta, MIN (kwset->mind, UCHAR_MAX), sizeof deltabuf);
|
||||
|
||||
/* Traverse the nodes of the trie in level order, simultaneously
|
||||
computing the delta table, failure function, and shift function. */
|
||||
for (curr = last = kwset->trie; curr; curr = curr->next)
|
||||
{
|
||||
/* Enqueue the immediate descendants in the level order queue. */
|
||||
enqueue (curr->links, &last);
|
||||
|
||||
/* Update the delta table for the descendants of this node. */
|
||||
treedelta (curr->links, curr->depth, delta);
|
||||
|
||||
/* Compute the failure function for the descendants of this node. */
|
||||
treefails (curr->links, curr->fail, kwset->trie, reverse);
|
||||
|
||||
if (reverse)
|
||||
{
|
||||
curr->shift = kwset->mind;
|
||||
curr->maxshift = kwset->mind;
|
||||
|
||||
/* Update the shifts at each node in the current node's chain
|
||||
of fails back to the root. */
|
||||
struct trie *fail;
|
||||
for (fail = curr->fail; fail; fail = fail->fail)
|
||||
{
|
||||
/* If the current node has some outgoing edge that the fail
|
||||
doesn't, then the shift at the fail should be no larger
|
||||
than the difference of their depths. */
|
||||
if (!hasevery (fail->links, curr->links))
|
||||
if (curr->depth - fail->depth < fail->shift)
|
||||
fail->shift = curr->depth - fail->depth;
|
||||
|
||||
/* If the current node is accepting then the shift at the
|
||||
fail and its descendants should be no larger than the
|
||||
difference of their depths. */
|
||||
if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
|
||||
fail->maxshift = curr->depth - fail->depth;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (reverse)
|
||||
{
|
||||
/* Traverse the trie in level order again, fixing up all nodes whose
|
||||
shift exceeds their inherited maxshift. */
|
||||
for (curr = kwset->trie->next; curr; curr = curr->next)
|
||||
{
|
||||
if (curr->maxshift > curr->parent->maxshift)
|
||||
curr->maxshift = curr->parent->maxshift;
|
||||
if (curr->shift > curr->maxshift)
|
||||
curr->shift = curr->maxshift;
|
||||
}
|
||||
}
|
||||
|
||||
/* Create a vector, indexed by character code, of the outgoing links
|
||||
from the root node. Accumulate GC1 and GC1HELP. */
|
||||
struct trie *nextbuf[NCHAR];
|
||||
struct trie **next = trans ? nextbuf : kwset->next;
|
||||
memset (next, 0, sizeof nextbuf);
|
||||
treenext (kwset->trie->links, next);
|
||||
int gc1 = -2;
|
||||
int gc1help = -1;
|
||||
for (i = 0; i < NCHAR; i++)
|
||||
{
|
||||
int ti = i;
|
||||
if (trans)
|
||||
{
|
||||
ti = U(trans[i]);
|
||||
kwset->next[i] = next[ti];
|
||||
}
|
||||
if (kwset->next[i])
|
||||
{
|
||||
if (gc1 < -1)
|
||||
{
|
||||
gc1 = ti;
|
||||
gc1help = i;
|
||||
}
|
||||
else if (gc1 == ti)
|
||||
gc1help = gc1help == ti ? i : -1;
|
||||
else if (i == ti && gc1 == gc1help)
|
||||
gc1help = i;
|
||||
else
|
||||
gc1 = -1;
|
||||
}
|
||||
}
|
||||
kwset->gc1 = gc1;
|
||||
kwset->gc1help = gc1help;
|
||||
|
||||
if (reverse)
|
||||
{
|
||||
/* Looking for just one string. Extract it from the trie. */
|
||||
kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
|
||||
for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
|
||||
{
|
||||
kwset->target[i] = curr->links->label;
|
||||
curr = curr->next;
|
||||
}
|
||||
|
||||
if (kwset->mind > 1)
|
||||
{
|
||||
/* Looking for the delta2 shift that might be made after a
|
||||
backwards match has failed. Extract it from the trie. */
|
||||
kwset->shift
|
||||
= obstack_alloc (&kwset->obstack,
|
||||
sizeof *kwset->shift * (kwset->mind - 1));
|
||||
for (i = 0, curr = kwset->trie->next; i < kwset->mind - 1; ++i)
|
||||
{
|
||||
kwset->shift[i] = curr->shift;
|
||||
curr = curr->next;
|
||||
}
|
||||
|
||||
/* The penultimate byte. */
|
||||
kwset->gc2 = tr (trans, kwset->target[kwset->mind - 2]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Fix things up for any translation table. */
|
||||
if (trans)
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
kwset->delta[i] = delta[U(trans[i])];
|
||||
}
|
||||
|
||||
/* Delta2 portion of a Boyer-Moore search. *TP is the string text
|
||||
pointer; it is updated in place. EP is the end of the string text,
|
||||
and SP the end of the pattern. LEN is the pattern length; it must
|
||||
be at least 2. TRANS, if nonnull, is the input translation table.
|
||||
GC1 and GC2 are the last and second-from last bytes of the pattern,
|
||||
transliterated by TRANS; the caller precomputes them for
|
||||
efficiency. If D1 is nonnull, it is a delta1 table for shifting *TP
|
||||
when failing. KWSET->shift says how much to shift. */
|
||||
static inline bool
|
||||
bm_delta2_search (char const **tpp, char const *ep, char const *sp,
|
||||
ptrdiff_t len,
|
||||
char const *trans, char gc1, char gc2,
|
||||
unsigned char const *d1, kwset_t kwset)
|
||||
{
|
||||
char const *tp = *tpp;
|
||||
ptrdiff_t d = len, skip = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
ptrdiff_t i = 2;
|
||||
if (tr (trans, tp[-2]) == gc2)
|
||||
{
|
||||
while (++i <= d)
|
||||
if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
|
||||
break;
|
||||
if (i > d)
|
||||
{
|
||||
for (i = d + skip + 1; i <= len; ++i)
|
||||
if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
|
||||
break;
|
||||
if (i > len)
|
||||
{
|
||||
*tpp = tp - len;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tp += d = kwset->shift[i - 2];
|
||||
if (tp > ep)
|
||||
break;
|
||||
if (tr (trans, tp[-1]) != gc1)
|
||||
{
|
||||
if (d1)
|
||||
tp += d1[U(tp[-1])];
|
||||
break;
|
||||
}
|
||||
skip = i - 1;
|
||||
}
|
||||
|
||||
*tpp = tp;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return the address of the first byte in the buffer S (of size N)
|
||||
that matches the terminal byte specified by KWSET, or NULL if there
|
||||
is no match. KWSET->gc1 should be nonnegative. */
|
||||
static char const *
|
||||
memchr_kwset (char const *s, ptrdiff_t n, kwset_t kwset)
|
||||
{
|
||||
char const *slim = s + n;
|
||||
if (kwset->gc1help < 0)
|
||||
{
|
||||
for (; s < slim; s++)
|
||||
if (kwset->next[U(*s)])
|
||||
return s;
|
||||
}
|
||||
else
|
||||
{
|
||||
int small_heuristic = 2;
|
||||
size_t small_bytes = small_heuristic * sizeof (unsigned long int);
|
||||
while (s < slim)
|
||||
{
|
||||
if (kwset->next[U(*s)])
|
||||
return s;
|
||||
s++;
|
||||
if ((uintptr_t) s % small_bytes == 0)
|
||||
return memchr2 (s, kwset->gc1, kwset->gc1help, slim - s);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Fast Boyer-Moore search (inlinable version). */
|
||||
static inline ptrdiff_t _GL_ATTRIBUTE_PURE
|
||||
bmexec_trans (kwset_t kwset, char const *text, ptrdiff_t size)
|
||||
{
|
||||
assume (0 <= size);
|
||||
unsigned char const *d1;
|
||||
char const *ep, *sp, *tp;
|
||||
int d;
|
||||
ptrdiff_t len = kwset->mind;
|
||||
char const *trans = kwset->trans;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
if (len > size)
|
||||
return -1;
|
||||
if (len == 1)
|
||||
{
|
||||
tp = memchr_kwset (text, size, kwset);
|
||||
return tp ? tp - text : -1;
|
||||
}
|
||||
|
||||
d1 = kwset->delta;
|
||||
sp = kwset->target + len;
|
||||
tp = text + len;
|
||||
char gc1 = kwset->gc1;
|
||||
char gc2 = kwset->gc2;
|
||||
|
||||
/* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
|
||||
ptrdiff_t len12;
|
||||
if (!INT_MULTIPLY_WRAPV (len, 12, &len12) && len12 < size)
|
||||
/* 11 is not a bug, the initial offset happens only once. */
|
||||
for (ep = text + size - 11 * len; tp <= ep; )
|
||||
{
|
||||
char const *tp0 = tp;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d != 0)
|
||||
{
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d != 0)
|
||||
{
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d != 0)
|
||||
{
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
|
||||
/* As a heuristic, prefer memchr to seeking by
|
||||
delta1 when the latter doesn't advance much. */
|
||||
int advance_heuristic = 16 * sizeof (long);
|
||||
if (advance_heuristic <= tp - tp0)
|
||||
continue;
|
||||
tp--;
|
||||
tp = memchr_kwset (tp, text + size - tp, kwset);
|
||||
if (! tp)
|
||||
return -1;
|
||||
tp++;
|
||||
if (ep <= tp)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
|
||||
return tp - text;
|
||||
}
|
||||
|
||||
/* Now only a few characters are left to search. Carefully avoid
|
||||
ever producing an out-of-bounds pointer. */
|
||||
ep = text + size;
|
||||
d = d1[U(tp[-1])];
|
||||
while (d <= ep - tp)
|
||||
{
|
||||
d = d1[U((tp += d)[-1])];
|
||||
if (d != 0)
|
||||
continue;
|
||||
if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
|
||||
return tp - text;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Fast Boyer-Moore search. */
|
||||
static ptrdiff_t
|
||||
bmexec (kwset_t kwset, char const *text, ptrdiff_t size,
|
||||
struct kwsmatch *kwsmatch, bool longest)
|
||||
{
|
||||
/* Help the compiler inline in two ways, depending on whether
|
||||
kwset->trans is null. */
|
||||
ptrdiff_t ret = (IGNORE_DUPLICATE_BRANCH_WARNING
|
||||
(kwset->trans
|
||||
? bmexec_trans (kwset, text, size)
|
||||
: bmexec_trans (kwset, text, size)));
|
||||
kwsmatch->index = 0;
|
||||
kwsmatch->offset = ret;
|
||||
kwsmatch->size = kwset->mind;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Hairy multiple string search with the Aho-Corasick algorithm.
|
||||
(inlinable version) */
|
||||
static inline ptrdiff_t
|
||||
acexec_trans (kwset_t kwset, char const *text, ptrdiff_t len,
|
||||
struct kwsmatch *kwsmatch, bool longest)
|
||||
{
|
||||
struct trie const *trie, *accept;
|
||||
char const *tp, *left, *lim;
|
||||
struct tree const *tree;
|
||||
char const *trans;
|
||||
|
||||
/* Initialize register copies and look for easy ways out. */
|
||||
if (len < kwset->mind)
|
||||
return -1;
|
||||
trans = kwset->trans;
|
||||
trie = kwset->trie;
|
||||
lim = text + len;
|
||||
tp = text;
|
||||
|
||||
if (!trie->accepting)
|
||||
{
|
||||
unsigned char c;
|
||||
int gc1 = kwset->gc1;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (gc1 < 0)
|
||||
{
|
||||
while (! (trie = kwset->next[c = tr (trans, *tp++)]))
|
||||
if (tp >= lim)
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
tp = memchr_kwset (tp, lim - tp, kwset);
|
||||
if (!tp)
|
||||
return -1;
|
||||
c = tr (trans, *tp++);
|
||||
trie = kwset->next[c];
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (trie->accepting)
|
||||
goto match;
|
||||
if (tp >= lim)
|
||||
return -1;
|
||||
c = tr (trans, *tp++);
|
||||
|
||||
for (tree = trie->links; c != tree->label; )
|
||||
{
|
||||
tree = c < tree->label ? tree->llink : tree->rlink;
|
||||
if (! tree)
|
||||
{
|
||||
trie = trie->fail;
|
||||
if (!trie)
|
||||
{
|
||||
trie = kwset->next[c];
|
||||
if (trie)
|
||||
goto have_trie;
|
||||
if (tp >= lim)
|
||||
return -1;
|
||||
goto next_c;
|
||||
}
|
||||
if (trie->accepting)
|
||||
{
|
||||
--tp;
|
||||
goto match;
|
||||
}
|
||||
tree = trie->links;
|
||||
}
|
||||
}
|
||||
trie = tree->trie;
|
||||
have_trie:;
|
||||
}
|
||||
next_c:;
|
||||
}
|
||||
}
|
||||
|
||||
match:
|
||||
accept = trie;
|
||||
while (accept->accepting == SIZE_MAX)
|
||||
accept = accept->fail;
|
||||
left = tp - accept->depth;
|
||||
|
||||
/* Try left-most longest match. */
|
||||
if (longest)
|
||||
{
|
||||
while (tp < lim)
|
||||
{
|
||||
struct trie const *accept1;
|
||||
char const *left1;
|
||||
unsigned char c = tr (trans, *tp++);
|
||||
|
||||
do
|
||||
{
|
||||
tree = trie->links;
|
||||
while (tree && c != tree->label)
|
||||
tree = c < tree->label ? tree->llink : tree->rlink;
|
||||
}
|
||||
while (!tree && (trie = trie->fail) && accept->depth <= trie->depth);
|
||||
|
||||
if (!tree)
|
||||
break;
|
||||
trie = tree->trie;
|
||||
if (trie->accepting)
|
||||
{
|
||||
accept1 = trie;
|
||||
while (accept1->accepting == SIZE_MAX)
|
||||
accept1 = accept1->fail;
|
||||
left1 = tp - accept1->depth;
|
||||
if (left1 <= left)
|
||||
{
|
||||
left = left1;
|
||||
accept = accept1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kwsmatch->index = accept->accepting / 2;
|
||||
kwsmatch->offset = left - text;
|
||||
kwsmatch->size = accept->depth;
|
||||
|
||||
return left - text;
|
||||
}
|
||||
|
||||
/* Hairy multiple string search with Aho-Corasick algorithm. */
|
||||
static ptrdiff_t
|
||||
acexec (kwset_t kwset, char const *text, ptrdiff_t size,
|
||||
struct kwsmatch *kwsmatch, bool longest)
|
||||
{
|
||||
assume (0 <= size);
|
||||
/* Help the compiler inline in two ways, depending on whether
|
||||
kwset->trans is null. */
|
||||
return (IGNORE_DUPLICATE_BRANCH_WARNING
|
||||
(kwset->trans
|
||||
? acexec_trans (kwset, text, size, kwsmatch, longest)
|
||||
: acexec_trans (kwset, text, size, kwsmatch, longest)));
|
||||
}
|
||||
|
||||
/* Find the first instance of a KWSET member in TEXT, which has SIZE bytes.
|
||||
Return the offset (into TEXT) of the first byte of the matching substring,
|
||||
or -1 if no match is found. Upon a match, store details in
|
||||
*KWSMATCH: index of matched keyword, start offset (same as the return
|
||||
value), and length. If LONGEST, find the longest match; otherwise
|
||||
any match will do. */
|
||||
ptrdiff_t
|
||||
kwsexec (kwset_t kwset, char const *text, ptrdiff_t size,
|
||||
struct kwsmatch *kwsmatch, bool longest)
|
||||
{
|
||||
return kwset->kwsexec (kwset, text, size, kwsmatch, longest);
|
||||
}
|
||||
|
||||
/* Free the components of the given keyword set. */
|
||||
void
|
||||
kwsfree (kwset_t kwset)
|
||||
{
|
||||
obstack_free (&kwset->obstack, NULL);
|
||||
free (kwset);
|
||||
}
|
||||
44
src/kwset.h
44
src/kwset.h
@ -1,44 +0,0 @@
|
||||
/* kwset.h - header declaring the keyword set library.
|
||||
Copyright (C) 1989, 1998, 2005, 2007, 2009-2021 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
|
||||
/* Written August 1989 by Mike Haertel. */
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Description of one match, filled in by kwsexec through its
   struct kwsmatch * argument.  */
struct kwsmatch
|
||||
{
|
||||
ptrdiff_t index; /* Index number of the matching keyword. */
|
||||
ptrdiff_t offset; /* Offset of the match; same as kwsexec's return value. */
|
||||
ptrdiff_t size; /* Length of the match. */
|
||||
};
|
||||
|
||||
#include "arg-nonnull.h"
|
||||
|
||||
struct kwset;
|
||||
typedef struct kwset *kwset_t;
|
||||
|
||||
extern kwset_t kwsalloc (char const *);
|
||||
extern void kwsincr (kwset_t, char const *, ptrdiff_t);
|
||||
extern ptrdiff_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE;
|
||||
extern void kwsprep (kwset_t);
|
||||
extern ptrdiff_t kwsexec (kwset_t, char const *, ptrdiff_t,
|
||||
struct kwsmatch *, bool)
|
||||
_GL_ARG_NONNULL ((4));
|
||||
extern void kwsfree (kwset_t);
|
||||
383
src/pcresearch.c
383
src/pcresearch.c
@ -1,5 +1,5 @@
|
||||
/* pcresearch.c - searching subroutines using PCRE for grep.
|
||||
Copyright 2000, 2007, 2009-2021 Free Software Foundation, Inc.
|
||||
Copyright 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,228 +12,286 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
|
||||
/* Written August 1992 by Mike Haertel. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
#include "search.h"
|
||||
|
||||
#include <search.h>
|
||||
#include "die.h"
|
||||
|
||||
#include <pcre.h>
|
||||
#include <stdckdint.h>
|
||||
|
||||
/* This must be at least 2; everything after that is for performance
|
||||
in pcre_exec. */
|
||||
enum { NSUB = 300 };
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include <pcre2.h>
|
||||
|
||||
#ifndef PCRE_EXTRA_MATCH_LIMIT_RECURSION
|
||||
# define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
|
||||
/* For older PCRE2. */
|
||||
#ifndef PCRE2_SIZE_MAX
|
||||
# define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#endif
|
||||
#ifndef PCRE_STUDY_JIT_COMPILE
|
||||
# define PCRE_STUDY_JIT_COMPILE 0
|
||||
#ifndef PCRE2_CONFIG_DEPTHLIMIT
|
||||
# define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT
|
||||
# define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT
|
||||
# define pcre2_set_depth_limit pcre2_set_recursion_limit
|
||||
#endif
|
||||
#ifndef PCRE_STUDY_EXTRA_NEEDED
|
||||
# define PCRE_STUDY_EXTRA_NEEDED 0
|
||||
#ifndef PCRE2_EXTRA_ASCII_BSD
|
||||
# define PCRE2_EXTRA_ASCII_BSD 0
|
||||
#endif
|
||||
|
||||
/* Use PCRE2_MATCH_INVALID_UTF if supported and not buggy;
|
||||
see <https://github.com/PCRE2Project/pcre2/issues/224>.
|
||||
Assume the bug will be fixed after PCRE2 10.42. */
|
||||
#if defined PCRE2_MATCH_INVALID_UTF && 10 < PCRE2_MAJOR + (42 < PCRE2_MINOR)
|
||||
enum { MATCH_INVALID_UTF = PCRE2_MATCH_INVALID_UTF };
|
||||
#else
|
||||
enum { MATCH_INVALID_UTF = 0 };
|
||||
#endif
|
||||
|
||||
struct pcre_comp
|
||||
{
|
||||
/* General context for PCRE operations. */
|
||||
pcre2_general_context *gcontext;
|
||||
|
||||
/* Compiled internal form of a Perl regular expression. */
|
||||
pcre *cre;
|
||||
pcre2_code *cre;
|
||||
|
||||
/* Additional information about the pattern. */
|
||||
pcre_extra *extra;
|
||||
/* Match context and data block. */
|
||||
pcre2_match_context *mcontext;
|
||||
pcre2_match_data *data;
|
||||
|
||||
#if PCRE_STUDY_JIT_COMPILE
|
||||
/* The JIT stack and its maximum size. */
|
||||
pcre_jit_stack *jit_stack;
|
||||
int jit_stack_size;
|
||||
#endif
|
||||
pcre2_jit_stack *jit_stack;
|
||||
idx_t jit_stack_size;
|
||||
|
||||
/* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty
|
||||
/* Table, indexed by ! (flag & PCRE2_NOTBOL), of whether the empty
|
||||
string matches when that flag is used. */
|
||||
int empty_match[2];
|
||||
};
|
||||
|
||||
/* Memory allocation functions for PCRE. */
|
||||
static void *
|
||||
private_malloc (PCRE2_SIZE size, _GL_UNUSED void *unused)
|
||||
{
|
||||
if (IDX_MAX < size)
|
||||
xalloc_die ();
|
||||
return ximalloc (size);
|
||||
}
|
||||
static void
|
||||
private_free (void *ptr, _GL_UNUSED void *unused)
|
||||
{
|
||||
free (ptr);
|
||||
}
|
||||
|
||||
void
|
||||
Pprint_version (void)
|
||||
{
|
||||
char *buf = ximalloc (pcre2_config (PCRE2_CONFIG_VERSION, nullptr));
|
||||
pcre2_config (PCRE2_CONFIG_VERSION, buf);
|
||||
printf (_("\ngrep -P uses PCRE2 %s\n"), buf);
|
||||
free (buf);
|
||||
}
|
||||
|
||||
/* Match the already-compiled PCRE pattern against the data in SUBJECT,
|
||||
of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with
|
||||
options OPTIONS, and storing resulting matches into SUB. Return
|
||||
the (nonnegative) match location or a (negative) error number. */
|
||||
options OPTIONS.
|
||||
Return the (nonnegative) match count or a (negative) error number. */
|
||||
static int
|
||||
jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
|
||||
int search_offset, int options, int *sub)
|
||||
jit_exec (struct pcre_comp *pc, char const *subject, idx_t search_bytes,
|
||||
idx_t search_offset, int options)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes,
|
||||
search_offset, options, sub, NSUB);
|
||||
/* STACK_GROWTH_RATE is taken from PCRE's src/pcre2_jit_compile.c.
|
||||
Going over the jitstack_max limit could trigger an int
|
||||
overflow bug. */
|
||||
int STACK_GROWTH_RATE = 8192;
|
||||
idx_t jitstack_max = MIN (IDX_MAX, SIZE_MAX - (STACK_GROWTH_RATE - 1));
|
||||
|
||||
#if PCRE_STUDY_JIT_COMPILE
|
||||
if (e == PCRE_ERROR_JIT_STACKLIMIT
|
||||
&& 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2)
|
||||
int e = pcre2_match (pc->cre, (PCRE2_SPTR) subject, search_bytes,
|
||||
search_offset, options, pc->data, pc->mcontext);
|
||||
if (e == PCRE2_ERROR_JIT_STACKLIMIT
|
||||
&& pc->jit_stack_size <= jitstack_max / 2)
|
||||
{
|
||||
int old_size = pc->jit_stack_size;
|
||||
int new_size = pc->jit_stack_size = old_size * 2;
|
||||
if (pc->jit_stack)
|
||||
pcre_jit_stack_free (pc->jit_stack);
|
||||
pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size);
|
||||
idx_t old_size = pc->jit_stack_size;
|
||||
idx_t new_size = pc->jit_stack_size = old_size * 2;
|
||||
pcre2_jit_stack_free (pc->jit_stack);
|
||||
pc->jit_stack = pcre2_jit_stack_create (old_size, new_size,
|
||||
pc->gcontext);
|
||||
if (!pc->jit_stack)
|
||||
die (EXIT_TROUBLE, 0,
|
||||
_("failed to allocate memory for the PCRE JIT stack"));
|
||||
pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack);
|
||||
continue;
|
||||
xalloc_die ();
|
||||
if (!pc->mcontext)
|
||||
pc->mcontext = pcre2_match_context_create (pc->gcontext);
|
||||
pcre2_jit_stack_assign (pc->mcontext, nullptr, pc->jit_stack);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if PCRE_EXTRA_MATCH_LIMIT_RECURSION
|
||||
if (e == PCRE_ERROR_RECURSIONLIMIT
|
||||
&& (PCRE_STUDY_EXTRA_NEEDED || pc->extra))
|
||||
else if (e == PCRE2_ERROR_DEPTHLIMIT)
|
||||
{
|
||||
unsigned long lim
|
||||
= (pc->extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION
|
||||
? pc->extra->match_limit_recursion
|
||||
: 0);
|
||||
if (lim <= ULONG_MAX / 2)
|
||||
{
|
||||
pc->extra->match_limit_recursion = lim ? 2 * lim : (1 << 24) - 1;
|
||||
pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
|
||||
continue;
|
||||
}
|
||||
uint32_t lim;
|
||||
pcre2_config (PCRE2_CONFIG_DEPTHLIMIT, &lim);
|
||||
if (ckd_mul (&lim, lim, 2))
|
||||
return e;
|
||||
if (!pc->mcontext)
|
||||
pc->mcontext = pcre2_match_context_create (pc->gcontext);
|
||||
pcre2_set_depth_limit (pc->mcontext, lim);
|
||||
}
|
||||
#endif
|
||||
|
||||
return e;
|
||||
else
|
||||
return e;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true if E is an error code for bad UTF-8. */
|
||||
static bool
|
||||
bad_utf8_from_pcre2 (int e)
|
||||
{
|
||||
return PCRE2_ERROR_UTF8_ERR21 <= e && e <= PCRE2_ERROR_UTF8_ERR1;
|
||||
}
|
||||
|
||||
/* Compile the -P style PATTERN, containing SIZE bytes that are
|
||||
followed by '\n'. Return a description of the compiled pattern. */
|
||||
|
||||
void *
|
||||
Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
||||
Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
{
|
||||
int e;
|
||||
char const *ep;
|
||||
static char const wprefix[] = "(?<!\\w)(?:";
|
||||
static char const wsuffix[] = ")(?!\\w)";
|
||||
static char const xprefix[] = "^(?:";
|
||||
static char const xsuffix[] = ")$";
|
||||
int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
|
||||
sizeof xprefix - 1 + sizeof xsuffix - 1);
|
||||
char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
|
||||
int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
|
||||
PCRE2_SIZE e;
|
||||
int ec;
|
||||
int flags = PCRE2_DOLLAR_ENDONLY | (match_icase ? PCRE2_CASELESS : 0);
|
||||
char *patlim = pattern + size;
|
||||
char *n = re;
|
||||
char const *p;
|
||||
char const *pnul;
|
||||
struct pcre_comp *pc = xcalloc (1, sizeof (*pc));
|
||||
struct pcre_comp *pc = ximalloc (sizeof *pc);
|
||||
pcre2_general_context *gcontext = pc->gcontext
|
||||
= pcre2_general_context_create (private_malloc, private_free, nullptr);
|
||||
pcre2_compile_context *ccontext = pcre2_compile_context_create (gcontext);
|
||||
|
||||
if (localeinfo.multibyte)
|
||||
{
|
||||
uint32_t unicode;
|
||||
if (pcre2_config (PCRE2_CONFIG_UNICODE, &unicode) < 0 || !unicode)
|
||||
die (EXIT_TROUBLE, 0,
|
||||
_("-P supports only unibyte locales on this platform"));
|
||||
if (! localeinfo.using_utf8)
|
||||
die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
|
||||
flags |= PCRE_UTF8;
|
||||
|
||||
flags |= PCRE2_UTF;
|
||||
|
||||
/* If supported, consider invalid UTF-8 as a barrier not an error. */
|
||||
flags |= MATCH_INVALID_UTF;
|
||||
|
||||
/* If PCRE2_EXTRA_ASCII_BSD is available, use PCRE2_UCP
|
||||
so that \d does not have the undesirable effect of matching
|
||||
non-ASCII digits. Otherwise (i.e., with PCRE2 10.42 and earlier),
|
||||
escapes like \w have only their ASCII interpretations,
|
||||
but that's better than the confusion that would ensue if \d
|
||||
matched non-ASCII digits. */
|
||||
flags |= PCRE2_EXTRA_ASCII_BSD ? PCRE2_UCP : 0;
|
||||
|
||||
#if 0
|
||||
/* Do not match individual code units but only UTF-8. */
|
||||
flags |= PCRE2_NEVER_BACKSLASH_C;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* FIXME: Remove this restriction. */
|
||||
if (rawmemchr (pattern, '\n') != patlim)
|
||||
die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
|
||||
|
||||
*n = '\0';
|
||||
if (match_words)
|
||||
strcpy (n, wprefix);
|
||||
if (match_lines)
|
||||
strcpy (n, xprefix);
|
||||
n += strlen (n);
|
||||
|
||||
/* The PCRE interface doesn't allow NUL bytes in the pattern, so
|
||||
replace each NUL byte in the pattern with the four characters
|
||||
"\000", removing a preceding backslash if there are an odd
|
||||
number of backslashes before the NUL. */
|
||||
*patlim = '\0';
|
||||
for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
|
||||
{
|
||||
memcpy (n, p, pnul - p);
|
||||
n += pnul - p;
|
||||
for (p = pnul; pattern < p && p[-1] == '\\'; p--)
|
||||
continue;
|
||||
n -= (pnul - p) & 1;
|
||||
strcpy (n, "\\000");
|
||||
n += 4;
|
||||
}
|
||||
memcpy (n, p, patlim - p + 1);
|
||||
n += patlim - p;
|
||||
*patlim = '\n';
|
||||
|
||||
if (match_words)
|
||||
strcpy (n, wsuffix);
|
||||
if (match_lines)
|
||||
strcpy (n, xsuffix);
|
||||
|
||||
pc->cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
|
||||
if (!pc->cre)
|
||||
die (EXIT_TROUBLE, 0, "%s", ep);
|
||||
|
||||
int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE;
|
||||
pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep);
|
||||
if (ep)
|
||||
die (EXIT_TROUBLE, 0, "%s", ep);
|
||||
|
||||
#if PCRE_STUDY_JIT_COMPILE
|
||||
if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e))
|
||||
die (EXIT_TROUBLE, 0, _("internal error (should never happen)"));
|
||||
|
||||
/* The PCRE documentation says that a 32 KiB stack is the default. */
|
||||
if (e)
|
||||
pc->jit_stack_size = 32 << 10;
|
||||
#ifdef PCRE2_EXTRA_MATCH_LINE
|
||||
uint32_t extra_options = (PCRE2_EXTRA_ASCII_BSD
|
||||
| (match_lines ? PCRE2_EXTRA_MATCH_LINE : 0));
|
||||
pcre2_set_compile_extra_options (ccontext, extra_options);
|
||||
#endif
|
||||
|
||||
free (re);
|
||||
void *re_storage = nullptr;
|
||||
if (match_lines)
|
||||
{
|
||||
#ifndef PCRE2_EXTRA_MATCH_LINE
|
||||
static char const *const xprefix = "^(?:";
|
||||
static char const *const xsuffix = ")$";
|
||||
idx_t re_size = size + strlen (xprefix) + strlen (xsuffix);
|
||||
char *re = re_storage = ximalloc (re_size);
|
||||
char *rez = mempcpy (re, xprefix, strlen (xprefix));
|
||||
rez = mempcpy (rez, pattern, size);
|
||||
memcpy (rez, xsuffix, strlen (xsuffix));
|
||||
pattern = re;
|
||||
size = re_size;
|
||||
#endif
|
||||
}
|
||||
else if (match_words)
|
||||
{
|
||||
/* PCRE2_EXTRA_MATCH_WORD is incompatible with grep -w;
|
||||
do things the grep way. */
|
||||
static char const *const wprefix = "(?<!\\w)(?:";
|
||||
static char const *const wsuffix = ")(?!\\w)";
|
||||
idx_t re_size = size + strlen (wprefix) + strlen (wsuffix);
|
||||
char *re = re_storage = ximalloc (re_size);
|
||||
char *rez = mempcpy (re, wprefix, strlen (wprefix));
|
||||
rez = mempcpy (rez, pattern, size);
|
||||
memcpy (rez, wsuffix, strlen (wsuffix));
|
||||
pattern = re;
|
||||
size = re_size;
|
||||
}
|
||||
|
||||
int sub[NSUB];
|
||||
pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0,
|
||||
PCRE_NOTBOL, sub, NSUB);
|
||||
pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub,
|
||||
NSUB);
|
||||
if (!localeinfo.multibyte)
|
||||
pcre2_set_character_tables (ccontext, pcre2_maketables (gcontext));
|
||||
|
||||
pc->cre = pcre2_compile ((PCRE2_SPTR) pattern, size, flags,
|
||||
&ec, &e, ccontext);
|
||||
if (!pc->cre)
|
||||
{
|
||||
enum { ERRBUFSIZ = 256 }; /* Taken from pcre2grep.c ERRBUFSIZ. */
|
||||
PCRE2_UCHAR8 ep[ERRBUFSIZ];
|
||||
pcre2_get_error_message (ec, ep, sizeof ep);
|
||||
die (EXIT_TROUBLE, 0, "%s", ep);
|
||||
}
|
||||
|
||||
free (re_storage);
|
||||
pcre2_compile_context_free (ccontext);
|
||||
|
||||
pc->mcontext = nullptr;
|
||||
pc->data = pcre2_match_data_create_from_pattern (pc->cre, gcontext);
|
||||
|
||||
/* Ignore any failure return from pcre2_jit_compile, as that merely
|
||||
means JIT won't be used during matching. */
|
||||
pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE);
|
||||
|
||||
/* The PCRE documentation says that a 32 KiB stack is the default. */
|
||||
pc->jit_stack = nullptr;
|
||||
pc->jit_stack_size = 32 << 10;
|
||||
|
||||
pc->empty_match[false] = jit_exec (pc, "", 0, 0, PCRE2_NOTBOL);
|
||||
pc->empty_match[true] = jit_exec (pc, "", 0, 0, 0);
|
||||
|
||||
return pc;
|
||||
}
|
||||
|
||||
size_t
|
||||
Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
ptrdiff_t
|
||||
Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
|
||||
char const *start_ptr)
|
||||
{
|
||||
int sub[NSUB];
|
||||
char const *p = start_ptr ? start_ptr : buf;
|
||||
bool bol = p[-1] == eolbyte;
|
||||
char const *line_start = buf;
|
||||
int e = PCRE_ERROR_NOMATCH;
|
||||
int e = PCRE2_ERROR_NOMATCH;
|
||||
char const *line_end;
|
||||
struct pcre_comp *pc = vcp;
|
||||
PCRE2_SIZE *sub = pcre2_get_ovector_pointer (pc->data);
|
||||
|
||||
/* The search address to pass to pcre_exec. This is the start of
|
||||
/* The search address to pass to PCRE. This is the start of
|
||||
the buffer, or just past the most-recently discovered encoding
|
||||
error or line end. */
|
||||
char const *subject = buf;
|
||||
|
||||
do
|
||||
{
|
||||
/* Search line by line. Although this code formerly used
|
||||
PCRE_MULTILINE for performance, the performance wasn't always
|
||||
/* Search line by line. Although this formerly used something like
|
||||
PCRE2_MULTILINE for performance, the performance wasn't always
|
||||
better and the correctness issues were too puzzling. See
|
||||
Bug#22655. */
|
||||
line_end = rawmemchr (p, eolbyte);
|
||||
if (INT_MAX < line_end - p)
|
||||
if (PCRE2_SIZE_MAX < line_end - p)
|
||||
die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* Skip past bytes that are easily determined to be encoding
|
||||
errors, treating them as data that cannot match. This is
|
||||
faster than having pcre_exec check them. */
|
||||
faster than having PCRE check them. */
|
||||
while (localeinfo.sbclen[to_uchar (*p)] == -1)
|
||||
{
|
||||
p++;
|
||||
@ -241,10 +299,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
bol = false;
|
||||
}
|
||||
|
||||
int search_offset = p - subject;
|
||||
idx_t search_offset = p - subject;
|
||||
|
||||
/* Check for an empty match; this is faster than letting
|
||||
pcre_exec do it. */
|
||||
PCRE do it. */
|
||||
if (p == line_end)
|
||||
{
|
||||
sub[0] = sub[1] = search_offset;
|
||||
@ -254,13 +312,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
|
||||
int options = 0;
|
||||
if (!bol)
|
||||
options |= PCRE_NOTBOL;
|
||||
options |= PCRE2_NOTBOL;
|
||||
|
||||
e = jit_exec (pc, subject, line_end - subject, search_offset,
|
||||
options, sub);
|
||||
if (e != PCRE_ERROR_BADUTF8)
|
||||
e = jit_exec (pc, subject, line_end - subject,
|
||||
search_offset, options);
|
||||
if (MATCH_INVALID_UTF || !bad_utf8_from_pcre2 (e))
|
||||
break;
|
||||
int valid_bytes = sub[0];
|
||||
|
||||
idx_t valid_bytes = pcre2_get_startchar (pc->data);
|
||||
|
||||
if (search_offset <= valid_bytes)
|
||||
{
|
||||
@ -270,14 +329,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
/* Handle the empty-match case specially, for speed.
|
||||
This optimization is valid if VALID_BYTES is zero,
|
||||
which means SEARCH_OFFSET is also zero. */
|
||||
sub[0] = valid_bytes;
|
||||
sub[1] = 0;
|
||||
e = pc->empty_match[bol];
|
||||
}
|
||||
else
|
||||
e = jit_exec (pc, subject, valid_bytes, search_offset,
|
||||
options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);
|
||||
options | PCRE2_NO_UTF_CHECK | PCRE2_NOTEOL);
|
||||
|
||||
if (e != PCRE_ERROR_NOMATCH)
|
||||
if (e != PCRE2_ERROR_NOMATCH)
|
||||
break;
|
||||
|
||||
/* Treat the encoding error as data that cannot match. */
|
||||
@ -288,7 +348,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
subject += valid_bytes + 1;
|
||||
}
|
||||
|
||||
if (e != PCRE_ERROR_NOMATCH)
|
||||
if (e != PCRE2_ERROR_NOMATCH)
|
||||
break;
|
||||
bol = true;
|
||||
p = subject = line_start = line_end + 1;
|
||||
@ -299,26 +359,35 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
||||
{
|
||||
switch (e)
|
||||
{
|
||||
case PCRE_ERROR_NOMATCH:
|
||||
case PCRE2_ERROR_NOMATCH:
|
||||
break;
|
||||
|
||||
case PCRE_ERROR_NOMEMORY:
|
||||
case PCRE2_ERROR_NOMEMORY:
|
||||
die (EXIT_TROUBLE, 0, _("%s: memory exhausted"), input_filename ());
|
||||
|
||||
#if PCRE_STUDY_JIT_COMPILE
|
||||
case PCRE_ERROR_JIT_STACKLIMIT:
|
||||
case PCRE2_ERROR_JIT_STACKLIMIT:
|
||||
die (EXIT_TROUBLE, 0, _("%s: exhausted PCRE JIT stack"),
|
||||
input_filename ());
|
||||
#endif
|
||||
|
||||
case PCRE_ERROR_MATCHLIMIT:
|
||||
case PCRE2_ERROR_MATCHLIMIT:
|
||||
die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's backtracking limit"),
|
||||
input_filename ());
|
||||
|
||||
case PCRE_ERROR_RECURSIONLIMIT:
|
||||
die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's recursion limit"),
|
||||
case PCRE2_ERROR_DEPTHLIMIT:
|
||||
die (EXIT_TROUBLE, 0,
|
||||
_("%s: exceeded PCRE's nested backtracking limit"),
|
||||
input_filename ());
|
||||
|
||||
case PCRE2_ERROR_RECURSELOOP:
|
||||
die (EXIT_TROUBLE, 0, _("%s: PCRE detected recurse loop"),
|
||||
input_filename ());
|
||||
|
||||
#ifdef PCRE2_ERROR_HEAPLIMIT
|
||||
case PCRE2_ERROR_HEAPLIMIT:
|
||||
die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's heap limit"),
|
||||
input_filename ());
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* For now, we lump all remaining PCRE failures into this basket.
|
||||
If anyone cares to provide sample grep usage that can trigger
|
||||
|
||||
57
src/search.h
57
src/search.h
@ -1,5 +1,5 @@
|
||||
/* search.c - searching subroutines using dfa, kwset and regex for grep.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2021 Free Software Foundation, Inc.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GREP_SEARCH_H
|
||||
#define GREP_SEARCH_H 1
|
||||
@ -24,7 +22,6 @@
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "system.h"
|
||||
@ -48,38 +45,56 @@ typedef signed char mb_len_map_t;
|
||||
/* searchutils.c */
|
||||
extern void wordinit (void);
|
||||
extern kwset_t kwsinit (bool);
|
||||
extern size_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
|
||||
extern size_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
|
||||
extern size_t wordchar_prev (char const *, char const *, char const *)
|
||||
extern idx_t wordchars_size (char const *, char const *) _GL_ATTRIBUTE_PURE;
|
||||
extern idx_t wordchar_next (char const *, char const *) _GL_ATTRIBUTE_PURE;
|
||||
extern idx_t wordchar_prev (char const *, char const *, char const *)
|
||||
_GL_ATTRIBUTE_PURE;
|
||||
extern ptrdiff_t mb_goback (char const **, size_t *, char const *,
|
||||
char const *);
|
||||
extern ptrdiff_t mb_goback (char const **, idx_t *, char const *, char const *);
|
||||
|
||||
/* dfasearch.c */
|
||||
extern void *GEAcompile (char *, size_t, reg_syntax_t, bool);
|
||||
extern size_t EGexecute (void *, char const *, size_t, size_t *, char const *);
|
||||
extern void *GEAcompile (char *, idx_t, reg_syntax_t, bool);
|
||||
extern ptrdiff_t EGexecute (void *, char const *, idx_t, idx_t *, char const *);
|
||||
|
||||
/* kwsearch.c */
|
||||
extern void *Fcompile (char *, size_t, reg_syntax_t, bool);
|
||||
extern size_t Fexecute (void *, char const *, size_t, size_t *, char const *);
|
||||
extern void *Fcompile (char *, idx_t, reg_syntax_t, bool);
|
||||
extern ptrdiff_t Fexecute (void *, char const *, idx_t, idx_t *, char const *);
|
||||
|
||||
/* pcresearch.c */
|
||||
extern void *Pcompile (char *, size_t, reg_syntax_t, bool);
|
||||
extern size_t Pexecute (void *, char const *, size_t, size_t *, char const *);
|
||||
extern void *Pcompile (char *, idx_t, reg_syntax_t, bool);
|
||||
extern ptrdiff_t Pexecute (void *, char const *, idx_t, idx_t *, char const *);
|
||||
extern void Pprint_version (void);
|
||||
|
||||
/* grep.c */
|
||||
extern struct localeinfo localeinfo;
|
||||
extern void fgrep_to_grep_pattern (char **, size_t *);
|
||||
extern void fgrep_to_grep_pattern (char **, idx_t *);
|
||||
|
||||
/* Return the number of bytes in the character at the start of S, which
|
||||
is of size N. N must be positive. MBS is the conversion state.
|
||||
This acts like mbrlen, except it returns -1 and -2 instead of
|
||||
(size_t) -1 and (size_t) -2. */
|
||||
SEARCH_INLINE ptrdiff_t
|
||||
imbrlen (char const *s, idx_t n, mbstate_t *mbs)
|
||||
{
|
||||
size_t len = mbrlen (s, n, mbs);
|
||||
|
||||
/* Convert result to ptrdiff_t portably, even on oddball platforms.
|
||||
When optimizing, this typically uses no machine instructions. */
|
||||
if (len <= MB_LEN_MAX)
|
||||
return len;
|
||||
ptrdiff_t neglen = -len;
|
||||
return -neglen;
|
||||
}
|
||||
|
||||
/* Return the number of bytes in the character at the start of S, which
|
||||
is of size N. N must be positive. MBS is the conversion state.
|
||||
This acts like mbrlen, except it returns 1 when mbrlen would return 0,
|
||||
it returns -1 and -2 instead of (size_t) -1 and (size_t) -2,
|
||||
and it is typically faster because of the cache. */
|
||||
SEARCH_INLINE size_t
|
||||
mb_clen (char const *s, size_t n, mbstate_t *mbs)
|
||||
SEARCH_INLINE ptrdiff_t
|
||||
mb_clen (char const *s, idx_t n, mbstate_t *mbs)
|
||||
{
|
||||
size_t len = localeinfo.sbclen[to_uchar (*s)];
|
||||
return len == (size_t) -2 ? mbrlen (s, n, mbs) : len;
|
||||
signed char len = localeinfo.sbclen[to_uchar (*s)];
|
||||
return len == -2 ? imbrlen (s, n, mbs) : len;
|
||||
}
|
||||
|
||||
extern char const *input_filename (void);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* searchutils.c - helper subroutines for grep's matchers.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2021 Free Software Foundation, Inc.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,15 +12,15 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#define SEARCH_INLINE _GL_EXTERN_INLINE
|
||||
#define SYSTEM_INLINE _GL_EXTERN_INLINE
|
||||
#include "search.h"
|
||||
#include <search.h>
|
||||
|
||||
#include <uchar.h>
|
||||
|
||||
/* For each byte B, sbwordchar[B] is true if B is a single-byte
|
||||
character that is a word constituent, and is false otherwise. */
|
||||
@ -30,7 +30,7 @@ static bool sbwordchar[NCHAR];
|
||||
static bool
|
||||
wordchar (wint_t wc)
|
||||
{
|
||||
return wc == L'_' || iswalnum (wc);
|
||||
return wc == L'_' || c32isalnum (wc);
|
||||
}
|
||||
|
||||
void
|
||||
@ -43,11 +43,11 @@ wordinit (void)
|
||||
kwset_t
|
||||
kwsinit (bool mb_trans)
|
||||
{
|
||||
char *trans = NULL;
|
||||
char *trans = nullptr;
|
||||
|
||||
if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
|
||||
{
|
||||
trans = xmalloc (NCHAR);
|
||||
trans = ximalloc (NCHAR);
|
||||
/* If I is a single-byte character that becomes a different
|
||||
single-byte character when uppercased, set trans[I]
|
||||
to that character. Otherwise, set trans[I] to I. */
|
||||
@ -58,56 +58,85 @@ kwsinit (bool mb_trans)
|
||||
return kwsalloc (trans);
|
||||
}
|
||||
|
||||
/* In the buffer *MB_START, return the number of bytes needed to go
|
||||
back from CUR to the previous boundary, where a "boundary" is the
|
||||
start of a multibyte character or is an error-encoding byte. The
|
||||
buffer ends at END (i.e., one past the address of the buffer's last
|
||||
byte). If CUR is already at a boundary, return 0. If CUR is no
|
||||
larger than *MB_START, return CUR - *MB_START without modifying
|
||||
*MB_START or *MBCLEN.
|
||||
/* Return the number of bytes needed to go back to the start of a
|
||||
multibyte character in a buffer. The buffer starts at *MB_START.
|
||||
(See below for MBCLEN's role.) The multibyte character contains
|
||||
the byte addressed by CUR. The buffer ends just before END, which
|
||||
must not be less than CUR.
|
||||
|
||||
When returning zero, set *MB_START to CUR. When returning a
|
||||
positive value, set *MB_START to the next boundary after CUR,
|
||||
or to END if there is no such boundary, and set *MBCLEN to the
|
||||
length of the preceding character. */
|
||||
If CUR is no larger than *MB_START, return CUR - *MB_START without
|
||||
modifying *MB_START or dealing with MBCLEN. Otherwise, update
|
||||
*MB_START to point to the first multibyte character starting on or
|
||||
after CUR, and if MBCLEN is nonnull then deal with MBCLEN as follows:
|
||||
|
||||
- If this function returns 0 and the locale is multibyte and is
|
||||
not UTF-8, set *MBCLEN to the number of bytes in the multibyte
|
||||
character containing the byte addressed by (CUR - 1).
|
||||
|
||||
- Otherwise, possibly set *MBCLEN to an unspecified value.
|
||||
|
||||
*MB_START should point to the start of a multibyte character, or to
|
||||
an encoding-error byte.
|
||||
|
||||
*END should be a sentinel byte - one of '\0', '\r', '\n', '.', '/',
|
||||
which POSIX says cannot be part of any other character. Also,
|
||||
there should be a byte string immediately before *MB_START that
|
||||
contains a sentinel byte. This means it is OK to scan backwards
|
||||
before *MB_START as long as the scan stops at a sentinel byte, and
|
||||
similarly it is OK to scan forwards from CUR (without checking END)
|
||||
so long as the scan stops at a sentinel byte.
|
||||
|
||||
Treat encoding errors as if they were single-byte characters. */
|
||||
ptrdiff_t
|
||||
mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
|
||||
mb_goback (char const **mb_start, idx_t *mbclen, char const *cur,
|
||||
char const *end)
|
||||
{
|
||||
const char *p = *mb_start;
|
||||
const char *p0 = p;
|
||||
size_t clen;
|
||||
|
||||
if (cur <= p)
|
||||
return cur - p;
|
||||
|
||||
if (localeinfo.using_utf8)
|
||||
{
|
||||
/* UTF-8 permits scanning backward to the previous character.
|
||||
Start by assuming CUR is at a character boundary. */
|
||||
p = cur;
|
||||
clen = 1;
|
||||
|
||||
if (cur < end && (*cur & 0xc0) == 0x80)
|
||||
if ((*cur & 0xc0) == 0x80)
|
||||
for (int i = 1; i <= 3; i++)
|
||||
if ((cur[-i] & 0xc0) != 0x80)
|
||||
{
|
||||
mbstate_t mbs = { 0 };
|
||||
clen = mb_clen (cur - i, end - (cur - i), &mbs);
|
||||
if (i < clen && clen < (size_t) -2)
|
||||
/* True if the length implied by the putative byte 1 at
|
||||
CUR[-I] extends at least through *CUR. */
|
||||
bool long_enough = (~cur[-i] & 0xff) >> (7 - i) == 0;
|
||||
|
||||
if (long_enough)
|
||||
{
|
||||
p0 = cur - i;
|
||||
p = p0 + clen;
|
||||
mbstate_t mbs; mbszero (&mbs);
|
||||
ptrdiff_t clen = imbrlen (cur - i, end - (cur - i), &mbs);
|
||||
if (0 <= clen)
|
||||
{
|
||||
/* This multibyte character contains *CUR. */
|
||||
p0 = cur - i;
|
||||
p = p0 + clen;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
mbstate_t mbs = { 0 };
|
||||
/* In non-UTF-8 encodings, to find character boundaries one must
|
||||
in general scan forward from the start of the buffer. */
|
||||
mbstate_t mbs; mbszero (&mbs);
|
||||
ptrdiff_t clen;
|
||||
|
||||
do
|
||||
{
|
||||
clen = mb_clen (p, end - p, &mbs);
|
||||
|
||||
if ((size_t) -2 <= clen)
|
||||
if (clen < 0)
|
||||
{
|
||||
/* An invalid sequence, or a truncated multibyte character.
|
||||
Treat it as a single byte character. */
|
||||
@ -118,47 +147,48 @@ mb_goback (char const **mb_start, size_t *mbclen, char const *cur,
|
||||
p += clen;
|
||||
}
|
||||
while (p < cur);
|
||||
|
||||
if (mbclen)
|
||||
*mbclen = clen;
|
||||
}
|
||||
|
||||
*mb_start = p;
|
||||
if (mbclen)
|
||||
*mbclen = clen;
|
||||
return p == cur ? 0 : cur - p0;
|
||||
}
|
||||
|
||||
/* Examine the start of BUF (which goes to END) for word constituents.
|
||||
If COUNTALL, examine as many as possible; otherwise, examine at most one.
|
||||
Return the total number of bytes in the examined characters. */
|
||||
static size_t
|
||||
static idx_t
|
||||
wordchars_count (char const *buf, char const *end, bool countall)
|
||||
{
|
||||
size_t n = 0;
|
||||
mbstate_t mbs = { 0 };
|
||||
while (n < end - buf)
|
||||
mbstate_t mbs; mbszero (&mbs);
|
||||
char const *p = buf;
|
||||
while (p < end)
|
||||
{
|
||||
unsigned char b = buf[n];
|
||||
unsigned char b = *p;
|
||||
if (sbwordchar[b])
|
||||
n++;
|
||||
p++;
|
||||
else if (localeinfo.sbclen[b] != -2)
|
||||
break;
|
||||
else
|
||||
{
|
||||
wchar_t wc = 0;
|
||||
size_t wcbytes = mbrtowc (&wc, buf + n, end - buf - n, &mbs);
|
||||
char32_t wc = 0;
|
||||
size_t wcbytes = mbrtoc32 (&wc, p, end - p, &mbs);
|
||||
if (!wordchar (wc))
|
||||
break;
|
||||
n += wcbytes + !wcbytes;
|
||||
p += wcbytes + !wcbytes;
|
||||
}
|
||||
if (!countall)
|
||||
break;
|
||||
}
|
||||
return n;
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
/* Examine the start of BUF for the longest prefix containing just
|
||||
word constituents. Return the total number of bytes in the prefix.
|
||||
The buffer ends at END. */
|
||||
size_t
|
||||
idx_t
|
||||
wordchars_size (char const *buf, char const *end)
|
||||
{
|
||||
return wordchars_count (buf, end, true);
|
||||
@ -166,7 +196,7 @@ wordchars_size (char const *buf, char const *end)
|
||||
|
||||
/* If BUF starts with a word constituent, return the number of bytes
|
||||
used to represent it; otherwise, return zero. The buffer ends at END. */
|
||||
size_t
|
||||
idx_t
|
||||
wordchar_next (char const *buf, char const *end)
|
||||
{
|
||||
return wordchars_count (buf, end, false);
|
||||
@ -175,16 +205,15 @@ wordchar_next (char const *buf, char const *end)
|
||||
/* In the buffer BUF, return nonzero if the character whose encoding
|
||||
contains the byte before CUR is a word constituent. The buffer
|
||||
ends at END. */
|
||||
size_t
|
||||
idx_t
|
||||
wordchar_prev (char const *buf, char const *cur, char const *end)
|
||||
{
|
||||
if (buf == cur)
|
||||
return 0;
|
||||
unsigned char b = *--cur;
|
||||
if (! localeinfo.multibyte
|
||||
|| (localeinfo.using_utf8 && localeinfo.sbclen[b] == 1))
|
||||
if (! localeinfo.multibyte || localeinfo.using_utf8 & ~(b >> 7))
|
||||
return sbwordchar[b];
|
||||
char const *p = buf;
|
||||
cur -= mb_goback (&p, NULL, cur, end);
|
||||
cur -= mb_goback (&p, nullptr, cur, end);
|
||||
return wordchar_next (cur, end);
|
||||
}
|
||||
|
||||
10
src/system.h
10
src/system.h
@ -1,5 +1,5 @@
|
||||
/* Portability cruft. Include after config.h and sys/types.h.
|
||||
Copyright 1996, 1998-2000, 2007, 2009-2021 Free Software Foundation, Inc.
|
||||
Copyright 1996, 1998-2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GREP_SYSTEM_H
|
||||
#define GREP_SYSTEM_H 1
|
||||
@ -101,9 +99,9 @@ void __asan_unpoison_memory_region (void const volatile *addr, size_t size);
|
||||
|
||||
#else
|
||||
|
||||
static _GL_UNUSED void
|
||||
_GL_UNUSED static void
|
||||
__asan_poison_memory_region (void const volatile *addr, size_t size) { }
|
||||
static _GL_UNUSED void
|
||||
_GL_UNUSED static void
|
||||
__asan_unpoison_memory_region (void const volatile *addr, size_t size) { }
|
||||
#endif
|
||||
|
||||
|
||||
15
tests/100k-entries
Executable file
15
tests/100k-entries
Executable file
@ -0,0 +1,15 @@
|
||||
#!/bin/sh
|
||||
# This would make grep-3.11 fail with ENOTSUP and exit 2.
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
expensive_
|
||||
|
||||
fail=0
|
||||
|
||||
mkdir t || framework_failure_
|
||||
(cd t && seq 100000|xargs touch) || framework_failure_
|
||||
|
||||
returns_ 1 grep -r x t > out 2> err
|
||||
compare /dev/null out || fail=1
|
||||
compare /dev/null err || fail=1
|
||||
|
||||
Exit $fail
|
||||
@ -1,7 +1,7 @@
|
||||
package Coreutils;
|
||||
# This is a testing framework.
|
||||
|
||||
# Copyright (C) 1998-2015, 2017-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1998-2015, 2017-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
package CuSkip;
|
||||
# Skip a test: emit diag to log and to stderr, and exit 77
|
||||
|
||||
# Copyright (C) 2011-2015, 2017-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2011-2015, 2017-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
package CuTmpdir;
|
||||
# create, then chdir into a temporary sub-directory
|
||||
|
||||
# Copyright (C) 2007-2015, 2017-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2007-2015, 2017-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
## Process this file with automake to create Makefile.in
|
||||
# Copyright 1997-1998, 2005-2021 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -41,18 +41,27 @@ AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS)
|
||||
|
||||
# Tell the linker to omit references to unused shared libraries.
|
||||
AM_LDFLAGS = $(IGNORE_UNUSED_LIBRARIES_CFLAGS)
|
||||
LDADD = ../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a
|
||||
LDADD = ../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a \
|
||||
$(HARD_LOCALE_LIB) $(LIBC32CONV) $(LIBCSTACK) \
|
||||
$(LIBSIGSEGV) $(LIBUNISTRING) $(MBRTOWC_LIB) $(SETLOCALE_NULL_LIB) \
|
||||
$(LIBTHREAD)
|
||||
|
||||
# The triple-backref test is expected to fail with both the system
|
||||
# matcher (i.e., with glibc) and with the included matcher.
|
||||
# Both matchers need to be fixed.
|
||||
# FIXME-2015: Remove this once the glibc and gnulib bugs are fixed.
|
||||
# FIXME-2025: Remove this once the glibc and gnulib bugs are fixed.
|
||||
XFAIL_TESTS = triple-backref
|
||||
|
||||
# The glibc-infloop test is expected to fail with both the system
|
||||
# matcher (i.e., with glibc) and with the included matcher.
|
||||
# Both matchers need to be fixed.
|
||||
# FIXME-2025: Remove this once the glibc and gnulib bugs are fixed.
|
||||
XFAIL_TESTS += glibc-infloop
|
||||
|
||||
# Equivalence classes are only supported when using the system
|
||||
# matcher (which means only with glibc).
|
||||
# The included matcher needs to be fixed.
|
||||
# FIXME-2015: Remove this once the gnulib bug is fixed.
|
||||
# FIXME-2025: Remove this once the gnulib bug is fixed.
|
||||
if USE_INCLUDED_REGEX
|
||||
XFAIL_TESTS += equiv-classes
|
||||
else
|
||||
@ -62,6 +71,7 @@ else
|
||||
endif
|
||||
|
||||
TESTS = \
|
||||
100k-entries \
|
||||
backref \
|
||||
backref-alt \
|
||||
backref-multibyte-slow \
|
||||
@ -83,6 +93,7 @@ TESTS = \
|
||||
case-fold-titlecase \
|
||||
char-class-multibyte \
|
||||
char-class-multibyte2 \
|
||||
color-colors \
|
||||
context-0 \
|
||||
count-newline \
|
||||
dfa-coverage \
|
||||
@ -104,11 +115,14 @@ TESTS = \
|
||||
fgrep-longest \
|
||||
file \
|
||||
filename-lineno.pl \
|
||||
fillbuf-long-line \
|
||||
fmbtest \
|
||||
foad1 \
|
||||
glibc-infloop \
|
||||
grep-dev-null \
|
||||
grep-dev-null-out \
|
||||
grep-dir \
|
||||
hangul-syllable \
|
||||
hash-collision-perf \
|
||||
help-version \
|
||||
high-bit-range \
|
||||
@ -136,6 +150,7 @@ TESTS = \
|
||||
options \
|
||||
pcre \
|
||||
pcre-abort \
|
||||
pcre-ascii-digits \
|
||||
pcre-context \
|
||||
pcre-count \
|
||||
pcre-infloop \
|
||||
@ -144,6 +159,8 @@ TESTS = \
|
||||
pcre-jitstack \
|
||||
pcre-o \
|
||||
pcre-utf8 \
|
||||
pcre-utf8-bug224 \
|
||||
pcre-utf8-w \
|
||||
pcre-w \
|
||||
pcre-wx-backref \
|
||||
pcre-z \
|
||||
@ -161,6 +178,7 @@ TESTS = \
|
||||
stack-overflow \
|
||||
status \
|
||||
surrogate-pair \
|
||||
surrogate-search \
|
||||
symlink \
|
||||
triple-backref \
|
||||
turkish-I \
|
||||
@ -172,11 +190,13 @@ TESTS = \
|
||||
unibyte-bracket-expr \
|
||||
unibyte-negated-circumflex \
|
||||
utf8-bracket \
|
||||
version-pcre \
|
||||
warn-char-classes \
|
||||
word-delim-multibyte \
|
||||
word-multi-file \
|
||||
word-multibyte \
|
||||
write-error-msg \
|
||||
y2038-vs-32-bit \
|
||||
yesno \
|
||||
z-anchor-newline
|
||||
|
||||
@ -247,6 +267,7 @@ TESTS_ENVIRONMENT = \
|
||||
srcdir='$(srcdir)' \
|
||||
top_srcdir='$(top_srcdir)' \
|
||||
CC='$(CC)' \
|
||||
CONFIG_HEADER='$(abs_top_builddir)/$(CONFIG_INCLUDE)' \
|
||||
GREP_TEST_NAME=`echo $$tst|sed 's,^\./,,;s,/,-,g'` \
|
||||
MAKE=$(MAKE) \
|
||||
MALLOC_PERTURB_=$(MALLOC_PERTURB_) \
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test for back-references and other things.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
@ -43,4 +43,12 @@ if test $? -ne 2 ; then
|
||||
failures=1
|
||||
fi
|
||||
|
||||
# https://bugs.gnu.org/36148#13
|
||||
echo 'Total failed: 2 (1 ignored)' |
|
||||
grep -e '^Total failed: 0$' -e '^Total failed: \([0-9]*\) (\1 ignored)$'
|
||||
if test $? -ne 1 ; then
|
||||
echo "Backref: Multiple -e test, test #5 failed"
|
||||
failures=1
|
||||
fi
|
||||
|
||||
Exit $failures
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test for a bug in glibc's regex code as of 2015-09-19.
|
||||
#
|
||||
# Copyright 2015-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2015-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# This once failed to match: echo . | grep '\.'
|
||||
#
|
||||
# Copyright (C) 2020-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2020-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Ensure that \s and \S work with repetition operators.
|
||||
#
|
||||
# Copyright (C) 2013-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2013-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Ensure that neither \s nor \S matches an invalid multibyte character.
|
||||
#
|
||||
# Copyright (C) 2013-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2013-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
|
||||
expensive_
|
||||
require_perl_
|
||||
|
||||
# Skip this test if there is no usable SEEK_HOLE support,
|
||||
# as is the case with linux-3.5.0 on ext4 and tmpfs file systems.
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test for the "binary file ... matches" diagnostic.
|
||||
#
|
||||
# Copyright (C) 2020-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2020-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
@ -14,8 +14,10 @@ fail=0
|
||||
echo "grep: (standard input): binary file matches" > exp \
|
||||
|| framework_failure_
|
||||
|
||||
printf 'a\0' | grep a > out 2> err || fail=1
|
||||
compare /dev/null out || fail=1
|
||||
compare exp err || fail=1
|
||||
for option in '' -s; do
|
||||
printf 'a\0' | grep $option a > out 2> err || fail=1
|
||||
compare /dev/null out || fail=1
|
||||
compare exp err || fail=1
|
||||
done
|
||||
|
||||
Exit $fail
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Regression test for GNU grep.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Regression test for GNU grep.
|
||||
#
|
||||
# Copyright 2016-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2016-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Check that case folding works even with titlecase and similarly odd chars.
|
||||
|
||||
# Copyright 2014-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -168,7 +168,7 @@ do
|
||||
done
|
||||
|
||||
# Try a unibyte test with ISO 8859-7, if available.
|
||||
if test "$(get-mb-cur-max el_GR.iso88597)" -eq 1; then
|
||||
if test "$(get-mb-cur-max el_GR.iso88597)" = 1; then
|
||||
LC_ALL=el_GR.iso88597
|
||||
export LC_ALL
|
||||
|
||||
|
||||
48
tests/color-colors
Executable file
48
tests/color-colors
Executable file
@ -0,0 +1,48 @@
|
||||
#!/bin/sh
|
||||
# Check that GREP_COLOR elicits a warning.
|
||||
|
||||
# Copyright 2022-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
|
||||
fail=0
|
||||
unset GREP_COLORS
|
||||
unset GREP_COLOR
|
||||
LC_ALL=C
|
||||
export LC_ALL
|
||||
|
||||
printf 'x\n\n' >in || framework_failure_
|
||||
printf '%s\n' \
|
||||
"grep: warning: GREP_COLOR='36' is deprecated; use GREP_COLORS='mt=36'" \
|
||||
>exp.err || framework_failure_
|
||||
|
||||
GREP_COLORS='mt=36:ln=35' grep --color=always . in >exp 2>err || fail=1
|
||||
compare /dev/null err || fail=1
|
||||
GREP_COLOR='36' GREP_COLORS='ln=35' grep --color=always . in >out 2>err \
|
||||
|| fail=1
|
||||
compare exp out || fail=1
|
||||
compare exp.err err || fail=1
|
||||
|
||||
GREP_COLORS='mt=36' grep --color=always . in >exp 2>err || fail=1
|
||||
compare /dev/null err || fail=1
|
||||
GREP_COLOR='36' grep --color=always . in >out 2>err || fail=1
|
||||
compare exp out || fail=1
|
||||
compare exp.err err || fail=1
|
||||
|
||||
GREP_COLORS='ln=35' grep --color=always . in >out 2>err || fail=1
|
||||
compare /dev/null err || fail=1
|
||||
|
||||
Exit $fail
|
||||
@ -2,7 +2,7 @@
|
||||
# Test that newline is counted correctly even when the transition
|
||||
# table is rebuilt.
|
||||
|
||||
# Copyright 2014-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Exercise the final reachable code in dfa.c's match_mb_charset.
|
||||
|
||||
# Copyright (C) 2012-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2012-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Trigger a heap overrun in grep-2.6..grep-2.8.
|
||||
|
||||
# Copyright (C) 2011-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2011-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test whether "grep '.'" matches invalid UTF-8 byte sequences.
|
||||
#
|
||||
# Copyright 2019-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2019-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
11
tests/empty
11
tests/empty
@ -2,7 +2,7 @@
|
||||
# test that the empty file means no pattern
|
||||
# and an empty pattern means match all.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
@ -39,17 +39,10 @@ for locale in C en_US.UTF-8; do
|
||||
failures=1
|
||||
fi
|
||||
|
||||
# should return 0 found a match
|
||||
echo "" | LC_ALL=$locale timeout 10s grep $options -e ''
|
||||
if test $? -ne 0 ; then
|
||||
echo "Status: Wrong status code, test \#4 failed ($options $locale)"
|
||||
failures=1
|
||||
fi
|
||||
|
||||
# should return 0 found a match
|
||||
echo abcd | LC_ALL=$locale timeout 10s grep $options -e ''
|
||||
if test $? -ne 0 ; then
|
||||
echo "Status: Wrong status code, test \#5 failed ($options $locale)"
|
||||
echo "Status: Wrong status code, test \#4 failed ($options $locale)"
|
||||
failures=1
|
||||
fi
|
||||
done
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Exercise bugs in grep-2.13 with -i, -n and an RE of ^$ in a multi-byte locale.
|
||||
#
|
||||
# Copyright (C) 2012-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2012-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test grep's behavior on encoding errors.
|
||||
#
|
||||
# Copyright 2015-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2015-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# -*- sh -*-
|
||||
# Check environment variables for sane values while testing.
|
||||
|
||||
# Copyright (C) 2000-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2000-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Regression test for GNU grep.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test for false matches in grep 2.19..2.26 in multibyte, non-UTF8 locales
|
||||
#
|
||||
# Copyright (C) 2016-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2016-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -18,7 +18,7 @@ ok () { printf "${G}OK${D}"; }
|
||||
fail () { printf "${R}FAIL${D} (See ${U})"; failures=1; }
|
||||
|
||||
U=https://bugzilla.redhat.com/show_bug.cgi?id=116909
|
||||
printf "fgrep false negatives: "
|
||||
printf "grep -F false negatives: "
|
||||
cat > 116909.list <<EOF
|
||||
a
|
||||
b
|
||||
@ -59,7 +59,7 @@ if ( timeout --version ) > /dev/null 2>&1; then
|
||||
echo foobar | returns_ 124 timeout 10 grep -Fw "" && fail || ok
|
||||
|
||||
U=https://bugzilla.redhat.com/show_bug.cgi?id=140781
|
||||
printf 'fgrep hangs on binary files: '
|
||||
printf 'grep -F hangs on binary files: '
|
||||
returns_ 124 timeout 10 grep -F grep "$abs_top_builddir/src/grep" \
|
||||
> /dev/null && fail || ok
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# With multiple matches, grep -Fo could print a shorter one.
|
||||
# This bug affected grep versions 2.26 through 2.27.
|
||||
#
|
||||
# Copyright (C) 2017-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2017-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
# grep -F -f pattern_file file
|
||||
# grep -G -f pattern_file file
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
# file or line number from which the offending regular expression came.
|
||||
# With 2.26, now, each such diagnostic has a "FILENAME:LINENO: " prefix.
|
||||
|
||||
# Copyright (C) 2016-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2016-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -101,13 +101,13 @@ my @Tests =
|
||||
],
|
||||
['invalid-re-P-paren', '-P ")"', {EXIT=>2},
|
||||
{ERR => $ENV{PCRE_WORKS} == 1
|
||||
? "$prog: unmatched parentheses\n"
|
||||
? "$prog: unmatched closing parenthesis\n"
|
||||
: $no_pcre
|
||||
},
|
||||
],
|
||||
['invalid-re-P-star-paren', '-P "a.*)"', {EXIT=>2},
|
||||
{ERR => $ENV{PCRE_WORKS} == 1
|
||||
? "$prog: unmatched parentheses\n"
|
||||
? "$prog: unmatched closing parenthesis\n"
|
||||
: $no_pcre
|
||||
},
|
||||
],
|
||||
|
||||
11
tests/fillbuf-long-line
Executable file
11
tests/fillbuf-long-line
Executable file
@ -0,0 +1,11 @@
|
||||
#!/bin/sh
|
||||
# This would fail for v3.7-15-ge3694e9 .. grep-v3.7-48-g5c3c427
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
|
||||
printf %0104681d 0 > in || framework_failure_
|
||||
|
||||
fail=0
|
||||
|
||||
returns_ 1 grep xx in || fail=1
|
||||
|
||||
Exit $fail
|
||||
@ -1,5 +1,5 @@
|
||||
#! /bin/sh
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
@ -10,7 +10,7 @@
|
||||
cz=cs_CZ.UTF-8
|
||||
|
||||
# If cs_CZ.UTF-8 locale doesn't work, skip this test.
|
||||
LC_ALL=$cz locale -k LC_CTYPE 2>/dev/null | grep -q charmap.*UTF-8 \
|
||||
test "`LC_ALL=$cz locale charmap 2>/dev/null`" = UTF-8 \
|
||||
|| skip_ this system lacks the $cz locale
|
||||
|
||||
# If matching is done in single-byte mode, skip this test too
|
||||
@ -53,21 +53,21 @@ EOF
|
||||
for mode in F G E; do
|
||||
|
||||
test1=$(echo $(LC_ALL=$cz grep -${mode} -f cspatfile csinput |
|
||||
tr -cs '0-9' '[ *]'))
|
||||
tr '\n' ' ' | tr -cd '0-9 '))
|
||||
if test "$test1" != "11 12 13 14 15 16 17 18"; then
|
||||
echo "Test #1 ${mode} failed: $test1"
|
||||
failures=1
|
||||
fi
|
||||
|
||||
test2=$(echo $(LC_ALL=$cz grep -${mode}i -f cspatfile csinput |
|
||||
tr -cs '0-9' '[ *]'))
|
||||
tr '\n' ' ' | tr -cd '0-9 '))
|
||||
if test "$test2" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
|
||||
echo "Test #2 ${mode} failed: $test2"
|
||||
failures=1
|
||||
fi
|
||||
|
||||
test3=$(echo $(LC_ALL=$cz grep -${mode}i -e 'ČÍšE' -e 'Čas' csinput |
|
||||
tr -cs '0-9' '[ *]'))
|
||||
tr '\n' ' ' | tr -cd '0-9 '))
|
||||
if test "$test3" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
|
||||
echo "Test #3 ${mode} failed: $test3"
|
||||
failures=1
|
||||
@ -115,7 +115,7 @@ done
|
||||
for mode in G E; do
|
||||
|
||||
test8=$(echo $(LC_ALL=$cz grep -${mode}i -e 'Č.šE' -e 'Č[a-f]s' csinput |
|
||||
tr -cs '0-9' '[ *]'))
|
||||
tr '\n' ' ' | tr -cd '0-9 '))
|
||||
if test "$test8" != "01 02 07 08 10 11 12 13 14 15 16 17 18 19 20"; then
|
||||
echo "Test #8 ${mode} failed: $test8"
|
||||
failures=1
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test various combinations of command-line options.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
@ -150,7 +150,7 @@ Exit $failures
|
||||
# The rest of this file is meant to be executed under this locale.
|
||||
LC_ALL=cs_CZ.UTF-8; export LC_ALL
|
||||
# If the UTF-8 locale doesn't work, skip these tests silently.
|
||||
locale -k LC_CTYPE 2>/dev/null | grep -q "charmap.*UTF-8" || Exit $failures
|
||||
test "`locale charmap 2>/dev/null`" = UTF-8 || Exit $failures
|
||||
|
||||
# Test character class erroneously matching a '[' character.
|
||||
grep_test "[/" "" "[[:alpha:]]" -E
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Auxiliary program to detect support for a locale.
|
||||
Copyright 2010-2021 Free Software Foundation, Inc.
|
||||
Copyright 2010-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,17 +12,13 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "getprogname.h"
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
|
||||
30
tests/glibc-infloop
Executable file
30
tests/glibc-infloop
Executable file
@ -0,0 +1,30 @@
|
||||
#!/bin/sh
|
||||
# This would infloop when using glibc's regex at least until glibc-2.36.
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
|
||||
require_timeout_
|
||||
require_en_utf8_locale_
|
||||
|
||||
fail=0
|
||||
|
||||
cat <<\EOF > glibc-check.c
|
||||
#include <features.h>
|
||||
#ifdef __GLIBC__
|
||||
int ok;
|
||||
#else
|
||||
# error "not glibc"
|
||||
#endif
|
||||
EOF
|
||||
$CC -c glibc-check.c && glibc=1 || glibc=0
|
||||
|
||||
grep '^#define USE_INCLUDED_REGEX 1' "$CONFIG_HEADER" \
|
||||
&& included_regex=1 || included_regex=0
|
||||
|
||||
case $glibc:$included_regex in
|
||||
0:0) skip_ 'runs only with glibc or when built with the included regex'
|
||||
esac
|
||||
|
||||
echo a > in || framework_failure_
|
||||
timeout 2 env LC_ALL=en_US.UTF-8 grep -E -w '((()|a)|())*' in || fail=1
|
||||
|
||||
Exit $fail
|
||||
184
tests/hangul-syllable
Executable file
184
tests/hangul-syllable
Executable file
@ -0,0 +1,184 @@
|
||||
#!/bin/sh
|
||||
# grep 3.4 through 3.7 mishandled matching '.' against the valid UTF-8
|
||||
# sequences (ED)(90-9F)(80-BF) corresponding to U+D400 through U+D7FF,
|
||||
# which are some Hangul Syllables and Hangul Jamo Extended-B. They
|
||||
# also mishandled (F4)(88-8F)(80-BF)(80-BF) which correspond to
|
||||
# U+108000 through U+10FFFF (Supplemental Private Use Area plane B).
|
||||
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
|
||||
require_en_utf8_locale_
|
||||
|
||||
LC_ALL=en_US.UTF-8
|
||||
export LC_ALL
|
||||
|
||||
# check_char BYTES [GREP_OPTION]
# Write BYTES followed by a newline to the file 'in'.  BYTES is used as
# part of the printf format, so backslash-octal escapes in it are expanded.
# Then require that grep's '^.$' selects that line and reproduces it
# unchanged in 'out', i.e., that BYTES is a single character.
# Set fail=1 if grep reports no match or if 'in' and 'out' differ.
check_char ()
{
  printf "$1\\n" >in || framework_failure_

  # $2 is left unquoted, so an omitted option expands to nothing.
  if grep $2 '^.$' in >out; then :; else fail=1; fi
  if cmp in out; then :; else fail=1; fi
}
|
||||
|
||||
# check_nonchar BYTES
# Write BYTES followed by a newline to the file 'in'.  BYTES is used as
# part of the printf format, so backslash-octal escapes in it are expanded.
# Then require that the line is NOT completely matched by '.*', i.e.,
# that BYTES contains an encoding error: 'grep -a -v' must select the
# line and reproduce it unchanged in 'out'.
# Set fail=1 if grep selects nothing or if 'in' and 'out' differ.
check_nonchar ()
{
  printf "$1\\n" >in || framework_failure_

  if grep -a -v '^.*$' in >out; then :; else fail=1; fi
  if cmp in out; then :; else fail=1; fi
}
|
||||
|
||||
fail=0
|
||||
|
||||
# "." should match U+D45C HANGUL SYLLABLE PYO.
|
||||
check_char '\355\221\234'
|
||||
|
||||
# Check boundary-condition characters, and non-characters,
|
||||
# while we are at it.
|
||||
|
||||
check_char '\0' -a
|
||||
check_char '\177'
|
||||
check_nonchar '\200'
|
||||
check_nonchar '\277'
|
||||
check_nonchar '\300\200'
|
||||
check_nonchar '\301\277'
|
||||
|
||||
for i in 302 337; do
|
||||
for j in 200 277; do
|
||||
check_char "\\$i\\$j"
|
||||
done
|
||||
for j in 177 300; do
|
||||
check_nonchar "\\$i\\$j"
|
||||
done
|
||||
done
|
||||
# Lead byte \340 (0xE0): a valid second byte lies in \240..\277
# (0xA0..0xBF), followed by one continuation byte in \200..\277.
for i in 340; do
  # Both ends of the valid second-byte range.
  for j in 240 277; do
    for k in 200 277; do
      check_char "\\$i\\$j\\$k"
    done
    # Third bytes just outside the continuation range.
    for k in 177 300; do
      check_nonchar "\\$i\\$j\\$k"
    done
  done
  # Second bytes just outside the valid range: \237 (0x9F) and \300.
  # This list used to say 239, which is not an octal escape: printf
  # treats "\239" as "\23" followed by a literal "9", so the 0x9F
  # boundary was never exercised.
  for j in 237 300; do
    for k in 177 200 277 300; do
      check_nonchar "\\$i\\$j\\$k"
    done
  done
done
|
||||
for i in 341 354 356 357; do
|
||||
for j in 200 277; do
|
||||
for k in 200 277; do
|
||||
check_char "\\$i\\$j\\$k"
|
||||
done
|
||||
for k in 177 300; do
|
||||
check_nonchar "\\$i\\$j\\$k"
|
||||
done
|
||||
done
|
||||
for j in 177 300; do
|
||||
for k in 177 200 277 300; do
|
||||
check_nonchar "\\$i\\$j\\$k"
|
||||
done
|
||||
done
|
||||
done
|
||||
for i in 355; do
|
||||
for j in 200 237; do
|
||||
for k in 200 277; do
|
||||
check_char "\\$i\\$j\\$k"
|
||||
done
|
||||
for k in 177 300; do
|
||||
check_nonchar "\\$i\\$j\\$k"
|
||||
done
|
||||
done
|
||||
for j in 177 240; do
|
||||
for k in 177 200 277 300; do
|
||||
check_nonchar "\\$i\\$j\\$k"
|
||||
done
|
||||
done
|
||||
done
|
||||
|
||||
# On platforms like 32-bit AIX where WCHAR_MAX == 0xFFFF, skip checks
|
||||
# where the corresponding Unicode characters are not supported.
|
||||
if test $fail -eq 0; then
|
||||
printf '\360\220\200\200\n' >in || framework_failure_
|
||||
grep '^.$' in >out 2>&1 || fail=1
|
||||
cmp in out || skip_ 'platform does not support U+10000'
|
||||
fi
|
||||
|
||||
for i in 360; do
|
||||
for j in 220 277; do
|
||||
for k in 200 277; do
|
||||
for l in 200 277; do
|
||||
check_char "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
for l in 177 300; do
|
||||
check_nonchar "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
done
|
||||
for k in 177 300; do
|
||||
for l in 177 200 277 300; do
|
||||
check_nonchar "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
done
|
||||
done
|
||||
for j in 217 300; do
|
||||
for k in 177 200 277 300; do
|
||||
for l in 177 200 277 300; do
|
||||
check_nonchar "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
for i in 361 363; do
|
||||
for j in 200 277; do
|
||||
for k in 200 277; do
|
||||
for l in 200 277; do
|
||||
check_char "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
for l in 177 300; do
|
||||
check_nonchar "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
done
|
||||
for k in 177 300; do
|
||||
for l in 177 200 277 300; do
|
||||
check_nonchar "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
done
|
||||
done
|
||||
for j in 177 300; do
|
||||
for k in 177 200 277 300; do
|
||||
for l in 177 200 277 300; do
|
||||
check_nonchar "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
for i in 364; do
|
||||
for j in 200 217; do
|
||||
for k in 200 277; do
|
||||
for l in 200 277; do
|
||||
check_char "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
for l in 177 300; do
|
||||
check_nonchar "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
done
|
||||
for k in 177 300; do
|
||||
for l in 177 200 277 300; do
|
||||
check_nonchar "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
done
|
||||
done
|
||||
for j in 177 220; do
|
||||
for k in 177 200 277 300; do
|
||||
for l in 177 200 277 300; do
|
||||
check_nonchar "\\$i\\$j\\$k\\$l"
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
|
||||
Exit $fail
|
||||
@ -2,7 +2,7 @@
|
||||
# Test for this performance regression:
|
||||
# grep-3.5 and 3.6 would take O(N^2) time for some sets of input regexps.
|
||||
|
||||
# Copyright 2020-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2020-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -21,6 +21,8 @@
|
||||
|
||||
fail=0
|
||||
|
||||
require_perl_
|
||||
|
||||
: > empty || framework_failure_
|
||||
|
||||
# Construct a test case that consumes enough CPU time that we don't
|
||||
@ -32,6 +34,9 @@ while :; do
|
||||
small_ms=$(LC_ALL=C user_time_ 1 grep --file=in empty) || fail=1
|
||||
test $small_ms -ge 200 && break
|
||||
n_pat=$(expr $n_pat '*' 2)
|
||||
case $n_pat:$small_ms in
|
||||
640000:0) skip_ 'user_time_ appears always to report 0 elapsed ms';;
|
||||
esac
|
||||
done
|
||||
|
||||
# Now, search for those same digits mapped to A-J.
|
||||
@ -42,7 +47,6 @@ large_ms=$(LC_ALL=C user_time_ 1 grep --file=in empty) || fail=1
|
||||
# Deliberately recording in an unused variable so it
|
||||
# shows up in set -x output, in case this test fails.
|
||||
ratio=$(expr "$large_ms" / "$small_ms")
|
||||
warn_ ratio=$ratio
|
||||
|
||||
# The duration of the latter run must be no more than 10 times
|
||||
# that of the former. Using recent versions prior to this fix,
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Make sure all of these programs work properly
|
||||
# when invoked with --help or --version.
|
||||
|
||||
# Copyright (C) 2000-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2000-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Exercise high-bit-set unibyte-in-[...]-range bug.
|
||||
|
||||
# Copyright (C) 2011-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2011-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -48,10 +48,18 @@ require_timeout_()
|
||||
|
||||
require_pcre_()
|
||||
{
|
||||
echo . | grep -P . 2>err || {
|
||||
test $? -eq 1 && fail_ PCRE available, but does not work.
|
||||
skip_ no PCRE support
|
||||
}
|
||||
case $LC_ALL in
|
||||
*.UTF-8)
|
||||
printf '\303\241\n' | grep -P '^.$' 2>err || {
|
||||
test $? -eq 1 && fail_ PCRE available, but does not work
|
||||
skip_ no PCRE Unicode support
|
||||
};;
|
||||
*)
|
||||
echo . | grep -P '^.$' 2>err || {
|
||||
test $? -eq 1 && fail_ PCRE available, but does not work.
|
||||
skip_ no PCRE support
|
||||
};;
|
||||
esac
|
||||
compare /dev/null err || fail_ PCRE available, but stderr not empty.
|
||||
}
|
||||
|
||||
@ -138,6 +146,13 @@ require_JP_EUC_locale_()
|
||||
skip_ "$locale locale not found"
|
||||
}
|
||||
|
||||
# Skip the current test unless $PERL is nonempty and can run a trivial
# program containing 'use warnings'.
require_perl_()
{
  if test "$PERL" && $PERL -e 'use warnings' > /dev/null 2>&1; then
    :
  else
    skip_ 'configure did not find a usable version of Perl'
  fi
}
|
||||
|
||||
expensive_()
|
||||
{
|
||||
if test "$RUN_EXPENSIVE_TESTS" != yes; then
|
||||
|
||||
683
tests/init.sh
683
tests/init.sh
@ -1,683 +0,0 @@
|
||||
# source this file; set up for tests
|
||||
|
||||
# Copyright (C) 2009-2021 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# Using this file in a test
|
||||
# =========================
|
||||
#
|
||||
# The typical skeleton of a test looks like this:
|
||||
#
|
||||
# #!/bin/sh
|
||||
# . "${srcdir=.}/init.sh"; path_prepend_ .
|
||||
# Execute some commands.
|
||||
# Note that these commands are executed in a subdirectory, therefore you
|
||||
# need to prepend "../" to relative filenames in the build directory.
|
||||
# Note that the "path_prepend_ ." is useful only if the body of your
|
||||
# test invokes programs residing in the initial directory.
|
||||
# For example, if the programs you want to test are in src/, and this test
|
||||
# script is named tests/test-1, then you would use "path_prepend_ ../src",
|
||||
# or perhaps export PATH='$(abs_top_builddir)/src$(PATH_SEPARATOR)'"$$PATH"
|
||||
# to all tests via automake's TESTS_ENVIRONMENT.
|
||||
# Set the exit code 0 for success, 77 for skipped, or 1 or other for failure.
|
||||
# Use the skip_ and fail_ functions to print a diagnostic and then exit
|
||||
# with the corresponding exit code.
|
||||
# Exit $?
|
||||
|
||||
# Executing a test that uses this file
|
||||
# ====================================
|
||||
#
|
||||
# Running a single test:
|
||||
# $ make check TESTS=test-foo.sh
|
||||
#
|
||||
# Running a single test, with verbose output:
|
||||
# $ make check TESTS=test-foo.sh VERBOSE=yes
|
||||
#
|
||||
# Running a single test, keeping the temporary directory:
|
||||
# $ make check TESTS=test-foo.sh KEEP=yes
|
||||
#
|
||||
# Running a single test, with single-stepping:
|
||||
# 1. Go into a sub-shell:
|
||||
# $ bash
|
||||
# 2. Set relevant environment variables from TESTS_ENVIRONMENT in the
|
||||
# Makefile:
|
||||
# $ export srcdir=../../tests # this is an example
|
||||
# 3. Execute the commands from the test, copy&pasting them one by one:
|
||||
# $ . "$srcdir/init.sh"; path_prepend_ .
|
||||
# ...
|
||||
# 4. Finally
|
||||
# $ exit
|
||||
|
||||
# =============================================================================
|
||||
# Elementary diagnostics
|
||||
|
||||
ME_=`expr "./$0" : '.*/\(.*\)$'`
|
||||
|
||||
# Prepare PATH_SEPARATOR.
|
||||
# The user is always right.
|
||||
if test "${PATH_SEPARATOR+set}" != set; then
|
||||
# Determine PATH_SEPARATOR by trying to find /bin/sh in a PATH which
|
||||
# contains only /bin. Note that ksh looks also at the FPATH variable,
|
||||
# so we have to set that as well for the test.
|
||||
PATH_SEPARATOR=:
|
||||
(PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 \
|
||||
&& { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 \
|
||||
|| PATH_SEPARATOR=';'
|
||||
}
|
||||
fi
|
||||
|
||||
# We use a trap below for cleanup. This requires us to go through
|
||||
# hoops to get the right exit status transported through the handler.
|
||||
# So use 'Exit STATUS' instead of 'exit STATUS' inside of the tests.
|
||||
# Turn off errexit here so that we don't trip the bug with OSF1/Tru64
|
||||
# sh inside this function.
|
||||
# Usage: Exit STATUS
# Disable errexit, set $? to STATUS via a subshell, then exit with STATUS.
Exit () { set +e; (exit $1); exit $1; }
|
||||
|
||||
# Print warnings (e.g., about skipped and failed tests) to this file number.
|
||||
# Override by defining to say, 9, in init.cfg, and putting say,
|
||||
# export ...ENVVAR_SETTINGS...; $(SHELL) 9>&2
|
||||
# in the definition of TESTS_ENVIRONMENT in your tests/Makefile.am file.
|
||||
# This is useful when using automake's parallel tests mode, to print
|
||||
# the reason for skip/failure to console, rather than to the .log files.
|
||||
: ${stderr_fileno_=2}
|
||||
|
||||
# warn_ ARG...
# Print the arguments, joined by "$*", as one diagnostic on stderr.
# Joining with a space requires that IFS start with ' '; when it does
# not, re-invoke warn_ in a subshell in which IFS has been set to ' '.
# When stderr_fileno_ is not 2, additionally send the first line of the
# diagnostic to that file descriptor.
warn_ ()
{
  case $IFS in
    ' '*) ;;
    *) (IFS=' '; warn_ "$@")
       return ;;
  esac

  printf '%s\n' "$*" >&2
  if test $stderr_fileno_ = 2; then
    :
  else
    printf '%s\n' "$*" | sed 1q >&$stderr_fileno_
  fi
}
|
||||
# Report a failed test via warn_, then exit with status 1.
fail_ () { warn_ "$ME_: failed test: $@"; Exit 1; }
|
||||
# Report a skipped test via warn_, then exit with status 77.
skip_ () { warn_ "$ME_: skipped test: $@"; Exit 77; }
|
||||
# Report a hard error via warn_, then exit with status 99.
fatal_ () { warn_ "$ME_: hard error: $@"; Exit 99; }
|
||||
# Report a test set-up failure via warn_, then exit with status 99.
framework_failure_ () { warn_ "$ME_: set-up failure: $@"; Exit 99; }
|
||||
|
||||
# =============================================================================
|
||||
# Ensure the shell supports modern syntax.
|
||||
|
||||
# Sanitize this shell to POSIX mode, if possible.
|
||||
DUALCASE=1; export DUALCASE
|
||||
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
|
||||
emulate sh
|
||||
NULLCMD=:
|
||||
alias -g '${1+"$@"}'='"$@"'
|
||||
setopt NO_GLOB_SUBST
|
||||
else
|
||||
case `(set -o) 2>/dev/null` in
|
||||
*posix*) set -o posix ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# We require $(...) support unconditionally.
|
||||
# We require that the printf built-in work correctly regarding octal escapes;
|
||||
# this eliminates /bin/sh on AIX 7.2.
|
||||
# We require non-surprising "local" semantics (this eliminates dash).
|
||||
# This takes the admittedly draconian step of eliminating dash, because the
|
||||
# assignment tab=$(printf '\t') works fine, yet preceding it with "local "
|
||||
# transforms it into an assignment that sets the variable to the empty string.
|
||||
# That is too counter-intuitive, and can lead to subtle run-time malfunction.
|
||||
# The example below is less subtle in that with dash, it evokes the run-time
|
||||
# exception "dash: 1: local: 1: bad variable name".
|
||||
# We require a few additional shell features only when $EXEEXT is nonempty,
|
||||
# in order to support automatic $EXEEXT emulation:
|
||||
# - hyphen-containing alias names
|
||||
# - we prefer to use ${var#...} substitution, rather than having
|
||||
# to work around lack of support for that feature.
|
||||
# The following code attempts to find a shell with support for these features.
|
||||
# If the current shell passes the test, we're done. Otherwise, test other
|
||||
# shells until we find one that passes. If one is found, re-exec it.
|
||||
# If no acceptable shell is found, skip the current test.
|
||||
#
|
||||
# The "...set -x; P=1 true 2>err..." test is to disqualify any shell that
|
||||
# emits "P=1" into err, as /bin/sh from SunOS 5.11 and OpenBSD 4.7 do.
|
||||
#
|
||||
# Use "9" to indicate success (rather than 0), in case some shell acts
|
||||
# like Solaris 10's /bin/sh but exits successfully instead of with status 2.
|
||||
|
||||
# Eval this code in a subshell to determine a shell's suitability.
|
||||
# 10 - passes all tests; ok to use
|
||||
# 9 - ok, but enabling "set -x" corrupts app stderr; prefer higher score
|
||||
# ? - not ok
|
||||
gl_shell_test_script_='
|
||||
test $(echo y) = y || exit 1
|
||||
LC_ALL=en_US.UTF-8 printf "\\351" 2>/dev/null \
|
||||
| LC_ALL=C tr "\\351" x | LC_ALL=C grep "^x$" > /dev/null \
|
||||
|| exit 1
|
||||
printf "\\351" 2>/dev/null \
|
||||
| LC_ALL=C tr "\\351" x | LC_ALL=C grep "^x$" > /dev/null \
|
||||
|| exit 1
|
||||
f_local_() { local v=1; }; f_local_ || exit 1
|
||||
f_dash_local_fail_() { local t=$(printf " 1"); }; f_dash_local_fail_
|
||||
score_=10
|
||||
if test "$VERBOSE" = yes; then
|
||||
test -n "$( (exec 3>&1; set -x; P=1 true 2>&3) 2> /dev/null)" && score_=9
|
||||
fi
|
||||
test -z "$EXEEXT" && exit $score_
|
||||
shopt -s expand_aliases
|
||||
alias a-b="echo zoo"
|
||||
v=abx
|
||||
test ${v%x} = ab \
|
||||
&& test ${v#a} = bx \
|
||||
&& test $(a-b) = zoo \
|
||||
&& exit $score_
|
||||
'
|
||||
|
||||
# Choose the shell that runs the rest of this file.  A leading
# --no-reexec argument means an earlier instance already made the choice
# and re-executed us: drop the marker argument and carry on.
if test "x$1" = "x--no-reexec"; then
  shift
else
  # Assume a working shell.  Export to subshells (setup_ needs this).
  gl_set_x_corrupts_stderr_=false
  export gl_set_x_corrupts_stderr_

  # Name of the first shell that scored only 9 (marginally acceptable).
  marginal_=

  # Candidates, in order: the running shell, $CONFIG_SHELL (when set),
  # then a list of well-known shells.  "fail" is an end-of-list sentinel.
  for re_shell_ in __current__ "${CONFIG_SHELL:-no_shell}" \
      /bin/sh bash dash zsh pdksh fail
  do
    test "$re_shell_" = no_shell && continue

    # Reaching the sentinel means no candidate scored 10.  Settle for the
    # marginal shell if one was recorded; otherwise skip this test.
    if test "$re_shell_" = fail; then
      test -z "$marginal_" && skip_ failed to find an adequate shell
      re_shell_=$marginal_
      break
    fi

    # When testing the current shell, simply "eval" the test code.
    # Otherwise, run it via $re_shell_ -c ...
    if test "$re_shell_" = __current__; then
      # 'eval'ing this code makes Solaris 10's /bin/sh exit with
      # $? set to 2.  It does not evaluate any of the code after the
      # "unexpected" first '('.  Thus, we must run it in a subshell.
      ( eval "$gl_shell_test_script_" ) > /dev/null 2>&1
    else
      "$re_shell_" -c "$gl_shell_test_script_" 2>/dev/null
    fi

    # Score reported by whichever branch ran above.
    st_=$?

    # A score of 10: $re_shell_ works just fine.  Use it.
    if test "$st_" = 10; then
      gl_set_x_corrupts_stderr_=false
      break
    fi

    # A score of 9: remember this shell, but only if it is the first one.
    if test "$st_" = 9 && test -z "$marginal_"; then
      marginal_="$re_shell_"
      gl_set_x_corrupts_stderr_=true
    fi
  done

  if test "$re_shell_" != __current__; then
    # Found a usable shell.  Preserve -v and -x: build one of
    # "", "-v", "-x" or "-vx" from the flags currently in $-.
    opts_=
    case $- in *v*) opts_=-v ;; esac
    case $- in *x*) opts_=${opts_:--}x ;; esac
    re_shell=$re_shell_
    export re_shell
    exec "$re_shell_" $opts_ "$0" --no-reexec "$@"
    # Only reached when exec itself failed.
    echo "$ME_: exec failed" 1>&2
    exit 127
  fi
fi
|
||||
|
||||
# =============================================================================
|
||||
# Ensure the shell behaves reasonably.
|
||||
|
||||
# If this is bash, remove every existing alias definition.
if test -n "$BASH_VERSION"; then
  unalias -a
fi

# Note that when supporting $EXEEXT (transparently mapping from PROG_NAME to
# PROG_NAME.exe), we want to support hyphen-containing names like test-acos.
# That is part of the shell-selection test above.  Why use aliases rather
# than functions?  Because support for hyphen-containing aliases is more
# widespread than that for hyphen-containing function names.
# Under bash, alias expansion has to be switched on explicitly.
if test -n "$EXEEXT" && test -n "$BASH_VERSION"; then
  shopt -s expand_aliases
fi
|
||||
|
||||
# =============================================================================
|
||||
# Creating a temporary directory (needed by the core test framework)
|
||||
|
||||
# Create a temporary directory, much like mktemp -d does.
|
||||
# Written by Jim Meyering.
|
||||
#
|
||||
# Usage: mktempd_ /tmp phoey.XXXXXXXXXX
|
||||
#
|
||||
# First, try to use the mktemp program.
|
||||
# Failing that, we'll roll our own mktemp-like function:
|
||||
# - try to get random bytes from /dev/urandom
|
||||
# - failing that, generate output from a combination of quickly-varying
|
||||
# sources and gzip. Ignore non-varying gzip header, and extract
|
||||
# "random" bits from there.
|
||||
# - given those bits, map to file-name bytes using tr, and try to create
|
||||
# the desired directory.
|
||||
# - make only $MAX_TRIES_ attempts
|
||||
|
||||
# Helper function.  Print $1 pseudo-random bytes, each from a-zA-Z0-9.
# Bytes come from /dev/urandom when it is readable; otherwise from the
# gzip-compressed output of a handful of quickly-varying commands.
rand_bytes_ ()
{
  n_=$1

  # Maybe try openssl rand -base64 $n_prime_|tr '+/=\012' abcd first?
  # But if they have openssl, they probably have mktemp, too.

  chars_=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
  dev_rand_=/dev/urandom

  if test -r "$dev_rand_"; then
    # tr -c maps the 194 (= 256 - 62) byte values outside $chars_ onto a
    # 194-byte replacement set: 8 digits plus three copies of $chars_.
    dd ibs=$n_ count=1 if=$dev_rand_ 2>/dev/null \
      | LC_ALL=C tr -c $chars_ 01234567$chars_$chars_$chars_
  else
    n_plus_50_=`expr $n_ + 50`
    cmds_='date; date +%N; free; who -a; w; ps auxww; ps -ef'
    data_=` (eval "$cmds_") 2>&1 | gzip `

    # Keep folding in more command output until $data_ has length at
    # least 50+$n_.
    until len_=`echo "$data_"|wc -c`
          test $n_plus_50_ -le $len_
    do
      data_=` (echo "$data_"; eval "$cmds_") 2>&1 | gzip `
    done

    # Skip the first 50 bytes, take the next $n_, and map them to
    # file-name bytes exactly as in the /dev/urandom case.
    echo "$data_" \
      | dd bs=1 skip=50 count=$n_ 2>/dev/null \
      | LC_ALL=C tr -c $chars_ 01234567$chars_$chars_$chars_
  fi
}
|
||||
|
||||
# Usage: mktempd_ DIR TEMPLATE
# Create a new directory inside DIR and print its name on stdout.
# DIR must not end in a slash (unless it is / or //) and TEMPLATE must
# end in at least four X's.  mktemp is tried first; its result is used
# only if it lies inside DIR, is a directory and has 0700 permissions.
# Otherwise up to $MAX_TRIES_ candidate names are built with rand_bytes_
# and created with mkdir -m 0700.  Errors are reported through fail_.
mktempd_ ()
{
  case $# in
    2) ;;
    *) fail_ "Usage: mktempd_ DIR TEMPLATE" ;;
  esac

  destdir_=$1
  template_=$2

  MAX_TRIES_=4

  # Disallow any trailing slash on specified destdir:
  # it would subvert the post-mktemp "case"-based destdir test.
  # A root destdir (/ or //) already ends in a slash, so it is used as-is.
  case $destdir_ in
    / | //) destdir_slash_=$destdir_ ;;
    */) fail_ "invalid destination dir: remove trailing slash(es)" ;;
    *) destdir_slash_=$destdir_/ ;;
  esac

  case $template_ in
    *XXXX) ;;
    *) fail_ \
         "invalid template: $template_ (must have a suffix of at least 4 X's)" ;;
  esac

  # First, try to use mktemp.
  d=`unset TMPDIR; { mktemp -d -t -p "$destdir_" "$template_"; } 2>/dev/null` &&

  # The resulting name must be in the specified directory.
  case $d in "$destdir_slash_"*) :;; *) false;; esac &&

  # It must have created the directory.
  test -d "$d" &&

  # It must have 0700 permissions.  Handle sticky "S" bits.
  perms=`ls -dgo "$d" 2>/dev/null` &&
  case $perms in drwx--[-S]---*) :;; *) false;; esac && {
    echo "$d"
    return
  }

  # If we reach this point, we'll have to create a directory manually.

  # Get a copy of the template without its suffix of X's.
  base_template_=`echo "$template_"|sed 's/XX*$//'`

  # Calculate how many X's we've just removed.
  template_length_=`echo "$template_" | wc -c`
  nx_=`echo "$base_template_" | wc -c`
  nx_=`expr $template_length_ - $nx_`

  # Try up to $MAX_TRIES_ random names; keep the last mkdir diagnostic
  # so that it can be reported if every attempt fails.
  err_=
  i_=1
  while :; do
    X_=`rand_bytes_ $nx_`
    candidate_dir_="$destdir_slash_$base_template_$X_"
    err_=`mkdir -m 0700 "$candidate_dir_" 2>&1` \
      && { echo "$candidate_dir_"; return; }
    test $MAX_TRIES_ -le $i_ && break;
    i_=`expr $i_ + 1`
  done
  fail_ "$err_"
}
|
||||
|
||||
# =============================================================================
|
||||
# Core test framework
|
||||
|
||||
# Print the arbitrary prefix that helps distinguish test directories.
testdir_prefix_ ()
{
  printf gt
}
|
||||
|
||||
# Set up the environment for the test to run in:
#  - with VERBOSE=yes, enable "set -x" unless that would corrupt stderr;
#  - record the starting directory in $initial_cwd_;
#  - create a temporary directory ($test_dir_) there and cd into it;
#  - adjust and export srcdir and builddir for the new working directory;
#  - give IFS its default value (space, tab, newline);
#  - trap signals 1, 2, 3, 13 and 15 so that each runs Exit with
#    128 + the signal number.
setup_ ()
{
  if test "$VERBOSE" = yes; then
    # Test whether set -x may cause the selected shell to corrupt an
    # application's stderr.  Many do, including zsh-4.3.10 and the /bin/sh
    # from SunOS 5.11, OpenBSD 4.7 and Irix 6.5.
    # If enabling verbose output this way would cause trouble, simply
    # issue a warning and refrain.
    if $gl_set_x_corrupts_stderr_; then
      warn_ "using SHELL=$SHELL with 'set -x' corrupts stderr"
    else
      set -x
    fi
  fi

  initial_cwd_=$PWD

  # Create and enter the temporary directory.
  pfx_=`testdir_prefix_`
  test_dir_=`mktempd_ "$initial_cwd_" "$pfx_-$ME_.XXXX"` \
    || fail_ "failed to create temporary directory in $initial_cwd_"
  cd "$test_dir_" || fail_ "failed to cd to temporary directory"

  # Set variables srcdir, builddir, for the convenience of the test.
  # We are now one level below the starting directory, so a relative
  # srcdir needs a "../" prefix; absolute and drive-letter forms do not.
  case $srcdir in
    /* | ?:*) ;;
    *) srcdir="../$srcdir" ;;
  esac
  builddir=".."
  export srcdir builddir

  # As autoconf-generated configure scripts do, ensure that IFS
  # is defined initially, so that saving and restoring $IFS works.
  # The tab is produced by printf rather than written literally, so it
  # cannot be lost when this file passes through whitespace-altering tools.
  gl_init_sh_nl_='
'
  gl_init_sh_tab_=`printf '\t'`
  IFS=" $gl_init_sh_tab_$gl_init_sh_nl_"

  # This trap statement, along with a trap on 0 below, ensure that the
  # temporary directory, $test_dir_, is removed upon exit as well as
  # upon receipt of any of the listed signals.
  for sig_ in 1 2 3 13 15; do
    eval "trap 'Exit $(expr $sig_ + 128)' $sig_"
  done
}
|
||||
|
||||
# Stub hook, called by remove_tmp_ (on regular exit and on interrupt).
# Override it with a per-test function, e.g., to unmount a partition,
# or to undo any other global state changes.
cleanup_ ()
{
  :
}
|
||||
|
||||
# Exit handler.  Save the incoming $?, run the user-overridable cleanup_
# function, then remove $test_dir_ (unless KEEP=yes) and exit with the
# saved status, or with 1 if that status was 0 and removal failed.
remove_tmp_ ()
{
  __st=$?
  cleanup_
  case $KEEP in
    yes)
      echo "Not removing temporary directory $test_dir_"
      ;;
    *)
      # cd out of the directory we're about to remove
      cd "$initial_cwd_" || cd / || cd /tmp
      # Make every entry accessible to us so that rm can delete it.
      chmod -R u+rwx "$test_dir_"
      if rm -rf "$test_dir_"; then
        :
      else
        # Removal failed: turn an exit status of 0 into 1.
        test $__st = 0 && __st=1
      fi
      ;;
  esac
  exit $__st
}
|
||||
|
||||
# =============================================================================
|
||||
# Prepending directories to PATH
|
||||
|
||||
# Usage: find_exe_basenames_ DIR
# If every DIR/*.exe name (DIR included) consists solely of the bytes
# accepted by the case statement below, print the names, stripped of
# directory and .exe suffix and separated by spaces, and return 0.
# Otherwise print nothing and return 1.  DIR/[.exe is always ignored.
find_exe_basenames_ ()
{
  feb_dir_=$1
  feb_fail_=0
  feb_result_=
  feb_sp_=
  for feb_file_ in $feb_dir_/*.exe; do
    # If there was no *.exe file, or there existed a file named "*.exe" that
    # was deleted between the above glob expansion and the existence test
    # below, just skip it.
    if test "x$feb_file_" = "x$feb_dir_/*.exe" && test ! -f "$feb_file_"; then
      continue
    fi
    # Exempt [.exe, since we can't create a function by that name, yet
    # we can't invoke [ by PATH search anyways due to shell builtins.
    if test "x$feb_file_" = "x$feb_dir_/[.exe"; then
      continue
    fi
    # Give up on the whole directory at the first unexpected byte.
    case $feb_file_ in
      *[!-a-zA-Z/0-9_.+]*)
        feb_fail_=1
        break ;;
    esac
    # Remove leading file name components as well as the .exe suffix.
    feb_file_=${feb_file_##*/}
    feb_file_=${feb_file_%.exe}
    feb_result_="$feb_result_$feb_sp_$feb_file_"
    feb_sp_=' '
  done
  if test $feb_fail_ = 0; then
    printf %s "$feb_result_"
  fi
  return $feb_fail_
}
|
||||
|
||||
# Usage: create_exe_shims_ DIR
# When EXEEXT is empty, do nothing and return 0.  When it is .exe, define
# an alias PROG that invokes PROG.exe for each name reported by
# find_exe_basenames_ for DIR; if that function rejects the directory,
# say so on stderr and define no alias.  Return 0 in both cases.
# Any other EXEEXT value is diagnosed on stderr and yields 1.
create_exe_shims_ ()
{
  if test -z "$EXEEXT"; then
    return 0
  fi
  if test "x$EXEEXT" != x.exe; then
    echo "$0: unexpected \$EXEEXT value: $EXEEXT" 1>&2
    return 1
  fi

  if base_names_=`find_exe_basenames_ $1`; then
    :
  else
    echo "$0 (exe_shim): skipping directory: $1" 1>&2
    return 0
  fi

  # An empty list simply leaves this loop with nothing to do.
  for base_ in $base_names_; do
    alias "$base_"="$base_$EXEEXT"
  done

  return 0
}
|
||||
|
||||
# Usage: path_prepend_ DIR...
# Prepend to PATH an absolute name for each specified directory, in the
# order given, and export PATH.  A DIR that is neither absolute nor of
# the form ?:* is taken relative to $initial_cwd_.  An empty DIR, or one
# whose absolute name contains $PATH_SEPARATOR, is reported via fail_.
path_prepend_ ()
{
  for path_dir_
  do
    case $path_dir_ in
      '') fail_ "invalid path dir: '$path_dir_'" ;;
      /* | ?:*) abs_path_dir_=$path_dir_ ;;
      *) abs_path_dir_=$initial_cwd_/$path_dir_ ;;
    esac
    case $abs_path_dir_ in
      *$PATH_SEPARATOR*) fail_ "invalid path dir: '$abs_path_dir_'" ;;
    esac
    PATH="$abs_path_dir_$PATH_SEPARATOR$PATH"

    # Create an alias, FOO, for each FOO.exe in this directory.
    create_exe_shims_ "$abs_path_dir_" \
      || fail_ "something failed (above): $abs_path_dir_"
  done
  export PATH
}
|
||||
|
||||
# =============================================================================
|
||||
# Convenience environment variables for the tests
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Enable glibc's malloc-perturbing option.
# This is useful for exposing code that depends on the fact that
# malloc-related functions often return memory that is mostly zeroed.
# If you have the time and cycles, use valgrind to do an even better job.
# A value already present in the environment (even an empty one) is kept.
test "${MALLOC_PERTURB_+set}" = set || MALLOC_PERTURB_=87
export MALLOC_PERTURB_
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# The interpreter for Bourne-shell scripts.
# No special standards compatibility requirements.
# Some environments, such as Android, don't have /bin/sh, so fall back
# to a plain "sh" unless /bin/sh$EXEEXT exists.
BOURNE_SHELL=sh
test -f /bin/sh$EXEEXT && BOURNE_SHELL=/bin/sh
|
||||
|
||||
# =============================================================================
|
||||
# Convenience functions for the tests
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Return value checking
|
||||
|
||||
# Usage: returns_ EXPECTED_STATUS COMMAND [ARG]...
# Run COMMAND and succeed only if it exits with exactly EXPECTED_STATUS.
# This is useful when ensuring a command fails as desired: just doing
# `command ... && fail=1` will not catch a segfault in command, for
# example.  With this helper you instead check an explicit exit code like
#   returns_ 1 command ... || fail
returns_ () {
  # Disable tracing so it doesn't interfere with stderr of the wrapped command
  { set +x; } 2>/dev/null

  local want_status="$1"
  shift
  "$@"
  if test $? -eq $want_status; then
    ret_=0
  else
    ret_=1
  fi

  # Turn tracing back on under the same conditions in which setup_
  # enables it.
  if test "$VERBOSE" = yes && test "$gl_set_x_corrupts_stderr_" = false; then
    set -x
  fi
  # Discard stderr here so that the trace of this return is not shown.
  { return $ret_; } 2>/dev/null
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Text file comparison
|
||||
|
||||
# Usage: emit_diff_u_header_ FILE1 FILE2
# Emit a header similar to that from diff -u; print the simulated "diff"
# command so that the order of arguments is clear.  Don't bother with @@ lines.
# As in real diff -u output, a TAB separates each file name from its
# (dummy) timestamp; it is spelled \t so that it cannot be mistaken for,
# or silently turned into, a space.
emit_diff_u_header_ ()
{
  printf 'diff -u %s\n--- %s\t1970-01-01\n+++ %s\t1970-01-01\n' \
    "$*" "$1" "$2"
}
|
||||
|
||||
# Arrange not to let diff or cmp operate on /dev/null,
# since on some systems (at least OSF/1 5.1), that doesn't work.
# Return 2 unless there are exactly two arguments and at least one of
# them is /dev/null.  When one argument is /dev/null and the other file
# is empty (or is /dev/null too), return 0.  When the other file is not
# empty, print a diff-like header followed by that file's lines, each
# prefixed with "+" (first argument is /dev/null) or "-" (second is),
# on standard output, and return 1.
compare_dev_null_ ()
{
  test $# = 2 || return 2

  # Work out which argument is the real file and which marker to use.
  local other_ mark_
  if test "x$1" = x/dev/null; then
    other_=$2
    mark_=+
  elif test "x$2" = x/dev/null; then
    other_=$1
    mark_=-
  else
    return 2
  fi

  test -s "$other_" || return 0
  emit_diff_u_header_ "$@"
  sed "s/^/$mark_/" "$other_"
  return 1
}
|
||||
|
||||
# Define compare_, the low-level file comparison used by compare.
# Probe diff with -u, -U3, -c and no option, in that order, by comparing
# this script with itself; the first invocation that succeeds selects
# $diff_opt_.  If none does ($diff_opt_ ends up as "no"), use cmp instead.
for diff_opt_ in -u -U3 -c '' no; do
  case $diff_opt_ in
    no) ;;
    *)
      if diff_out_=`exec 2>/dev/null; diff $diff_opt_ "$0" "$0" < /dev/null`
      then
        break
      fi
      ;;
  esac
done
if test "$diff_opt_" != no; then
  if test -z "$diff_out_"; then
    # This diff is silent when the files are identical: use it directly.
    compare_ ()
    {
      diff $diff_opt_ "$@"
    }
  else
    compare_ ()
    {
      # If no differences were found, AIX and HP-UX 'diff' produce output
      # like "No differences encountered".  Hide this output.
      diff $diff_opt_ "$@" > diff.out
      diff_status_=$?
      if test $diff_status_ -ne 0; then
        cat diff.out || diff_status_=2
      fi
      rm -f diff.out || diff_status_=2
      return $diff_status_
    }
  fi
elif cmp -s /dev/null /dev/null 2>/dev/null; then
  compare_ ()
  {
    cmp -s "$@"
  }
else
  compare_ ()
  {
    cmp "$@"
  }
fi
|
||||
|
||||
# Usage: compare EXPECTED ACTUAL
#
# Let compare_dev_null_ handle the /dev/null cases first.  Its status 0
# or 1 is the final answer (any diffs have already been printed); any
# other status means the comparison must be done by compare_.
compare ()
{
  # compare_dev_null_ is invoked as an "if" condition on purpose: an
  # unchecked invocation followed by "case $?" would fail in a "set -e"
  # environment.
  if compare_dev_null_ "$@"; then
    return 0
  elif test $? -eq 1; then
    return 1
  else
    compare_ "$@"
  fi
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# If you want to override the testdir_prefix_ function,
# or to add more utility functions, use this file.
# It is sourced only when it exists in $srcdir.
if test -f "$srcdir/init.cfg"; then
  . "$srcdir/init.cfg"
fi

# =============================================================================
# Set up the environment for the test to run in.

setup_ "$@"
# This trap is here, rather than in the setup_ function, because some
# shells run the exit trap at shell function exit, rather than script exit.
trap remove_tmp_ 0
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Exercise -T.
|
||||
|
||||
# Copyright 2016-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2016-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Regression test for GNU grep.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Evoke a segfault in a hard-to-reach code path of kwset.c.
|
||||
# This bug affected grep versions 2.19 through 2.21.
|
||||
#
|
||||
# Copyright (C) 2015-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2015-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# grep-2.21 would incur a 100x penalty for 10x increase in regexp length
|
||||
|
||||
# Copyright 2015-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2015-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -24,6 +24,7 @@ fail=0
|
||||
# system load during the two test runs, so we'll mark it as
|
||||
# "expensive", making it less likely to be run by regular users.
|
||||
expensive_
|
||||
require_perl_
|
||||
|
||||
echo x > in || framework_failure_
|
||||
# Note that we want 10x the byte count (not line count) in the larger file.
|
||||
@ -31,8 +32,22 @@ seq 10000 50000 | tr -d '\012' > r || framework_failure_
|
||||
cat r r r r r r r r r r > re-10x || framework_failure_
|
||||
mv r re || framework_failure_
|
||||
|
||||
base_ms=$(user_time_ 1 grep -f re in ) || fail=1
|
||||
b10x_ms=$(user_time_ 1 grep -f re-10x in) || fail=1
|
||||
returns_ 0 user_time_ 1 grep -f re in > base-ms \
|
||||
|| framework_failure_ 'failed to compute baseline timing'
|
||||
base_ms=$(cat base-ms)
|
||||
|
||||
# This test caused trouble on at least two types of fringe hosts: those
|
||||
# with very little memory (a 1.5GB RAM Solaris host) and a Linux/s390x
|
||||
# (emulated with qemu-system-s390x). The former became unusable due to
|
||||
# mem requirements of the 2nd test, and the latter ended up taking >35x
|
||||
# more time than the base case. Skipping this test for any system using
|
||||
# more than this many milliseconds for the first case should avoid those
|
||||
# false-positive failures while skipping the test on few other systems.
|
||||
test 800 -lt "$base_ms" && skip_ "this base-case test took too long"
|
||||
|
||||
returns_ 0 user_time_ 1 grep -f re-10x in > b10x-ms \
|
||||
|| framework_failure_ 'failed to compute 10x timing'
|
||||
b10x_ms=$(cat b10x-ms)
|
||||
|
||||
# Increasing the length of the regular expression by a factor
|
||||
# of 10 should cause no more than a 10x increase in duration.
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# grep-3.4 would require O(N^2) RSS for N regexps
|
||||
# grep-3.5 requires O(N) in the most common cases.
|
||||
|
||||
# Copyright 2020-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2020-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -26,15 +26,16 @@ fail=0
|
||||
# system load during the two test runs, so we'll mark it as
|
||||
# "expensive", making it less likely to be run by regular users.
|
||||
expensive_
|
||||
require_perl_
|
||||
|
||||
# Make the quick/small input large enough so that even on high-end
|
||||
# systems this first invocation takes at least 10ms of user time.
|
||||
word_list=/usr/share/dict/linux.words
|
||||
|
||||
# If $word_list does not exist, generate an input that exibhits
|
||||
# If $word_list does not exist, generate an input that exhibits
|
||||
# similar performance characteristics.
|
||||
if ! test -f $word_list; then
|
||||
# Generate data comprable to that word list.
|
||||
# Generate data comparable to that word list.
|
||||
# Note how all "words" start with "a", and that there is
|
||||
# a small percentage of lines with at least one "." metachar.
|
||||
# This requires /dev/urandom, so if it's not present, skip
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# grep -F -x -o PAT print an extra newline for each match.
|
||||
# This would fail for grep-2.19 and grep-2.20.
|
||||
|
||||
# Copyright 2014-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -12,4 +12,20 @@ echo x > exp || framework_failure_
|
||||
yes x | timeout 10 grep -m1 x > out || fail=1
|
||||
compare exp out || fail=1
|
||||
|
||||
# Make sure -m2 stops reading even when output is /dev/null.
|
||||
# In grep 3.11, it would continue reading.
|
||||
printf 'x\nx\nx\n' >in || framework_failure_
|
||||
(grep -m2 x >/dev/null && head -n1) <in >out || fail=1
|
||||
compare exp out || fail=1
|
||||
|
||||
# The following two tests would fail before v3.11-70
|
||||
echo x > in || framework_failure_
|
||||
echo in > exp || framework_failure_
|
||||
grep -l -m1 . in > out || fail=1
|
||||
compare exp out || fail=1
|
||||
|
||||
# Ensure that this prints nothing and exits successfully.
|
||||
grep -q -m1 . in > out || fail=1
|
||||
compare /dev/null out || fail=1
|
||||
|
||||
Exit $fail
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Trigger a bug in the DFA matcher.
|
||||
# This would fail for grep-2.20.
|
||||
|
||||
# Copyright 2014-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# grep would sometimes read beyond end of input, when using a non-UTF8
|
||||
# multibyte locale.
|
||||
|
||||
# Copyright 2014-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Test for a performance regression with -Fw and a non-UTF8 multibyte locale.
|
||||
|
||||
# Copyright 2019-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2019-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -23,7 +23,7 @@ fail=0
|
||||
require_timeout_
|
||||
require_JP_EUC_locale_
|
||||
|
||||
yes 00 | head -10000000 > in || framework_failure_
|
||||
yes 00 | head -n 10000000 > in || framework_failure_
|
||||
|
||||
# Since we're using an absolute timeout below and want to avoid any initial
|
||||
# disk read performance penalty, run first with a large timeout and no error
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# The 200x is on an Intel i7-based system.
|
||||
# On an AMD FX-4100, it would take up to 2500x longer.
|
||||
|
||||
# Copyright 2014-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -26,6 +26,7 @@ fail=0
|
||||
# system load during the two test runs, so we'll mark it as
|
||||
# "expensive", making it less likely to be run by regular users.
|
||||
expensive_
|
||||
require_perl_
|
||||
|
||||
# Make the input large enough so that even on high-end systems
|
||||
# the unibyte test takes at least 10ms of user time.
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# grep -Fw could false-match when using a non-UTF8 multibyte locale.
|
||||
|
||||
# Copyright 2019-2021 Free Software Foundation, Inc.
|
||||
# Copyright 2019-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test whether \s matches SP and UTF-8 multi-byte white space characters.
|
||||
#
|
||||
# Copyright (C) 2013-2021 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2013-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user