mirror of
https://https.git.savannah.gnu.org/git/grep.git
synced 2026-01-28 10:24:30 +00:00
Compare commits
125 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
071ac3aa76 | ||
|
|
c635f7dd92 | ||
|
|
37b95973aa | ||
|
|
22533e58ff | ||
|
|
e6d5e6809b | ||
|
|
07a3bb2b44 | ||
|
|
8185556858 | ||
|
|
275600f387 | ||
|
|
db5172dc2b | ||
|
|
1665c885f2 | ||
|
|
682f7f693d | ||
|
|
335fcd3f53 | ||
|
|
2e19d07ef1 | ||
|
|
b871c3e428 | ||
|
|
3f8c09ec19 | ||
|
|
bd7250ca36 | ||
|
|
f8bb8c519e | ||
|
|
082f068a5e | ||
|
|
dc292e8bb0 | ||
|
|
b1dee0f8b3 | ||
|
|
b197be563e | ||
|
|
52418599b3 | ||
|
|
a4628e58dd | ||
|
|
05f8c68183 | ||
|
|
2f5068b6ea | ||
|
|
50c4df64c1 | ||
|
|
6de7c9d48b | ||
|
|
ffb27fd225 | ||
|
|
920daa57a4 | ||
|
|
9863d53a5f | ||
|
|
ef595c086b | ||
|
|
5cc5251d5d | ||
|
|
3160603308 | ||
|
|
0747169015 | ||
|
|
006951de68 | ||
|
|
fc6aba9000 | ||
|
|
b1eaccd96d | ||
|
|
ba98ec78f5 | ||
|
|
ad030d9bbb | ||
|
|
6ee856200a | ||
|
|
19e301ad53 | ||
|
|
421b2993e2 | ||
|
|
aa203fdaa9 | ||
|
|
7ddaa55cab | ||
|
|
eaca869822 | ||
|
|
29a9b72db3 | ||
|
|
24deafb92f | ||
|
|
fce28c4a5e | ||
|
|
944c2eccc7 | ||
|
|
8fb15fb5bf | ||
|
|
288ea84c70 | ||
|
|
c89ce1cd48 | ||
|
|
37ed0f5621 | ||
|
|
cb83e12460 | ||
|
|
e7481a0939 | ||
|
|
08c4ce064b | ||
|
|
1c9e7544cf | ||
|
|
eda769be72 | ||
|
|
3612f5e218 | ||
|
|
37a1e07606 | ||
|
|
58d2475965 | ||
|
|
53b889155f | ||
|
|
1fa829d367 | ||
|
|
be9fcc2d2d | ||
|
|
b4dd3b00a5 | ||
|
|
3d900da3b5 | ||
|
|
b9a8047099 | ||
|
|
443961a929 | ||
|
|
e248db797a | ||
|
|
4810ea0838 | ||
|
|
102be2bfa5 | ||
|
|
dd8f04957c | ||
|
|
554e5b25fe | ||
|
|
052282642c | ||
|
|
f80b106d15 | ||
|
|
3e926715c8 | ||
|
|
1dbdcdc4c8 | ||
|
|
180e8dd674 | ||
|
|
13fd8279e5 | ||
|
|
d1c3fbe772 | ||
|
|
105e432d7f | ||
|
|
9e915da342 | ||
|
|
975378294a | ||
|
|
975ed119e9 | ||
|
|
7918c33702 | ||
|
|
ea3ec61613 | ||
|
|
481e6b4a3b | ||
|
|
68c7d2f51c | ||
|
|
6980733869 | ||
|
|
95553c0661 | ||
|
|
d59cbb36b9 | ||
|
|
f951840aa5 | ||
|
|
16f9ca8ed1 | ||
|
|
e43470dafc | ||
|
|
2ea9219797 | ||
|
|
c84a192000 | ||
|
|
fa4e6c8a77 | ||
|
|
8d3afeebcc | ||
|
|
c3259803fe | ||
|
|
7460d0f8b0 | ||
|
|
92585cde9b | ||
|
|
0f2c2c256f | ||
|
|
3bcc2d8900 | ||
|
|
6e7253de1d | ||
|
|
fd2d0f7165 | ||
|
|
3b15d73897 | ||
|
|
e4983bd587 | ||
|
|
85e0e4fdd4 | ||
|
|
88b2d37c0a | ||
|
|
19d2275fd1 | ||
|
|
488a115bfe | ||
|
|
c63a0950ff | ||
|
|
1d59f1b342 | ||
|
|
0119aa8df1 | ||
|
|
0035fb36eb | ||
|
|
86d7b53af1 | ||
|
|
30b80b654e | ||
|
|
15f1f50e20 | ||
|
|
98ee05b4dd | ||
|
|
99330c2b1d | ||
|
|
373b4434eb | ||
|
|
c83ffc197e | ||
|
|
7979ea7ddb | ||
|
|
3dc94feb2e | ||
|
|
9ef526a617 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -52,6 +52,7 @@
|
||||
/tests/cspatfile
|
||||
/tests/ere.script
|
||||
/tests/get-mb-cur-max
|
||||
/tests/init.sh
|
||||
/tests/khadafy.out
|
||||
/tests/patfile
|
||||
/tests/spencer1.script
|
||||
|
||||
@ -1 +1 @@
|
||||
3.8
|
||||
3.12
|
||||
|
||||
2
AUTHORS
2
AUTHORS
@ -1,4 +1,4 @@
|
||||
Copyright (C) 1992, 1997-2002, 2004-2023 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1407,7 +1407,7 @@
|
||||
is put in different compiled structure patterns[]. The patterns
|
||||
are given to dfacomp() and kwsmusts() as is.
|
||||
(Ecompile): Likewise.
|
||||
(Fcompile): Reverse to the old behaviour of compiling the enire
|
||||
(Fcompile): Reverse to the old behaviour of compiling the entire
|
||||
patterns in one shot.
|
||||
(EGexecute): If falling to GNU regex for the matching, loop in the
|
||||
array of compile patterns[] to find a match.
|
||||
@ -1457,7 +1457,7 @@
|
||||
(xrealloc): Removed using lib/xmalloc.c.
|
||||
(xmalloc): Removed using lib/xmalloc.c
|
||||
(main): Register with atexit() to check for error on stdout.
|
||||
* configure.in: Check for atexit(), call jm_MALLOC, jm_RELLOC and
|
||||
* configure.in: Check for atexit(), call jm_MALLOC, jm_REALLOC and
|
||||
jm_PREREQ_ERROR.
|
||||
* tests/bre.awk: Removed the hack to drain the buffer since we
|
||||
always fclose(stdout) atexit.
|
||||
@ -1541,7 +1541,7 @@
|
||||
* src/exclude.h: New file.
|
||||
* src/grep.c (main): Took the GNU tar code to handle
|
||||
the option --include, --exclude, --exclude-from.
|
||||
Files are check for a match, with exlude_filename ().
|
||||
Files are checked for a match, with exclude_filename ().
|
||||
New option --exclude-from.
|
||||
* src/savedir.c: Call exclude_filename() to check for
|
||||
file pattern exclusion or inclusion.
|
||||
@ -1592,7 +1592,7 @@
|
||||
|
||||
* m4/dosfile.m4 (AC_DOSFILE): Move AC_DEFINEs out of AC_CACHE_CHECK.
|
||||
|
||||
2001-02-17 Alain Malgoire
|
||||
2001-02-17 Alain Magloire
|
||||
|
||||
* doc/grep.texi: Document the new options and the new behaviour
|
||||
back-references are local. Use excerpt from Karl Berry regex
|
||||
@ -1699,8 +1699,8 @@
|
||||
(color): Rename color variable to color_option.
|
||||
Removed 'always|never|auto' arguments, not necessary for grep.
|
||||
(exclude_pattern): new variable, holder for the file pattern.
|
||||
(include_pattern): new variable, hoder for the file pattern.
|
||||
* src/savedir.c: Signature change, take two new argmuments.
|
||||
(include_pattern): new variable, holder for the file pattern.
|
||||
* src/savedir.c: Signature change, take two new arguments.
|
||||
* doc/grep.texi: Document, new options.
|
||||
* doc/grep.man: Document, new options.
|
||||
|
||||
@ -1712,7 +1712,7 @@
|
||||
|
||||
2001-02-09 Alain Magloire
|
||||
|
||||
Patch from Ulrich Drepper to provide hilighting.
|
||||
Patch from Ulrich Drepper to provide highlighting.
|
||||
|
||||
* src/grep.c: New option --color.
|
||||
(color): New static var.
|
||||
@ -1722,7 +1722,7 @@
|
||||
to find the offset of the matching string.
|
||||
* src/savedir.c: Take advantage of _DIRENT_HAVE_TYPE if supported.
|
||||
* src/search.c (EGexecute, Fexecute, Pexecute): Take a new argument
|
||||
when doing exact match for the color hiligting.
|
||||
when doing exact match for the color highlighting.
|
||||
|
||||
2000-09-01 Brian Youmans
|
||||
|
||||
@ -1792,7 +1792,7 @@
|
||||
|
||||
2000-06-02 Paul Eggert
|
||||
|
||||
Problen noted by Gerald Stoller <gerald_stoller@hotmail.com>
|
||||
Problem noted by Gerald Stoller <gerald_stoller@hotmail.com>
|
||||
|
||||
* src/grep.c (main): POSIX says that -q overrides -l, which
|
||||
in turn overrides the other output options. Fix grep to
|
||||
@ -2208,7 +2208,7 @@
|
||||
on pre-OpenVMS 7.x systems; general overhaul.
|
||||
* src/getpagesize.h: Reinstate support for different pagesizes on
|
||||
VAX and Alpha. Work around problem with DEC C compiler.
|
||||
* src/vms_fab.c: Cast to some assigments; fixed typo argcp vs. argp.
|
||||
* src/vms_fab.c: Cast to some assignments; fixed typo argcp vs. argp.
|
||||
* src/vms_fab.h: Added new include files to avoid warnings about
|
||||
undefined function prototypes.
|
||||
Those patches were provided by Martin P.J. Zinser (zinser@decus.de).
|
||||
@ -2670,7 +2670,7 @@
|
||||
|
||||
1999-03-16 Volker Borchert
|
||||
|
||||
* configure.in: Use case case ... esac for checking Visual C++.
|
||||
* configure.in: Use case ... esac for checking Visual C++.
|
||||
When ${CC} contains options it was not recognized.
|
||||
|
||||
1999-03-07 Paul Eggert
|
||||
@ -2764,7 +2764,7 @@
|
||||
|
||||
1999-02-10 Alain Magloire
|
||||
|
||||
* bootstrap/{Makefile{try,am},REAMDE} : skeleton
|
||||
* bootstrap/{Makefile{try,am},README} : skeleton
|
||||
provided for system lacking the tools to autoconfigure.
|
||||
|
||||
* src/{e,f,}grepmat.c: added guard [HAVE_CONFIG_H]
|
||||
@ -2858,7 +2858,7 @@
|
||||
* doc/Makefile.am djgpp/Makefile.am m4/Makefile.am vms/Makefile.am:
|
||||
New files.
|
||||
|
||||
* m4/progtest.m4: proctect '[]' from m4.
|
||||
* m4/progtest.m4: protect '[]' from m4.
|
||||
Noted by Eli Z.
|
||||
|
||||
* PATCHES-AC: New file, add the patch for autoconf in the dist.
|
||||
@ -3333,7 +3333,7 @@
|
||||
Suggested by Harald Hanche-Olsen.
|
||||
|
||||
* src/grep.c (main): '-f /dev/null' now specifies no patterns
|
||||
and therfore matches nothing.
|
||||
and therefore matches nothing.
|
||||
Reported by Jorge Stolfi.
|
||||
Patched by Paul Eggert.
|
||||
|
||||
@ -3368,7 +3368,7 @@
|
||||
* src/grep.c: reverse back to greping directories,
|
||||
One could skip the error message by defining
|
||||
SKIP_DIR_ERROR. There is no clear way of doing
|
||||
things, I hope to setle this on the next majore release
|
||||
things, I hope to settle this on the next major release
|
||||
Thanks Paul Eggert, Eli Zaretskii and gnits for the
|
||||
exchange.
|
||||
|
||||
@ -3427,7 +3427,7 @@
|
||||
(setmatcher) [HAVE_SETRLIMIT]: Set re_max_failures so that the
|
||||
matcher won't ever overflow the stack.
|
||||
(main) [__MSDOS__, _WIN32]: Handle backslashes and drive letters
|
||||
in argv[0], remove the .exe suffix, and downcase the prgram name.
|
||||
in argv[0], remove the .exe suffix, and downcase the program name.
|
||||
[O_BINARY]: Pass additional DOS-specific options to getopt_long
|
||||
and handle them. Call stat before attempting to open the file, in
|
||||
case it is a directory (DOS will fail the open call for
|
||||
@ -3497,7 +3497,7 @@
|
||||
regex package. Change the way the tests were done to be more
|
||||
conformant to automake.
|
||||
|
||||
* configure.in: added --disable-regex for folks with their own fuctions.
|
||||
* configure.in: added --disable-regex for folks with their own functions.
|
||||
|
||||
* grep-20d : available for testing
|
||||
|
||||
@ -3551,7 +3551,7 @@
|
||||
|
||||
* check.sh, scriptgen.awk: fix grep paths.
|
||||
|
||||
* change the directory strucure: grep is now in src to comply with
|
||||
* change the directory structure: grep is now in src to comply with
|
||||
gettext.m4.
|
||||
|
||||
* grep.c version.c [VERSION]: got rid of version.c,
|
||||
@ -3648,6 +3648,6 @@
|
||||
|
||||
* Version 2.0 released.
|
||||
|
||||
Copyright (C) 1998-2023 Free Software Foundation, Inc.
|
||||
Copyright (C) 1998-2026 Free Software Foundation, Inc.
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted provided the copyright notice and this notice are preserved.
|
||||
|
||||
2
HACKING
2
HACKING
@ -597,7 +597,7 @@ Then just open the index.html file (in the generated lcov-html directory)
|
||||
in your favorite web browser.
|
||||
|
||||
========================================================================
|
||||
Copyright (C) 2009-2023 Free Software Foundation, Inc.
|
||||
Copyright (C) 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
Permission is granted to copy, distribute and/or modify this document
|
||||
under the terms of the GNU Free Documentation License, Version 1.3 or
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# Process this file with automake to create Makefile.in
|
||||
#
|
||||
# Copyright 1997-1998, 2005-2023 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -66,13 +66,10 @@ gen-ChangeLog:
|
||||
# current locale considers to be equal.
|
||||
ASSORT = LC_ALL=C sort
|
||||
|
||||
# Extract all lines up to the first one starting with "##".
|
||||
prologue = perl -ne '/^\#\#/ and exit; print' $(srcdir)/THANKS.in
|
||||
|
||||
THANKS: THANKS.in Makefile.am .mailmap thanks-gen
|
||||
$(AM_V_GEN)rm -f $@-t $@; \
|
||||
{ \
|
||||
$(prologue); echo; \
|
||||
perl -ne '/^\#\#/ and exit; print' $(srcdir)/THANKS.in; echo; \
|
||||
{ perl -ne '/^$$/.../^$$/ and !/^$$/ and s/ +/\0/ and print' \
|
||||
$(srcdir)/THANKS.in; \
|
||||
git log --pretty=format:'%aN%x00%aE' \
|
||||
|
||||
75
NEWS
75
NEWS
@ -1,5 +1,78 @@
|
||||
GNU grep NEWS -*- outline -*-
|
||||
|
||||
* Noteworthy changes in release ?.? (????-??-??) [?]
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.12 (2025-04-10) [stable]
|
||||
|
||||
** Bug fixes
|
||||
|
||||
Searching a directory with at least 100,000 entries no longer fails
|
||||
with "Operation not supported" and exit status 2. Now, this prints 1
|
||||
and no diagnostic, as expected:
|
||||
$ mkdir t && cd t && seq 100000|xargs touch && grep -r x .; echo $?
|
||||
1
|
||||
[bug introduced in grep 3.11]
|
||||
|
||||
-mN where 1 < N no longer mistakenly lseeks to end of input merely
|
||||
because standard output is /dev/null.
|
||||
|
||||
** Changes in behavior
|
||||
|
||||
The --unix-byte-offsets (-u) option is gone. In grep-3.7 (2021-08-14)
|
||||
it became a warning-only no-op. Before then, it was a Windows-only no-op.
|
||||
|
||||
On Windows platforms and on AIX in 32-bit mode, grep in some cases
|
||||
now supports Unicode characters outside the Basic Multilingual Plane.
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.11 (2023-05-13) [stable]
|
||||
|
||||
** Bug fixes
|
||||
|
||||
With -P, patterns like [\d] now work again. Fixing this has caused
|
||||
grep to revert to the behavior of grep 3.8, in that patterns like \w
|
||||
and \b go back to using ASCII rather than Unicode interpretations.
|
||||
However, future versions of GNU grep and/or PCRE2 are likely to fix
|
||||
this and change the behavior of \w and \b back to Unicode again,
|
||||
without breaking [\d] as 3.10 did.
|
||||
[bug introduced in grep 3.10]
|
||||
|
||||
grep no longer fails on files dated after the year 2038,
|
||||
when running on 32-bit x86 and ARM hosts using glibc 2.34+.
|
||||
[bug introduced in grep 3.9]
|
||||
|
||||
grep -P no longer fails to match patterns using negated classes
|
||||
like \D or \W when linked with PCRE2 10.34 or newer.
|
||||
[bug introduced in grep 3.8]
|
||||
|
||||
|
||||
** Changes in behavior
|
||||
|
||||
grep --version now prints a line describing the version of PCRE2 it uses.
|
||||
For example, it prints this when built with the very latest from git:
|
||||
grep -P uses PCRE2 10.43-DEV 2023-04-14
|
||||
or this with what's currently available in Fedora 37:
|
||||
grep -P uses PCRE2 10.40 2022-04-14
|
||||
|
||||
Previous versions of grep wouldn't respect the user-provided settings for
|
||||
PCRE_CFLAGS and PCRE_LIBS when building if a libpcre2-8 pkg-config module
|
||||
was found.
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.10 (2023-03-22) [stable]
|
||||
|
||||
** Bug fixes
|
||||
|
||||
With -P, \d now matches only ASCII digits, regardless of PCRE
|
||||
options/modes. The changes in grep-3.9 to make \b and \w work
|
||||
properly had the undesirable side effect of making \d also match
|
||||
e.g., the Arabic digits: ٠١٢٣٤٥٦٧٨٩. With grep-3.9, -P '\d+'
|
||||
would match that ten-digit (20-byte) string. Now, to match such
|
||||
a digit, you would use \p{Nd}. Similarly, \D is now mapped to [^0-9].
|
||||
[bug introduced in grep 3.9]
|
||||
|
||||
|
||||
* Noteworthy changes in release 3.9 (2023-03-05) [stable]
|
||||
|
||||
** Bug fixes
|
||||
@ -1338,7 +1411,7 @@ necessary to track the evolution of the regex package, and since
|
||||
I was changing it anyway I decided to do a general cleanup.
|
||||
|
||||
========================================================================
|
||||
Copyright (C) 1992, 1997-2002, 2004-2023 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
|
||||
2
README
2
README
@ -1,4 +1,4 @@
|
||||
Copyright (C) 1992, 1997-2002, 2004-2023 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
Copyright (C) 1992, 1997-2002, 2004-2023 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -101,7 +101,7 @@ each program. One way to do this is to use vc-dwim
|
||||
|
||||
-----
|
||||
|
||||
Copyright (C) 2002-2023 Free Software Foundation, Inc.
|
||||
Copyright (C) 2002-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -13,6 +13,7 @@ end of e.g., grep --help).
|
||||
Akim Demaille akim@epita.fr
|
||||
Andreas Schwab schwab@suse.de
|
||||
Andreas Ley andy@rz.uni-karlsruhe.de
|
||||
Anton Samokat samokat700@gmail.com
|
||||
Bastiaan "Darquan" Stougie darquan@zonnet.nl
|
||||
Ben Elliston bje@cygnus.com
|
||||
Bernd Strieder strieder@student.uni-kl.de
|
||||
@ -28,6 +29,7 @@ David J MacKenzie djm@catapult.va.pubnix.com
|
||||
David O'Brien obrien@freebsd.org
|
||||
'Drake' Daham Wang drakewang@gmail.com
|
||||
Egmont Koblinger egmont@gmail.com
|
||||
Emanuele Torre torreemanuele6@gmail.com
|
||||
Fernando Basso fernandobasso.br@gmail.com
|
||||
Florian La Roche laroche@redhat.com
|
||||
François Pinard pinard@iro.umontreal.ca
|
||||
@ -55,6 +57,7 @@ Karl Pettersson karl.pettersson@klpn.se
|
||||
Kaveh R. Ghazi ghazi@caip.rutgers.edu
|
||||
Kazuro Furukawa furukawa@apricot.kek.jp
|
||||
Keith Bostic bostic@bsdi.com
|
||||
Koen Claessen koen@chalmers.se
|
||||
Krishna Sethuraman krishna@sgihub.corp.sgi.com
|
||||
Kurt D Schwehr kdschweh@insci14.ucsd.edu
|
||||
Ludovic Courtès ludo@gnu.org
|
||||
@ -79,6 +82,7 @@ Rainer Orth ro@cebitec.uni-bielefeld.de
|
||||
Roland Roberts rroberts@muller.com
|
||||
Ruslan Ermilov ru@freebsd.org
|
||||
Santiago Vila sanvila@unex.es
|
||||
Sebastian Carlos sebaaa1754@gmail.com
|
||||
Shannon Hill hill@synnet.com
|
||||
Sotiris Vassilopoulos Sotiris.Vassilopoulos@betatech.gr
|
||||
Standish Parsley adsspamtrap01@yahoo.com
|
||||
|
||||
2
TODO
2
TODO
@ -1,6 +1,6 @@
|
||||
Things to do for GNU grep
|
||||
|
||||
Copyright (C) 1992, 1997-2002, 2004-2023 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# Bootstrap configuration.
|
||||
|
||||
# Copyright (C) 2006-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2006-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -17,6 +17,9 @@
|
||||
|
||||
avoided_gnulib_modules='
|
||||
--avoid=lock-tests
|
||||
--avoid=mbuiter
|
||||
--avoid=mbuiterf
|
||||
--avoid=mbrlen-tests
|
||||
--avoid=mbrtowc-tests
|
||||
--avoid=update-copyright-tests
|
||||
'
|
||||
@ -28,11 +31,13 @@ argmatch
|
||||
assert-h
|
||||
c-ctype
|
||||
c-stack
|
||||
c-strcase
|
||||
c-strcasecmp
|
||||
c32isalnum
|
||||
c32rtomb
|
||||
closeout
|
||||
configmake
|
||||
dirname-lgpl
|
||||
dfa
|
||||
dirname-lgpl
|
||||
do-release-commit-and-tag
|
||||
error
|
||||
exclude
|
||||
@ -52,56 +57,55 @@ hash
|
||||
idx
|
||||
ignore-value
|
||||
intprops
|
||||
inttypes
|
||||
inttypes-h
|
||||
isatty
|
||||
isblank
|
||||
kwset
|
||||
largefile
|
||||
locale
|
||||
locale-h
|
||||
lseek
|
||||
maintainer-makefile
|
||||
malloc-gnu
|
||||
manywarnings
|
||||
mbrlen
|
||||
mbrtowc
|
||||
mbrtoc32-regular
|
||||
mbszero
|
||||
mcel-prefer
|
||||
memchr
|
||||
memchr2
|
||||
mempcpy
|
||||
minmax
|
||||
nullptr
|
||||
obstack
|
||||
openat-safer
|
||||
perl
|
||||
propername
|
||||
rawmemchr
|
||||
readme-release
|
||||
realloc-gnu
|
||||
realloc-posix
|
||||
regex
|
||||
safe-read
|
||||
same-inode
|
||||
ssize_t
|
||||
stdckdint
|
||||
stddef
|
||||
stdlib
|
||||
stdckdint-h
|
||||
stddef-h
|
||||
stdlib-h
|
||||
stpcpy
|
||||
strerror
|
||||
string
|
||||
string-h
|
||||
strstr
|
||||
strtoull
|
||||
strtoumax
|
||||
sys_stat
|
||||
unistd
|
||||
sys_stat-h
|
||||
unistd-h
|
||||
unlocked-io
|
||||
update-copyright
|
||||
useless-if-before-free
|
||||
verify
|
||||
version-etc-fsf
|
||||
wchar
|
||||
wcrtomb
|
||||
wctob
|
||||
wctype-h
|
||||
wchar-single
|
||||
windows-stat-inodes
|
||||
xalloc
|
||||
xbinary-io
|
||||
xstrtoimax
|
||||
year2038
|
||||
'
|
||||
gnulib_name=libgreputils
|
||||
|
||||
@ -129,12 +133,16 @@ gnulib_tool_option_extras="--tests-base=gnulib-tests --with-tests --symlink\
|
||||
buildreq="\
|
||||
autoconf 2.62
|
||||
automake 1.11.1
|
||||
autopoint -
|
||||
autopoint 0.19.2
|
||||
gettext -
|
||||
git 1.4.4
|
||||
gzip -
|
||||
m4 -
|
||||
makeinfo -
|
||||
tar -
|
||||
texi2pdf 6.1
|
||||
wget -
|
||||
xz -
|
||||
"
|
||||
|
||||
bootstrap_post_import_hook ()
|
||||
@ -142,22 +150,27 @@ bootstrap_post_import_hook ()
|
||||
# Automake requires that ChangeLog exist.
|
||||
touch ChangeLog || return 1
|
||||
|
||||
# Copy tests/init.sh from Gnulib.
|
||||
$gnulib_tool --copy-file tests/init.sh
|
||||
|
||||
# Copy pkg-config's pkg.m4 so that our downstream users don't need to.
|
||||
local ac_dir=`aclocal --print-ac-dir`
|
||||
test -s "$ac_dir/dirlist" && ac_dir=$ac_dir:`tr '\n' : < "$ac_dir/dirlist"`
|
||||
oIFS=$IFS
|
||||
IFS=:
|
||||
local found=false
|
||||
for dir in \
|
||||
$ACLOCAL_PATH $ac_dir /usr/share/aclocal ''
|
||||
do
|
||||
IFS=$oIFS
|
||||
if test -n "$dir" && test -r "$dir/pkg.m4"; then
|
||||
cp "$dir/pkg.m4" m4/pkg.m4
|
||||
return
|
||||
found=:
|
||||
break
|
||||
fi
|
||||
done
|
||||
IFS=$oIFS
|
||||
die 'Cannot find pkg.m4; perhaps you need to install pkg-config'
|
||||
$found || die 'Cannot find pkg.m4; perhaps you need to install pkg-config'
|
||||
}
|
||||
|
||||
bootstrap_epilogue()
|
||||
|
||||
12
cfg.mk
12
cfg.mk
@ -1,5 +1,5 @@
|
||||
# Customize maint.mk -*- makefile -*-
|
||||
# Copyright (C) 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -45,6 +45,12 @@ announcement_Cc_ = $(translation_project_), $(PACKAGE)-devel@gnu.org
|
||||
_gl_TS_unmarked_extern_functions = \
|
||||
main usage mb_clen to_uchar dfaerror dfawarn imbrlen
|
||||
|
||||
# Write base64-encoded (not hex) checksums into the announcement.
|
||||
announce_gen_args = --cksum-checksums
|
||||
|
||||
# Add an exemption for sc_makefile_at_at_check.
|
||||
_makefile_at_at_check_exceptions = ' && !/MAKEINFO/'
|
||||
|
||||
# Now that we have better tests, make this the default.
|
||||
export VERBOSE = yes
|
||||
|
||||
@ -68,7 +74,7 @@ export VERBOSE = yes
|
||||
# 1127556 9e
|
||||
export XZ_OPT = -6e
|
||||
|
||||
old_NEWS_hash = 5c1796327f2c4c7f40e10e58fd3a8019
|
||||
old_NEWS_hash = 3713245f672c3a9d1b455d6cc410c9ec
|
||||
|
||||
# We prefer to spell it back-reference, as POSIX does.
|
||||
sc_prohibit_backref:
|
||||
@ -182,3 +188,5 @@ sc_timeout_prereq:
|
||||
| $(GREP) . \
|
||||
&& { echo '$(ME): timeout without use of require_timeout_' \
|
||||
1>&2; exit 1; } || :
|
||||
|
||||
codespell_ignore_words_list = clen,allo,Nd,abd,alph,debbugs,wee,UE,ois,creche
|
||||
|
||||
33
configure.ac
33
configure.ac
@ -1,7 +1,7 @@
|
||||
dnl
|
||||
dnl autoconf input file for GNU grep
|
||||
dnl
|
||||
dnl Copyright (C) 1997-2006, 2009-2023 Free Software Foundation, Inc.
|
||||
dnl Copyright (C) 1997-2006, 2009-2026 Free Software Foundation, Inc.
|
||||
dnl
|
||||
dnl This file is part of GNU grep.
|
||||
dnl
|
||||
@ -51,16 +51,15 @@ AC_PROG_INSTALL
|
||||
AC_PROG_CC
|
||||
gl_EARLY
|
||||
AC_PROG_RANLIB
|
||||
PKG_PROG_PKG_CONFIG([0.9.0])
|
||||
PKG_PROG_PKG_CONFIG([0.9.0], [PKG_CONFIG=false])
|
||||
|
||||
# grep never invokes mbrtowc or mbrlen on empty input,
|
||||
# so don't worry about this common bug,
|
||||
# as working around it would merely slow grep down.
|
||||
gl_cv_func_mbrtowc_empty_input='assume yes'
|
||||
gl_cv_func_mbrlen_empty_input='assume yes'
|
||||
|
||||
dnl Checks for typedefs, structures, and compiler characteristics.
|
||||
AC_TYPE_SIZE_T
|
||||
AC_C_CONST
|
||||
gl_INIT
|
||||
|
||||
# Ensure VLAs are not used.
|
||||
@ -134,6 +133,7 @@ if test $gl_gcc_warnings != no; then
|
||||
for w in $ws; do
|
||||
gl_WARN_ADD([$w])
|
||||
done
|
||||
gl_WARN_ADD([-Wtrailing-whitespace]) # This project's coding style
|
||||
gl_WARN_ADD([-Wno-missing-field-initializers]) # We need this one
|
||||
gl_WARN_ADD([-Wno-sign-compare]) # Too many warnings for now
|
||||
gl_WARN_ADD([-Wno-unused-parameter]) # Too many warnings for now
|
||||
@ -166,14 +166,19 @@ if test $gl_gcc_warnings != no; then
|
||||
AC_SUBST([GNULIB_WARN_CFLAGS])
|
||||
|
||||
# For gnulib-tests, the set is slightly smaller still.
|
||||
nw=
|
||||
nw="$nw -Wstrict-prototypes"
|
||||
# It's not worth being this picky about test programs.
|
||||
nw="$nw -Wsuggest-attribute=const"
|
||||
nw="$nw -Wsuggest-attribute=pure"
|
||||
nw="$nw -Wsuggest-attribute=format"
|
||||
nw=
|
||||
nw="$nw -Wformat-truncation=2" # False alarm in strerror_r.c
|
||||
nw="$nw -Wmissing-declarations"
|
||||
nw="$nw -Wmissing-prototypes"
|
||||
nw="$nw -Wmissing-variable-declarations"
|
||||
nw="$nw -Wnull-dereference"
|
||||
nw="$nw -Wold-style-definition"
|
||||
nw="$nw -Wstrict-prototypes"
|
||||
nw="$nw -Wsuggest-attribute=cold"
|
||||
nw="$nw -Wsuggest-attribute=const"
|
||||
nw="$nw -Wsuggest-attribute=format"
|
||||
nw="$nw -Wsuggest-attribute=pure"
|
||||
|
||||
# Disable to avoid warnings in e.g., test-intprops.c and test-limits-h.c
|
||||
# due to overlong expansions like this:
|
||||
@ -193,13 +198,7 @@ AC_DEFINE([ARGMATCH_DIE], [usage (EXIT_FAILURE)],
|
||||
AC_DEFINE([ARGMATCH_DIE_DECL], [void usage (int _e)],
|
||||
[Define to the declaration of the xargmatch failure function.])
|
||||
|
||||
dnl Checks for header files.
|
||||
AC_HEADER_DIRENT
|
||||
|
||||
dnl Checks for functions.
|
||||
AC_FUNC_CLOSEDIR_VOID
|
||||
|
||||
AC_CHECK_FUNCS_ONCE([isascii setlocale])
|
||||
AC_CHECK_FUNCS_ONCE([setlocale])
|
||||
|
||||
dnl I18N feature
|
||||
AM_GNU_GETTEXT_VERSION([0.18.2])
|
||||
@ -212,6 +211,8 @@ dnl then the installer should configure --with-included-regex.
|
||||
AM_CONDITIONAL([USE_INCLUDED_REGEX], [test "$ac_use_included_regex" = yes])
|
||||
if test "$ac_use_included_regex" = no; then
|
||||
AC_MSG_WARN([Included lib/regex.c not used])
|
||||
else
|
||||
AC_DEFINE([USE_INCLUDED_REGEX], 1, [building with included regex code])
|
||||
fi
|
||||
|
||||
gl_FUNC_PCRE
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# Process this file with automake to create Makefile.in
|
||||
# Makefile.am for grep/doc.
|
||||
#
|
||||
# Copyright 2008-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2008-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -16,6 +16,10 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# The customization variable CHECK_NORMAL_MENU_STRUCTURE is necessary with
|
||||
# makeinfo versions ≥ 6.8.
|
||||
MAKEINFO = @MAKEINFO@ -c CHECK_NORMAL_MENU_STRUCTURE=1
|
||||
|
||||
info_TEXINFOS = grep.texi
|
||||
grep_TEXINFOS = fdl.texi
|
||||
|
||||
|
||||
@ -5,8 +5,8 @@
|
||||
@c hence no sectioning command or @node.
|
||||
|
||||
@display
|
||||
Copyright @copyright{} 2000--2002, 2007--2008, 2023 Free Software Foundation,
|
||||
Inc.
|
||||
Copyright @copyright{} 2000--2002, 2007--2008, 2023--2026 Free Software
|
||||
Foundation, Inc.
|
||||
@uref{https://fsf.org/}
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
|
||||
274
doc/grep.in.1
274
doc/grep.in.1
@ -2,7 +2,7 @@
|
||||
.de dT
|
||||
.ds Dt \\$2
|
||||
..
|
||||
.dT Time-stamp: "2019-12-29"
|
||||
.dT Time-stamp: "2025-03-21"
|
||||
.\" Update the above date whenever a change to either this file or
|
||||
.\" grep.c's 'usage' function results in a nontrivial change to the man page.
|
||||
.\" In Emacs, you can update the date by running 'M-x time-stamp'
|
||||
@ -11,8 +11,10 @@
|
||||
.
|
||||
.TH GREP 1 \*(Dt "GNU grep @VERSION@" "User Commands"
|
||||
.
|
||||
.if !\w|\*(lq| \{\
|
||||
.\" groff an-old.tmac does not seem to be in use, so define lq and rq.
|
||||
.ie \n(.g .ds ' \(aq
|
||||
.el .ds ' '
|
||||
.if !\w@\*(lq@ \{\
|
||||
.\" The implementation lacks \*(lq and presumably \*(rq.
|
||||
. ie \n(.g \{\
|
||||
. ds lq \(lq\"
|
||||
. ds rq \(rq\"
|
||||
@ -23,114 +25,110 @@
|
||||
. \}
|
||||
.\}
|
||||
.
|
||||
.if !\w|\*(la| \{\
|
||||
.as mC
|
||||
.if !\w@\*(mC@ \{\
|
||||
.\" groff an-ext.tmac does not seem to be in use, so define the parts of
|
||||
.\" it that are used below. For a copy of groff an-ext.tmac, please see:
|
||||
.\" https://git.savannah.gnu.org/cgit/groff.git/plain/tmac/an-ext.tmac
|
||||
.\" --- Start of lines taken from groff an-ext.tmac
|
||||
.\" it that are used below, taken from groff 1.23.0. For a copy, please see:
|
||||
.\" https://git.savannah.gnu.org/cgit/groff.git/plain/tmac/an-ext.tmac?id=1.23.0
|
||||
.nr mG \n(.g-1
|
||||
.\" --- Start of lines taken from groff an-ext.tmac,
|
||||
.\" except with "nr mH 14" replaced by "nr mH 0"
|
||||
.\" and with mS, SY, YS definitions omitted.
|
||||
.
|
||||
.\" Check whether we are using grohtml.
|
||||
.nr mH 0
|
||||
.if \n(.g \
|
||||
. if '\*(.T'html' \
|
||||
. nr mH 1
|
||||
.\" Define this to your implementation's constant-width typeface.
|
||||
.ds mC CW
|
||||
.if n .ds mC R
|
||||
.
|
||||
.\" Save the automatic hyphenation mode.
|
||||
.\"
|
||||
.\" In AT&T troff, there was no register exposing the hyphenation mode,
|
||||
.\" and no way to save and restore it. Set `mH` to a reasonable value
|
||||
.\" for your implementation and preference.
|
||||
.de mY
|
||||
. ie !\\n(.g \
|
||||
. nr mH 0
|
||||
. el \
|
||||
. do nr mH \\n[.hy] \" groff extension register
|
||||
..
|
||||
.
|
||||
.nr mE 0 \" in an example (EX/EE)?
|
||||
.
|
||||
.\" Prepare link text for mail/web hyperlinks. `MT` and `UR` call this.
|
||||
.de mV
|
||||
. ds m1 \\$1\"
|
||||
..
|
||||
.
|
||||
.
|
||||
.\" Map mono-width fonts to standard fonts for groff's TTY device.
|
||||
.if n \{\
|
||||
. do ftr CR R
|
||||
. do ftr CI I
|
||||
. do ftr CB B
|
||||
.\}
|
||||
.\" Emit hyperlink. The optional argument supplies trailing punctuation
|
||||
.\" after link text. `ME` and `UE` call this.
|
||||
.de mQ
|
||||
. mY
|
||||
. nh
|
||||
<\\*(m1>\\$1
|
||||
. hy \\n(mH
|
||||
..
|
||||
.
|
||||
.\" groff has glyph entities for angle brackets.
|
||||
.ie \n(.g \{\
|
||||
. ds la \(la\"
|
||||
. ds ra \(ra\"
|
||||
.\}
|
||||
.el \{\
|
||||
. ds la <\"
|
||||
. ds ra >\"
|
||||
. \" groff's man macros control hyphenation with this register.
|
||||
. nr HY 1
|
||||
.\}
|
||||
.
|
||||
.\" Start URL.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de UR
|
||||
. ds m1 \\$1\"
|
||||
. nh
|
||||
. if \\n(mH \{\
|
||||
. \" Start diversion in a new environment.
|
||||
. do ev URL-div
|
||||
. do di URL-div
|
||||
. \}
|
||||
. mV \\$1
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" End URL.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de UE
|
||||
. ie \\n(mH \{\
|
||||
. br
|
||||
. di
|
||||
. ev
|
||||
.
|
||||
. \" Has there been one or more input lines for the link text?
|
||||
. ie \\n(dn \{\
|
||||
. do HTML-NS "<a href=""\\*(m1"">"
|
||||
. \" Yes, strip off final newline of diversion and emit it.
|
||||
. do chop URL-div
|
||||
. do URL-div
|
||||
\c
|
||||
. do HTML-NS </a>
|
||||
. \}
|
||||
. el \
|
||||
. do HTML-NS "<a href=""\\*(m1"">\\*(m1</a>"
|
||||
\&\\$*\"
|
||||
. \}
|
||||
. el \
|
||||
\\*(la\\*(m1\\*(ra\\$*\"
|
||||
.
|
||||
. hy \\n(HY
|
||||
. mQ \\$1
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" Start email address.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de MT
|
||||
. ds m1 \\$1\"
|
||||
. nh
|
||||
. if \\n(mH \{\
|
||||
. \" Start diversion in a new environment.
|
||||
. do ev URL-div
|
||||
. do di URL-div
|
||||
. \}
|
||||
. mV \\$1
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" End email address.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de ME
|
||||
. ie \\n(mH \{\
|
||||
. br
|
||||
. di
|
||||
. ev
|
||||
.
|
||||
. \" Has there been one or more input lines for the link text?
|
||||
. ie \\n(dn \{\
|
||||
. do HTML-NS "<a href=""mailto:\\*(m1"">"
|
||||
. \" Yes, strip off final newline of diversion and emit it.
|
||||
. do chop URL-div
|
||||
. do URL-div
|
||||
\c
|
||||
. do HTML-NS </a>
|
||||
. \}
|
||||
. el \
|
||||
. do HTML-NS "<a href=""mailto:\\*(m1"">\\*(m1</a>"
|
||||
\&\\$*\"
|
||||
. \}
|
||||
. el \
|
||||
\\*(la\\*(m1\\*(ra\\$*\"
|
||||
.
|
||||
. hy \\n(HY
|
||||
. mQ \\$1
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" Start example.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de EX
|
||||
. br
|
||||
. if !\\n(mE \{\
|
||||
. nr mF \\n(.f
|
||||
. nr mP \\n(PD
|
||||
. nr PD 1v
|
||||
. nf
|
||||
. ft \\*(mC
|
||||
. nr mE 1
|
||||
. \}
|
||||
..
|
||||
.\}
|
||||
.
|
||||
.
|
||||
.\" End example.
|
||||
.if \n(.g-\n(mG \{\
|
||||
.de EE
|
||||
. br
|
||||
. if \\n(mE \{\
|
||||
. ft \\n(mF
|
||||
. nr PD \\n(mP
|
||||
. fi
|
||||
. nr mE 0
|
||||
. \}
|
||||
..
|
||||
.\}
|
||||
.\" --- End of lines taken from groff an-ext.tmac
|
||||
.\}
|
||||
.
|
||||
@ -141,30 +139,33 @@ grep \- print lines that match patterns
|
||||
.
|
||||
.SH SYNOPSIS
|
||||
.B grep
|
||||
.RI [ OPTION .\|.\|.]\&
|
||||
.RI [ OPTION ].\|.\|.\&
|
||||
.I PATTERNS
|
||||
.RI [ FILE .\|.\|.]
|
||||
.RI [ FILE ].\|.\|.
|
||||
.br
|
||||
.B grep
|
||||
.RI [ OPTION .\|.\|.]\&
|
||||
.RI [ OPTION ].\|.\|.\&
|
||||
.B \-e
|
||||
.I PATTERNS
|
||||
\&.\|.\|.\&
|
||||
.RI [ FILE .\|.\|.]
|
||||
.RI [ FILE ].\|.\|.
|
||||
.br
|
||||
.B grep
|
||||
.RI [ OPTION .\|.\|.]\&
|
||||
.RI [ OPTION ].\|.\|.\&
|
||||
.B \-f
|
||||
.I PATTERN_FILE
|
||||
\&.\|.\|.\&
|
||||
.RI [ FILE .\|.\|.]
|
||||
.RI [ FILE ].\|.\|.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.B grep
|
||||
searches for
|
||||
.I PATTERNS
|
||||
in each
|
||||
searches for patterns in each
|
||||
.IR FILE .
|
||||
In the synopsis's first form, which is used if no
|
||||
.B \-e
|
||||
or
|
||||
.B \-f
|
||||
options are present, the first operand
|
||||
.I PATTERNS
|
||||
is one or more patterns separated by newline characters, and
|
||||
.B grep
|
||||
@ -244,6 +245,11 @@ If this option is used multiple times or is combined with the
|
||||
.RB ( \-\^\-regexp )
|
||||
option, search for all patterns given.
|
||||
The empty file contains zero patterns, and therefore matches nothing.
|
||||
If
|
||||
.I FILE
|
||||
is
|
||||
.BR \- ,
|
||||
read patterns from standard input.
|
||||
.TP
|
||||
.BR \-i ", " \-\^\-ignore\-case
|
||||
Ignore case distinctions in patterns and input data,
|
||||
@ -399,7 +405,7 @@ Display input actually coming from standard input as input coming from file
|
||||
This can be useful for commands that transform a file's contents
|
||||
before searching,
|
||||
e.g.,
|
||||
.BR "gzip \-cd foo.gz | grep \-\^\-label=foo \-H 'some pattern'" .
|
||||
.BR "gzip \-cd foo.gz | grep \-\^\-label=foo \-H \*'some pattern\*'" .
|
||||
See also the
|
||||
.B \-H
|
||||
option.
|
||||
@ -590,7 +596,7 @@ On the other hand, when reading files whose text encodings are
|
||||
unknown, it can be helpful to use
|
||||
.B \-a
|
||||
or to set
|
||||
.B LC_ALL='C'
|
||||
.B LC_ALL=\*'C\*'
|
||||
in the environment, in order to find more matches even if the matches
|
||||
are unsafe for direct display.
|
||||
.TP
|
||||
@ -669,7 +675,7 @@ whose base name matches
|
||||
Ignore any redundant trailing slashes in
|
||||
.IR GLOB .
|
||||
.TP
|
||||
.BR \-I
|
||||
.B \-I
|
||||
Process a binary file as if it did not contain matching data; this is
|
||||
equivalent to the
|
||||
.B \-\^\-binary\-files=without\-match
|
||||
@ -715,12 +721,12 @@ This can cause a performance penalty.
|
||||
.BR \-U ", " \-\^\-binary
|
||||
Treat the file(s) as binary.
|
||||
By default, under MS-DOS and MS-Windows,
|
||||
.BR grep
|
||||
.B grep
|
||||
guesses whether a file is text or binary as described for the
|
||||
.B \-\^\-binary\-files
|
||||
option.
|
||||
If
|
||||
.BR grep
|
||||
.B grep
|
||||
decides the file is a text file, it strips the CR characters from the
|
||||
original file contents (to make regular expressions with
|
||||
.B ^
|
||||
@ -744,7 +750,7 @@ Like the
|
||||
or
|
||||
.B \-\^\-null
|
||||
option, this option can be used with commands like
|
||||
.B sort -z
|
||||
.B "sort \-z"
|
||||
to process arbitrary file names.
|
||||
.
|
||||
.SH "REGULAR EXPRESSIONS"
|
||||
@ -756,13 +762,14 @@ expressions, by using various operators to combine smaller expressions.
|
||||
understands three different versions of regular expression syntax:
|
||||
\*(lqbasic\*(rq (BRE), \*(lqextended\*(rq (ERE) and \*(lqperl\*(rq (PCRE).
|
||||
In GNU
|
||||
.B grep
|
||||
there is no difference in available functionality between basic and
|
||||
extended syntax.
|
||||
In other implementations, basic regular expressions are less powerful.
|
||||
.BR grep ,
|
||||
basic and extended regular expressions are merely different notations
|
||||
for the same pattern-matching functionality.
|
||||
In other implementations, basic regular expressions are ordinarily
|
||||
less powerful than extended, though occasionally it is the other way around.
|
||||
The following description applies to extended regular expressions;
|
||||
differences for basic regular expressions are summarized afterwards.
|
||||
Perl-compatible regular expressions give additional functionality, and are
|
||||
Perl-compatible regular expressions have different functionality, and are
|
||||
documented in
|
||||
.BR pcre2syntax (3)
|
||||
and
|
||||
@ -802,19 +809,21 @@ matches any single digit.
|
||||
Within a bracket expression, a
|
||||
.I "range expression"
|
||||
consists of two characters separated by a hyphen.
|
||||
It matches any single character that sorts between the two characters,
|
||||
inclusive, using the locale's collating sequence and character set.
|
||||
For example, in the default C locale,
|
||||
In the default C locale, it matches any single character that appears
|
||||
between the two characters in ASCII order, inclusive.
|
||||
For example,
|
||||
.B [a\-d]
|
||||
is equivalent to
|
||||
.BR [abcd] .
|
||||
Many locales sort characters in dictionary order, and in these locales
|
||||
In other locales the behavior is unspecified:
|
||||
.B [a\-d]
|
||||
is typically not equivalent to
|
||||
.BR [abcd] ;
|
||||
it might be equivalent to
|
||||
.BR [aBbCcDd] ,
|
||||
for example.
|
||||
might be equivalent to
|
||||
.B [abcd]
|
||||
or
|
||||
.B [aBbCcDd]
|
||||
or some other bracket expression,
|
||||
or it might fail to match any character, or the set of
|
||||
characters that it matches might be erratic, or it might be invalid.
|
||||
To obtain the traditional interpretation of bracket expressions,
|
||||
you can use the C locale by setting the
|
||||
.B LC_ALL
|
||||
@ -954,7 +963,7 @@ In basic regular expressions the meta-characters
|
||||
.BR | ,
|
||||
.BR ( ,
|
||||
and
|
||||
.BR )
|
||||
.B )
|
||||
lose their special meaning; instead use the backslashed
|
||||
versions
|
||||
.BR \e? ,
|
||||
@ -1243,38 +1252,13 @@ front of the operand list and are treated as options.
|
||||
Also, POSIX requires that unrecognized options be diagnosed as
|
||||
\*(lqillegal\*(rq, but since they are not really against the law the default
|
||||
is to diagnose them as \*(lqinvalid\*(rq.
|
||||
.B POSIXLY_CORRECT
|
||||
also disables \fB_\fP\fIN\fP\fB_GNU_nonoption_argv_flags_\fP,
|
||||
described below.
|
||||
.TP
|
||||
\fB_\fP\fIN\fP\fB_GNU_nonoption_argv_flags_\fP
|
||||
(Here
|
||||
.I N
|
||||
is
|
||||
.BR grep 's
|
||||
numeric process ID.) If the
|
||||
.IR i th
|
||||
character of this environment variable's value is
|
||||
.BR 1 ,
|
||||
do not consider the
|
||||
.IR i th
|
||||
operand of
|
||||
.B grep
|
||||
to be an option, even if it appears to be one.
|
||||
A shell can put this variable in the environment for each command it runs,
|
||||
specifying which operands are the results of file name wildcard
|
||||
expansion and therefore should not be treated as options.
|
||||
This behavior is available only with the GNU C library, and only
|
||||
when
|
||||
.B POSIXLY_CORRECT
|
||||
is not set.
|
||||
.
|
||||
.SH NOTES
|
||||
This man page is maintained only fitfully;
|
||||
the full documentation is often more up-to-date.
|
||||
.
|
||||
.SH COPYRIGHT
|
||||
Copyright 1998-2000, 2002, 2005-2023 Free Software Foundation, Inc.
|
||||
Copyright 1998\(en2000, 2002, 2005\(en2026 Free Software Foundation, Inc.
|
||||
.PP
|
||||
This is free software;
|
||||
see the source for copying conditions.
|
||||
@ -1318,7 +1302,7 @@ contain \*(lqg\*(rq and end in \*(lq.h\*(rq.
|
||||
The
|
||||
.B \-n
|
||||
option outputs line numbers, the
|
||||
.B \-\-
|
||||
.B \-\^\-
|
||||
argument treats expansions of \*(lq*g*.h\*(rq starting with \*(lq\-\*(rq
|
||||
as file names not options,
|
||||
and the empty file /dev/null causes file names to be output
|
||||
@ -1326,7 +1310,7 @@ even if only one file name happens to be of the form \*(lq*g*.h\*(rq.
|
||||
.PP
|
||||
.in +2n
|
||||
.EX
|
||||
$ \fBgrep\fP \-n \-\- 'f.*\e.c$' *g*.h /dev/null
|
||||
$ \fBgrep\fP \-n \-\^\- \*'f.*\e.c$\*' *g*.h /dev/null
|
||||
argmatch.h:1:/* definitions and prototypes for argmatch.c
|
||||
.EE
|
||||
.in
|
||||
|
||||
171
doc/grep.texi
171
doc/grep.texi
@ -30,7 +30,7 @@
|
||||
@copying
|
||||
This manual is for @command{grep}, a pattern matching engine.
|
||||
|
||||
Copyright @copyright{} 1999--2002, 2005, 2008--2023 Free Software Foundation,
|
||||
Copyright @copyright{} 1999--2002, 2005, 2008--2026 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
@quotation
|
||||
@ -109,7 +109,7 @@ there is no way to match newline characters in a text.
|
||||
The general synopsis of the @command{grep} command line is
|
||||
|
||||
@example
|
||||
grep [@var{option}...] [@var{patterns}] [@var{file}...]
|
||||
grep [@var{option}]@dots{} [@var{patterns}] [@var{file}]@dots{}
|
||||
@end example
|
||||
|
||||
@noindent
|
||||
@ -202,6 +202,7 @@ in a shell command.
|
||||
Obtain patterns from @var{file}, one per line.
|
||||
If this option is used multiple times or is combined with the
|
||||
@option{-e} (@option{--regexp}) option, search for all patterns given.
|
||||
When @var{file} is @samp{-}, read patterns from standard input.
|
||||
The empty file contains zero patterns, and therefore matches nothing.
|
||||
(@option{-f} is specified by POSIX.)
|
||||
|
||||
@ -217,13 +218,25 @@ so that characters that differ only in case
|
||||
match each other. Although this is straightforward when letters
|
||||
differ in case only via lowercase-uppercase pairs, the behavior is
|
||||
unspecified in other situations. For example, uppercase ``S'' has an
|
||||
unusual lowercase counterpart ``ſ'' (Unicode character U+017F, LATIN
|
||||
unusual lowercase counterpart
|
||||
@c This does not display correctly in PDF with texinfo 7.1
|
||||
@c and pdfTeX 3.141592653-2.6-1.40.25 (TeX Live 2023/Fedora 40).
|
||||
@ifnottex
|
||||
``ſ''
|
||||
@end ifnottex
|
||||
(Unicode character U+017F LATIN
|
||||
SMALL LETTER LONG S) in many locales, and it is unspecified whether
|
||||
this unusual character matches ``S'' or ``s'' even though uppercasing
|
||||
it yields ``S''. Another example: the lowercase German letter ``ß''
|
||||
(U+00DF, LATIN SMALL LETTER SHARP S) is normally capitalized as the
|
||||
(U+00DF LATIN SMALL LETTER SHARP S) is normally capitalized as the
|
||||
two-character string ``SS'' but it does not match ``SS'', and it might
|
||||
not match the uppercase letter ``ẞ'' (U+1E9E, LATIN CAPITAL LETTER
|
||||
not match the uppercase letter
|
||||
@c This does not display correctly in PDF with texinfo 7.1
|
||||
@c and pdfTeX 3.141592653-2.6-1.40.25 (TeX Live 2023/Fedora 40).
|
||||
@ifnottex
|
||||
``ẞ''
|
||||
@end ifnottex
|
||||
(U+1E9E LATIN CAPITAL LETTER
|
||||
SHARP S) even though lowercasing the latter yields the former.
|
||||
|
||||
@option{-y} is an obsolete synonym that is provided for compatibility.
|
||||
@ -548,7 +561,7 @@ Print @var{num} lines of leading and trailing output context.
|
||||
@opindex --group-separator
|
||||
@cindex group separator
|
||||
When @option{-A}, @option{-B} or @option{-C} are in use,
|
||||
print @var{string} instead of @option{--} between groups of lines.
|
||||
print @var{string} instead of @samp{--} between groups of lines.
|
||||
|
||||
@item --no-group-separator
|
||||
@opindex --group-separator
|
||||
@ -624,9 +637,11 @@ When some output is suppressed, @command{grep} follows any output
|
||||
with a message to standard error saying that a binary file matches.
|
||||
|
||||
If @var{type} is @samp{without-match},
|
||||
when @command{grep} discovers null input binary data
|
||||
it assumes that the rest of the file does not match;
|
||||
when @command{grep} discovers null binary data in an input file
|
||||
it assumes that any unprocessed input does not match;
|
||||
this is equivalent to the @option{-I} option.
|
||||
In this case the region of unprocessed input starts no later than the
|
||||
null binary data, and continues to end of file.
|
||||
|
||||
If @var{type} is @samp{text},
|
||||
@command{grep} processes binary data as if it were text;
|
||||
@ -642,6 +657,16 @@ is not matched when @var{type} is @samp{text}. Conversely, when
|
||||
@var{type} is @samp{binary} the pattern @samp{.} (period) might not
|
||||
match a null byte.
|
||||
|
||||
The heuristic that @command{grep} uses to intuit whether input is
|
||||
binary is specific to @command{grep} and may well be unsuitable for
|
||||
other applications, as it depends on command-line options, on locale,
|
||||
and on hardware and operating system characteristics such as system
|
||||
page size and input buffering. For example, if the input consists of
|
||||
a matching text line followed by nonmatching data that contains a null
|
||||
byte, @command{grep} might either output the matching line or treat
|
||||
the file as binary, depending on whether the unprocessed input happens
|
||||
to include the matching text line.
|
||||
|
||||
@emph{Warning:} The @option{-a} (@option{--binary-files=text}) option
|
||||
might output binary garbage, which can have nasty side effects if the
|
||||
output is a terminal and if the terminal driver interprets some of it
|
||||
@ -695,7 +720,7 @@ immediately after a slash (@samp{/}) in the name.
|
||||
When searching recursively, skip any subfile whose base
|
||||
name matches @var{glob}; the base name is the part after the last
|
||||
slash. A pattern can use
|
||||
@samp{*}, @samp{?}, and @samp{[}...@samp{]} as wildcards,
|
||||
@samp{*}, @samp{?}, and @samp{[}@dots{}@samp{]} as wildcards,
|
||||
and @code{\} to quote a wildcard or backslash character literally.
|
||||
|
||||
@item --exclude-from=@var{file}
|
||||
@ -763,10 +788,10 @@ directory, recursively, following all symbolic links.
|
||||
@item --
|
||||
@opindex --
|
||||
@cindex option delimiter
|
||||
Delimit the option list. Later arguments, if any, are treated as
|
||||
operands even if they begin with @samp{-}. For example, @samp{grep PAT --
|
||||
-file1 file2} searches for the pattern PAT in the files named @file{-file1}
|
||||
and @file{file2}.
|
||||
Delimit the option list. Any later argument is not treated as an
|
||||
option even if it begins with @samp{-}. For example,
|
||||
@samp{grep -- -PAT -file1 file2} searches for the pattern @samp{-PAT}
|
||||
in the files named @file{-file1} and @file{file2}.
|
||||
|
||||
@item --line-buffered
|
||||
@opindex --line-buffered
|
||||
@ -884,7 +909,7 @@ for permitted values and their meanings as character attributes.)
|
||||
These substring values are integers in decimal representation
|
||||
and can be concatenated with semicolons.
|
||||
@command{grep} takes care of assembling the result
|
||||
into a complete SGR sequence (@samp{\33[}...@samp{m}).
|
||||
into a complete SGR sequence (@samp{\33[}@dots{}@samp{m}).
|
||||
Common values to concatenate include
|
||||
@samp{1} for bold,
|
||||
@samp{4} for underline,
|
||||
@ -997,7 +1022,7 @@ or when EL is too slow or causes too much flicker.
|
||||
The default is false (i.e., the capability is omitted).
|
||||
@end table
|
||||
|
||||
Note that boolean capabilities have no @samp{=}... part.
|
||||
Note that boolean capabilities have no @samp{=}@dots{} part.
|
||||
They are omitted (i.e., false) by default and become true when specified.
|
||||
|
||||
|
||||
@ -1060,18 +1085,6 @@ and are treated as options.
|
||||
This variable specifies the output terminal type, which can affect
|
||||
what the @option{--color} option does. @xref{General Output Control}.
|
||||
|
||||
@item _@var{N}_GNU_nonoption_argv_flags_
|
||||
@vindex _@var{N}_GNU_nonoption_argv_flags_ @r{environment variable}
|
||||
(Here @code{@var{N}} is @command{grep}'s numeric process ID.)
|
||||
If the @var{i}th character of this environment variable's value is @samp{1},
|
||||
do not consider the @var{i}th operand of @command{grep} to be an option,
|
||||
even if it appears to be one.
|
||||
A shell can put this variable in the environment for each command it runs,
|
||||
specifying which operands are the results of file name wildcard expansion
|
||||
and therefore should not be treated as options.
|
||||
This behavior is available only with the GNU C library,
|
||||
and only when @env{POSIXLY_CORRECT} is not set.
|
||||
|
||||
@end table
|
||||
|
||||
The @env{GREP_OPTIONS} environment variable of @command{grep} 2.20 and
|
||||
@ -1148,10 +1161,74 @@ Interpret patterns as fixed strings, not regular expressions.
|
||||
@opindex --perl-regexp
|
||||
@cindex matching Perl-compatible regular expressions
|
||||
Interpret patterns as Perl-compatible regular expressions (PCREs).
|
||||
PCRE support is here to stay, but consider this option experimental when
|
||||
combined with the @option{-z} (@option{--null-data}) option, and note that
|
||||
@samp{grep@ -P} may warn of unimplemented features.
|
||||
@xref{Other Options}.
|
||||
|
||||
For documentation, refer to @url{https://www.pcre.org/}, with these caveats:
|
||||
@itemize
|
||||
@item
|
||||
In a UTF-8 locale, Perl treats data as UTF-8 only under certain
|
||||
conditions, e.g., if @command{perl} is invoked with the @option{-C}
|
||||
option or the @env{PERL_UNICODE} environment variable set appropriately.
|
||||
Similarly, @command{pcre2grep} treats data as UTF-8 only if
|
||||
invoked with @option{-u} or @option{-U}.
|
||||
In contrast, in a UTF-8 locale @command{grep} and @command{git grep}
|
||||
always treat data as UTF-8.
|
||||
|
||||
@item
|
||||
In Perl and @command{git grep -P}, @samp{\d} matches all Unicode digits,
|
||||
even if they are not ASCII.
|
||||
For example, @samp{\d} matches
|
||||
@ifnottex
|
||||
``٣''
|
||||
@end ifnottex
|
||||
(U+0663 ARABIC-INDIC DIGIT THREE).
|
||||
In contrast, in @samp{grep -P}, @samp{\d} matches only
|
||||
the ten ASCII digits, regardless of locale.
|
||||
In @command{pcre2grep}, @samp{\d} ordinarily behaves like Perl and
|
||||
@command{git grep -P}, but when given the @option{--posix-digit} option
|
||||
it behaves like @samp{grep -P}.
|
||||
(On all platforms, @samp{\D} matches the complement of @samp{\d}.)
|
||||
|
||||
@item
|
||||
The pattern @samp{[[:digit:]]} matches all Unicode digits
|
||||
in Perl, @samp{grep -P}, @command{git grep -P}, and @command{pcre2grep},
|
||||
so you can use it
|
||||
to get the effect of Perl's @samp{\d} on all these platforms.
|
||||
In other words, in Perl and @command{git grep -P},
|
||||
@samp{\d} is equivalent to @samp{[[:digit:]]},
|
||||
whereas in @samp{grep -P}, @samp{\d} is equivalent to @samp{[0-9]},
|
||||
and @command{pcre2grep} ordinarily follows Perl but
|
||||
when given @option{--posix-digit} it follows @samp{grep -P}.
|
||||
|
||||
(On all these platforms, @samp{[[:digit:]]} is equivalent to @samp{\p@{Nd@}}
|
||||
and to @samp{\p@{General_Category: Decimal_Number@}}.)
|
||||
|
||||
@item
|
||||
If @command{grep} is built with PCRE2 version 10.43 (2024) or later,
|
||||
@samp{(?aD)} causes @samp{\d} to behave like @samp{[0-9]} and
|
||||
@samp{(?-aD)} causes it to behave like @samp{[[:digit:]]}.
|
||||
|
||||
@item
|
||||
Although PCRE tracks the syntax and semantics of Perl's regular
|
||||
expressions, the match is not always exact. Perl
|
||||
evolves and a Perl installation may predate or postdate the PCRE2
|
||||
installation on the same host, or their Unicode versions may differ,
|
||||
or Perl and PCRE2 may disagree about an obscure construct.
|
||||
|
||||
@item
|
||||
By default, @command{grep} applies each regexp to a line at a time,
|
||||
so the @samp{(?s)} directive (making @samp{.} match line breaks)
|
||||
is generally ineffective.
|
||||
However, with @option{-z} (@option{--null-data}) it can work:
|
||||
@example
|
||||
$ printf 'a\nb\n' |grep -zP '(?s)a.b'
|
||||
a
|
||||
b
|
||||
@end example
|
||||
But beware: with the @option{-z} (@option{--null-data}) option and a file
|
||||
containing no NUL byte, @command{grep} must read the entire file into memory
|
||||
before processing any of it.
|
||||
Thus, it will exhaust memory and fail for some large files.
|
||||
@end itemize
|
||||
|
||||
@end table
|
||||
|
||||
@ -1167,12 +1244,13 @@ by using various operators to combine smaller expressions.
|
||||
three different versions of regular expression syntax:
|
||||
basic (BRE), extended (ERE), and Perl-compatible (PCRE).
|
||||
In GNU @command{grep},
|
||||
there is no difference in available functionality between basic and
|
||||
extended syntax.
|
||||
In other implementations, basic regular expressions are less powerful.
|
||||
basic and extended regular expressions are merely different notations
|
||||
for the same pattern-matching functionality.
|
||||
In other implementations, basic regular expressions are ordinarily
|
||||
less powerful than extended, though occasionally it is the other way around.
|
||||
The following description applies to extended regular expressions;
|
||||
differences for basic regular expressions are summarized afterwards.
|
||||
Perl-compatible regular expressions give additional functionality, and
|
||||
Perl-compatible regular expressions have different functionality, and
|
||||
are documented in the @i{pcre2syntax}(3) and @i{pcre2pattern}(3) manual
|
||||
pages, but work only if PCRE is available in the system.
|
||||
|
||||
@ -1297,13 +1375,13 @@ encoding error.
|
||||
@cindex range expression
|
||||
Within a bracket expression, a @dfn{range expression} consists of two
|
||||
characters separated by a hyphen.
|
||||
It matches any single character that
|
||||
sorts between the two characters, inclusive.
|
||||
In the default C locale, the sorting sequence is the native character
|
||||
order; for example, @samp{[a-d]} is equivalent to @samp{[abcd]}.
|
||||
In other locales, the sorting sequence is not specified, and
|
||||
@samp{[a-d]} might be equivalent to @samp{[abcd]} or to
|
||||
@samp{[aBbCcDd]}, or it might fail to match any character, or the set of
|
||||
In the default C locale, it matches any single character that appears
|
||||
between the two characters in ASCII order, inclusive.
|
||||
For example, @samp{[a-d]} is equivalent to @samp{[abcd]}.
|
||||
In other locales the behavior is unspecified:
|
||||
@samp{[a-d]} might be equivalent to
|
||||
@samp{[abcd]} or @samp{[aBbCcDd]} or some other bracket expression,
|
||||
or it might fail to match any character, or the set of
|
||||
characters that it matches might be erratic, or it might be invalid.
|
||||
To obtain the traditional interpretation
|
||||
of bracket expressions, you can use the @samp{C} locale by setting the
|
||||
@ -1706,8 +1784,9 @@ A range expression outside the POSIX locale. For example, in some
|
||||
locales @samp{[a-z]} might match some characters that are not
|
||||
lowercase letters, or might not match some lowercase letters, or might
|
||||
be invalid. With GNU @command{grep} it is not documented whether
|
||||
these range expressions use native code points, or use the collating
|
||||
sequence specified by the @env{LC_COLLATE} category, or have some
|
||||
these range expressions use native code points, or use the collation
|
||||
sequence specified by the @env{LC_COLLATE} category, or use the
|
||||
collation ordering used by @command{sort} and @code{strcoll}, or have some
|
||||
other interpretation. Outside the POSIX locale, it is portable to use
|
||||
@samp{[[:lower:]]} to match a lower-case letter, or
|
||||
@samp{[abcdefghijklmnopqrstuvwxyz]} to match an ASCII lower-case
|
||||
@ -1975,7 +2054,7 @@ Note that on some platforms,
|
||||
except the available memory.
|
||||
|
||||
@item
|
||||
Why does @command{grep} report ``Binary file matches''?
|
||||
Why does @command{grep} report ``binary file matches''?
|
||||
|
||||
If @command{grep} listed all matching ``lines'' from a binary file, it
|
||||
would probably generate output that is not useful, and it might even
|
||||
@ -2109,7 +2188,7 @@ removed from POSIX in 2001, deprecated by GNU Grep 2.5.3 in 2007,
|
||||
and changed to issue obsolescence warnings by GNU Grep 3.8 in 2022;
|
||||
eventually, they are planned to be removed entirely.
|
||||
|
||||
If you prefer the old names, you can use use your own substitutes,
|
||||
If you prefer the old names, you can use your own substitutes,
|
||||
such as a shell script named @command{egrep} with the following
|
||||
contents:
|
||||
|
||||
|
||||
2
gnulib
2
gnulib
@ -1 +1 @@
|
||||
Subproject commit 2ba7c75ed134b73c857ca5575d8803313a50db60
|
||||
Subproject commit 4f6ac2c3c689cd7312b5f9da97791b14bbc2ee53
|
||||
@ -1,4 +1,3 @@
|
||||
AM_CFLAGS =
|
||||
include gnulib.mk
|
||||
AM_CFLAGS = $(GNULIB_TEST_WARN_CFLAGS) $(WERROR_CFLAGS)
|
||||
|
||||
AM_CFLAGS += $(GNULIB_TEST_WARN_CFLAGS) $(WERROR_CFLAGS)
|
||||
include gnulib.mk
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright 1997-1998, 2005-2023 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Output colorization.
|
||||
Copyright 2011-2023 Free Software Foundation, Inc.
|
||||
Copyright 2011-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Without this pragma, gcc 4.7.0 20120102 suggests that the
|
||||
init_colorize function might be candidate for attribute 'const' */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Output colorization on MS-Windows.
|
||||
Copyright 2011-2023 Free Software Foundation, Inc.
|
||||
Copyright 2011-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Eli Zaretskii. */
|
||||
|
||||
@ -96,7 +94,7 @@ w32_sgr2attr (const char *sgr_seq)
|
||||
{
|
||||
if (*p == ';' || *p == '\0')
|
||||
{
|
||||
code = strtol (s, NULL, 10);
|
||||
code = strtol (s, nullptr, 10);
|
||||
s = p + (*p != '\0');
|
||||
|
||||
switch (code)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/* Output colorization.
|
||||
|
||||
Copyright 2011-2023 Free Software Foundation, Inc.
|
||||
Copyright 2011-2026 Free Software Foundation, Inc.
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
extern int should_colorize (void);
|
||||
extern void init_colorize (void);
|
||||
|
||||
41
m4/pcre.m4
41
m4/pcre.m4
@ -1,6 +1,6 @@
|
||||
# pcre.m4 - check for PCRE library support
|
||||
|
||||
# Copyright (C) 2010-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2010-2026 Free Software Foundation, Inc.
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
@ -21,21 +21,38 @@ AC_DEFUN([gl_FUNC_PCRE],
|
||||
use_pcre=no
|
||||
|
||||
if test $test_pcre != no; then
|
||||
PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [: ${PCRE_LIBS=-lpcre2-8}])
|
||||
|
||||
AS_CASE([${PCRE_CFLAGS+set}@${PCRE_LIBS+set}@$PKG_CONFIG],
|
||||
[@@false], [],
|
||||
[@@*], [PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [:])])
|
||||
|
||||
AC_CACHE_CHECK([for pcre2_compile], [pcre_cv_have_pcre2_compile],
|
||||
[pcre_saved_CFLAGS=$CFLAGS
|
||||
pcre_saved_LIBS=$LIBS
|
||||
CFLAGS="$CFLAGS $PCRE_CFLAGS"
|
||||
LIBS="$PCRE_LIBS $LIBS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include <pcre2.h>
|
||||
]],
|
||||
[[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0);
|
||||
return !p;]])],
|
||||
[pcre_cv_have_pcre2_compile=yes],
|
||||
[pcre_cv_have_pcre2_compile=no])
|
||||
pcre_cv_have_pcre2_compile=no
|
||||
|
||||
while
|
||||
CFLAGS="$pcre_saved_CFLAGS $PCRE_CFLAGS"
|
||||
LIBS="$pcre_saved_LIBS $PCRE_LIBS"
|
||||
AC_LINK_IFELSE(
|
||||
[AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include <pcre2.h>
|
||||
]],
|
||||
[[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0);
|
||||
return !p;]])],
|
||||
[pcre_cv_have_pcre2_compile=yes])
|
||||
test $pcre_cv_have_pcre2_compile = no
|
||||
do
|
||||
AS_CASE([$PCRE_CFLAGS@$PCRE_LIBS],
|
||||
[@-lpcre2-8],
|
||||
[# Even the fallback setting fails; give up.
|
||||
PCRE_LIBS=
|
||||
break])
|
||||
# Fallback setting.
|
||||
PCRE_CFLAGS=
|
||||
PCRE_LIBS=-lpcre2-8
|
||||
done
|
||||
|
||||
CFLAGS=$pcre_saved_CFLAGS
|
||||
LIBS=$pcre_saved_LIBS])
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# List of files containing translatable strings.
|
||||
#
|
||||
# Copyright 1997-1998, 2005-2023 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
## Process this file with automake to create Makefile.in
|
||||
# Copyright 1997-1998, 2005-2023 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -28,13 +28,12 @@ grep_SOURCES = \
|
||||
die.h \
|
||||
grep.c \
|
||||
kwsearch.c \
|
||||
kwset.c \
|
||||
searchutils.c
|
||||
if USE_PCRE
|
||||
grep_SOURCES += pcresearch.c
|
||||
endif
|
||||
|
||||
noinst_HEADERS = grep.h kwset.h search.h system.h
|
||||
noinst_HEADERS = grep.h search.h system.h
|
||||
|
||||
# Sometimes, the expansion of $(LIBINTL) includes -lc which may
|
||||
# include modules defining variables like 'optind', so libgreputils.a
|
||||
@ -42,7 +41,9 @@ noinst_HEADERS = grep.h kwset.h search.h system.h
|
||||
# But libgreputils.a must also follow $(LIBINTL), since libintl uses
|
||||
# replacement functions defined in libgreputils.a.
|
||||
LDADD = \
|
||||
../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a $(LIBICONV) \
|
||||
../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a \
|
||||
$(HARD_LOCALE_LIB) $(LIBC32CONV) \
|
||||
$(LIBSIGSEGV) $(LIBUNISTRING) $(MBRTOWC_LIB) $(SETLOCALE_NULL_LIB) \
|
||||
$(LIBTHREAD)
|
||||
|
||||
grep_LDADD = $(LDADD) $(PCRE_LIBS) $(LIBCSTACK)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* dfasearch.c - searching subroutines using dfa and regex for grep.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2023 Free Software Foundation, Inc.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,15 +12,13 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written August 1992 by Mike Haertel. */
|
||||
|
||||
#include <config.h>
|
||||
#include "intprops.h"
|
||||
#include "search.h"
|
||||
#include <search.h>
|
||||
#include "die.h"
|
||||
#include <error.h>
|
||||
|
||||
@ -145,15 +143,15 @@ regex_compile (struct dfa_comp *dc, char const *p, idx_t len,
|
||||
bool syntax_only)
|
||||
{
|
||||
struct re_pattern_buffer pat;
|
||||
pat.buffer = NULL;
|
||||
pat.buffer = nullptr;
|
||||
pat.allocated = 0;
|
||||
|
||||
/* Do not use a fastmap with -i, to work around glibc Bug#20381. */
|
||||
static_assert (UCHAR_MAX < IDX_MAX);
|
||||
idx_t uchar_max = UCHAR_MAX;
|
||||
pat.fastmap = (syntax_only | match_icase) ? NULL : ximalloc (uchar_max + 1);
|
||||
pat.fastmap = syntax_only | match_icase ? nullptr : ximalloc (uchar_max + 1);
|
||||
|
||||
pat.translate = NULL;
|
||||
pat.translate = nullptr;
|
||||
|
||||
if (syntax_only)
|
||||
re_set_syntax (syntax_bits | RE_NO_SUB);
|
||||
@ -227,7 +225,7 @@ GEAcompile (char *pattern, idx_t size, reg_syntax_t syntax_bits,
|
||||
char const *prev = pattern;
|
||||
|
||||
/* Buffer containing back-reference-free patterns. */
|
||||
char *buf = NULL;
|
||||
char *buf = nullptr;
|
||||
idx_t buflen = 0;
|
||||
idx_t bufalloc = 0;
|
||||
|
||||
@ -324,13 +322,13 @@ GEAcompile (char *pattern, idx_t size, reg_syntax_t syntax_bits,
|
||||
size = total;
|
||||
}
|
||||
else
|
||||
motif = NULL;
|
||||
motif = nullptr;
|
||||
|
||||
dfaparse (pattern, size, dc->dfa);
|
||||
kwsmusts (dc);
|
||||
dfacomp (NULL, 0, dc->dfa, 1);
|
||||
dfacomp (nullptr, 0, dc->dfa, 1);
|
||||
|
||||
if (buf != NULL)
|
||||
if (buf)
|
||||
{
|
||||
if (exact || !dfasupported (dc->dfa))
|
||||
{
|
||||
@ -424,7 +422,7 @@ EGexecute (void *vdc, char const *buf, idx_t size, idx_t *match_size,
|
||||
goto success;
|
||||
if (mb_start < beg)
|
||||
mb_start = beg;
|
||||
if (mb_goback (&mb_start, NULL, match, buflim) == 0)
|
||||
if (mb_goback (&mb_start, nullptr, match, buflim) == 0)
|
||||
goto success;
|
||||
/* The matched line starts in the middle of a multibyte
|
||||
character. Perform the DFA search starting from the
|
||||
@ -440,8 +438,8 @@ EGexecute (void *vdc, char const *buf, idx_t size, idx_t *match_size,
|
||||
potential matches; this is more likely to be fast
|
||||
than falling back to KWset would be. */
|
||||
next_beg = dfaexec (superset, dfa_beg, (char *) end, 0,
|
||||
&count, NULL);
|
||||
if (next_beg == NULL || next_beg == end)
|
||||
&count, nullptr);
|
||||
if (!next_beg || next_beg == end)
|
||||
continue;
|
||||
|
||||
/* Narrow down to the line we've found. */
|
||||
@ -463,7 +461,7 @@ EGexecute (void *vdc, char const *buf, idx_t size, idx_t *match_size,
|
||||
|
||||
/* If there's no match, or if we've matched the sentinel,
|
||||
we're done. */
|
||||
if (next_beg == NULL || next_beg == end)
|
||||
if (!next_beg || next_beg == end)
|
||||
continue;
|
||||
|
||||
/* Narrow down to the line we've found. */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Report an error and exit.
|
||||
Copyright 2016-2023 Free Software Foundation, Inc.
|
||||
Copyright 2016-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef DIE_H
|
||||
#define DIE_H
|
||||
|
||||
308
src/grep.c
308
src/grep.c
@ -1,5 +1,5 @@
|
||||
/* grep.c - main driver file for grep.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2023 Free Software Foundation, Inc.
|
||||
Copyright (C) 1992, 1997-2002, 2004-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,15 +12,14 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written July 1992 by Mike Haertel. */
|
||||
|
||||
#include <config.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <uchar.h>
|
||||
#include <wchar.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdarg.h>
|
||||
@ -35,18 +34,17 @@
|
||||
#include "closeout.h"
|
||||
#include "colorize.h"
|
||||
#include "die.h"
|
||||
#include "error.h"
|
||||
#include <error.h>
|
||||
#include "exclude.h"
|
||||
#include "exitfail.h"
|
||||
#include "fcntl-safer.h"
|
||||
#include "fts_.h"
|
||||
#include "getopt.h"
|
||||
#include <getopt.h>
|
||||
#include "grep.h"
|
||||
#include "hash.h"
|
||||
#include "intprops.h"
|
||||
#include "propername.h"
|
||||
#include "safe-read.h"
|
||||
#include "search.h"
|
||||
#include <search.h>
|
||||
#include "c-strcase.h"
|
||||
#include "version-etc.h"
|
||||
#include "xalloc.h"
|
||||
@ -178,7 +176,7 @@ update_patterns (char *keys, idx_t dupfree_size, idx_t size,
|
||||
|
||||
intptr_t dst_offset_1 = dst - keys + 1;
|
||||
int inserted = hash_insert_if_absent (pattern_table,
|
||||
(void *) dst_offset_1, NULL);
|
||||
(void *) dst_offset_1, nullptr);
|
||||
if (inserted)
|
||||
{
|
||||
if (inserted < 0)
|
||||
@ -425,20 +423,24 @@ color_cap_ne_fct (void)
|
||||
static const struct color_cap color_dict[] =
|
||||
{
|
||||
{ "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
|
||||
{ "ms", &selected_match_color, NULL }, /* selected matched text */
|
||||
{ "mc", &context_match_color, NULL }, /* context matched text */
|
||||
{ "fn", &filename_color, NULL }, /* filename */
|
||||
{ "ln", &line_num_color, NULL }, /* line number */
|
||||
{ "bn", &byte_num_color, NULL }, /* byte (sic) offset */
|
||||
{ "se", &sep_color, NULL }, /* separator */
|
||||
{ "sl", &selected_line_color, NULL }, /* selected lines */
|
||||
{ "cx", &context_line_color, NULL }, /* context lines */
|
||||
{ "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
|
||||
{ "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
|
||||
{ NULL, NULL, NULL }
|
||||
{ "ms", &selected_match_color, nullptr }, /* selected matched text */
|
||||
{ "mc", &context_match_color, nullptr }, /* context matched text */
|
||||
{ "fn", &filename_color, nullptr }, /* filename */
|
||||
{ "ln", &line_num_color, nullptr }, /* line number */
|
||||
{ "bn", &byte_num_color, nullptr }, /* byte (sic) offset */
|
||||
{ "se", &sep_color, nullptr }, /* separator */
|
||||
{ "sl", &selected_line_color, nullptr }, /* selected lines */
|
||||
{ "cx", &context_line_color, nullptr }, /* context lines */
|
||||
{ "rv", nullptr, color_cap_rv_fct }, /* -v reverses sl/cx */
|
||||
{ "ne", nullptr, color_cap_ne_fct }, /* no EL on SGR_* */
|
||||
{ nullptr, nullptr, nullptr }
|
||||
};
|
||||
|
||||
/* Saved errno value from failed output functions on stdout. */
|
||||
/* Saved errno value from failed output functions on stdout.
|
||||
prline polls this to decide whether to die.
|
||||
Setting it to nonzero just before exiting can prevent clean_up_stdout
|
||||
from misbehaving on a buggy OS where 'close (STDOUT_FILENO)' fails
|
||||
with EACCES. */
|
||||
static int stdout_errno;
|
||||
|
||||
static void
|
||||
@ -503,57 +505,56 @@ enum
|
||||
/* Long options equivalences. */
|
||||
static struct option const long_options[] =
|
||||
{
|
||||
{"basic-regexp", no_argument, NULL, 'G'},
|
||||
{"extended-regexp", no_argument, NULL, 'E'},
|
||||
{"fixed-regexp", no_argument, NULL, 'F'},
|
||||
{"fixed-strings", no_argument, NULL, 'F'},
|
||||
{"perl-regexp", no_argument, NULL, 'P'},
|
||||
{"after-context", required_argument, NULL, 'A'},
|
||||
{"before-context", required_argument, NULL, 'B'},
|
||||
{"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
|
||||
{"byte-offset", no_argument, NULL, 'b'},
|
||||
{"context", required_argument, NULL, 'C'},
|
||||
{"color", optional_argument, NULL, COLOR_OPTION},
|
||||
{"colour", optional_argument, NULL, COLOR_OPTION},
|
||||
{"count", no_argument, NULL, 'c'},
|
||||
{"devices", required_argument, NULL, 'D'},
|
||||
{"directories", required_argument, NULL, 'd'},
|
||||
{"exclude", required_argument, NULL, EXCLUDE_OPTION},
|
||||
{"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
|
||||
{"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
|
||||
{"file", required_argument, NULL, 'f'},
|
||||
{"files-with-matches", no_argument, NULL, 'l'},
|
||||
{"files-without-match", no_argument, NULL, 'L'},
|
||||
{"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
|
||||
{"basic-regexp", no_argument, nullptr, 'G'},
|
||||
{"extended-regexp", no_argument, nullptr, 'E'},
|
||||
{"fixed-regexp", no_argument, nullptr, 'F'},
|
||||
{"fixed-strings", no_argument, nullptr, 'F'},
|
||||
{"perl-regexp", no_argument, nullptr, 'P'},
|
||||
{"after-context", required_argument, nullptr, 'A'},
|
||||
{"before-context", required_argument, nullptr, 'B'},
|
||||
{"binary-files", required_argument, nullptr, BINARY_FILES_OPTION},
|
||||
{"byte-offset", no_argument, nullptr, 'b'},
|
||||
{"context", required_argument, nullptr, 'C'},
|
||||
{"color", optional_argument, nullptr, COLOR_OPTION},
|
||||
{"colour", optional_argument, nullptr, COLOR_OPTION},
|
||||
{"count", no_argument, nullptr, 'c'},
|
||||
{"devices", required_argument, nullptr, 'D'},
|
||||
{"directories", required_argument, nullptr, 'd'},
|
||||
{"exclude", required_argument, nullptr, EXCLUDE_OPTION},
|
||||
{"exclude-from", required_argument, nullptr, EXCLUDE_FROM_OPTION},
|
||||
{"exclude-dir", required_argument, nullptr, EXCLUDE_DIRECTORY_OPTION},
|
||||
{"file", required_argument, nullptr, 'f'},
|
||||
{"files-with-matches", no_argument, nullptr, 'l'},
|
||||
{"files-without-match", no_argument, nullptr, 'L'},
|
||||
{"group-separator", required_argument, nullptr, GROUP_SEPARATOR_OPTION},
|
||||
{"help", no_argument, &show_help, 1},
|
||||
{"include", required_argument, NULL, INCLUDE_OPTION},
|
||||
{"ignore-case", no_argument, NULL, 'i'},
|
||||
{"no-ignore-case", no_argument, NULL, NO_IGNORE_CASE_OPTION},
|
||||
{"initial-tab", no_argument, NULL, 'T'},
|
||||
{"label", required_argument, NULL, LABEL_OPTION},
|
||||
{"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
|
||||
{"line-number", no_argument, NULL, 'n'},
|
||||
{"line-regexp", no_argument, NULL, 'x'},
|
||||
{"max-count", required_argument, NULL, 'm'},
|
||||
{"include", required_argument, nullptr, INCLUDE_OPTION},
|
||||
{"ignore-case", no_argument, nullptr, 'i'},
|
||||
{"no-ignore-case", no_argument, nullptr, NO_IGNORE_CASE_OPTION},
|
||||
{"initial-tab", no_argument, nullptr, 'T'},
|
||||
{"label", required_argument, nullptr, LABEL_OPTION},
|
||||
{"line-buffered", no_argument, nullptr, LINE_BUFFERED_OPTION},
|
||||
{"line-number", no_argument, nullptr, 'n'},
|
||||
{"line-regexp", no_argument, nullptr, 'x'},
|
||||
{"max-count", required_argument, nullptr, 'm'},
|
||||
|
||||
{"no-filename", no_argument, NULL, 'h'},
|
||||
{"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
|
||||
{"no-messages", no_argument, NULL, 's'},
|
||||
{"null", no_argument, NULL, 'Z'},
|
||||
{"null-data", no_argument, NULL, 'z'},
|
||||
{"only-matching", no_argument, NULL, 'o'},
|
||||
{"quiet", no_argument, NULL, 'q'},
|
||||
{"recursive", no_argument, NULL, 'r'},
|
||||
{"dereference-recursive", no_argument, NULL, 'R'},
|
||||
{"regexp", required_argument, NULL, 'e'},
|
||||
{"invert-match", no_argument, NULL, 'v'},
|
||||
{"silent", no_argument, NULL, 'q'},
|
||||
{"text", no_argument, NULL, 'a'},
|
||||
{"binary", no_argument, NULL, 'U'},
|
||||
{"unix-byte-offsets", no_argument, NULL, 'u'},
|
||||
{"version", no_argument, NULL, 'V'},
|
||||
{"with-filename", no_argument, NULL, 'H'},
|
||||
{"word-regexp", no_argument, NULL, 'w'},
|
||||
{"no-filename", no_argument, nullptr, 'h'},
|
||||
{"no-group-separator", no_argument, nullptr, GROUP_SEPARATOR_OPTION},
|
||||
{"no-messages", no_argument, nullptr, 's'},
|
||||
{"null", no_argument, nullptr, 'Z'},
|
||||
{"null-data", no_argument, nullptr, 'z'},
|
||||
{"only-matching", no_argument, nullptr, 'o'},
|
||||
{"quiet", no_argument, nullptr, 'q'},
|
||||
{"recursive", no_argument, nullptr, 'r'},
|
||||
{"dereference-recursive", no_argument, nullptr, 'R'},
|
||||
{"regexp", required_argument, nullptr, 'e'},
|
||||
{"invert-match", no_argument, nullptr, 'v'},
|
||||
{"silent", no_argument, nullptr, 'q'},
|
||||
{"text", no_argument, nullptr, 'a'},
|
||||
{"binary", no_argument, nullptr, 'U'},
|
||||
{"version", no_argument, nullptr, 'V'},
|
||||
{"with-filename", no_argument, nullptr, 'H'},
|
||||
{"word-regexp", no_argument, nullptr, 'w'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
@ -584,7 +585,7 @@ enum directories_type
|
||||
/* How to handle directories. */
|
||||
static char const *const directories_args[] =
|
||||
{
|
||||
"read", "recurse", "skip", NULL
|
||||
"read", "recurse", "skip", nullptr
|
||||
};
|
||||
static enum directories_type const directories_types[] =
|
||||
{
|
||||
@ -761,7 +762,7 @@ buf_has_encoding_errors (char *buf, idx_t size)
|
||||
if (! unibyte_mask)
|
||||
return false;
|
||||
|
||||
mbstate_t mbs = { 0 };
|
||||
mbstate_t mbs; mbszero (&mbs);
|
||||
ptrdiff_t clen;
|
||||
|
||||
buf[size] = -1;
|
||||
@ -874,6 +875,7 @@ static int bufdesc; /* File descriptor. */
|
||||
static char *bufbeg; /* Beginning of user-visible stuff. */
|
||||
static char *buflim; /* Limit of user-visible stuff. */
|
||||
static idx_t pagesize; /* alignment of memory pages */
|
||||
static idx_t good_readsize; /* good size to pass to 'read' */
|
||||
static off_t bufoffset; /* Read offset. */
|
||||
static off_t after_last_match; /* Pointer after last matching line that
|
||||
would have been output if we were
|
||||
@ -882,8 +884,15 @@ static bool skip_nuls; /* Skip '\0' in data. */
|
||||
static bool skip_empty_lines; /* Skip empty lines in data. */
|
||||
static intmax_t totalnl; /* Total newline count before lastnl. */
|
||||
|
||||
/* Initial buffer size, not counting slop. */
|
||||
enum { INITIAL_BUFSIZE = 96 * 1024 };
|
||||
/* Minimum value for good_readsize.
|
||||
If it's too small, there are more syscalls;
|
||||
if too large, it wastes memory and likely cache.
|
||||
Use 96 KiB as it gave good results in a benchmark in 2018
|
||||
(see 2018-09-06 commit labeled "grep: triple initial buffer size: 32k->96k")
|
||||
even though the same benchmark in 2024 found no significant
|
||||
difference for values from 32 KiB to 1024 KiB on Ubuntu 24.04.1 LTS
|
||||
with an Intel Xeon W-1350. */
|
||||
enum { GOOD_READSIZE_MIN = 96 * 1024 };
|
||||
|
||||
/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
|
||||
an integer or a pointer. Both args must be free of side effects. */
|
||||
@ -948,9 +957,9 @@ fillbuf (idx_t save, struct stat const *st)
|
||||
{
|
||||
char *readbuf;
|
||||
|
||||
/* After BUFLIM, we need room for at least a page of data plus a
|
||||
/* After BUFLIM, we need room for a good-sized read plus a
|
||||
trailing uword. */
|
||||
idx_t min_after_buflim = pagesize + uword_size;
|
||||
idx_t min_after_buflim = good_readsize + uword_size;
|
||||
|
||||
if (min_after_buflim <= buffer + bufalloc - buflim)
|
||||
readbuf = buflim;
|
||||
@ -959,8 +968,8 @@ fillbuf (idx_t save, struct stat const *st)
|
||||
char *newbuf;
|
||||
|
||||
/* For data to be searched we need room for the saved bytes,
|
||||
plus at least a page of data to read. */
|
||||
idx_t minsize = save + pagesize;
|
||||
plus at least a good-sized read. */
|
||||
idx_t minsize = save + good_readsize;
|
||||
|
||||
/* Add enough room so that the buffer is aligned and has room
|
||||
for byte sentinels fore and aft, and so that a uword can
|
||||
@ -986,7 +995,7 @@ fillbuf (idx_t save, struct stat const *st)
|
||||
alloc_max = MAX (a, bufalloc + incr_min);
|
||||
}
|
||||
|
||||
newbuf = xpalloc (NULL, &bufalloc, incr_min, alloc_max, 1);
|
||||
newbuf = xpalloc (nullptr, &bufalloc, incr_min, alloc_max, 1);
|
||||
}
|
||||
|
||||
readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
|
||||
@ -1003,16 +1012,13 @@ fillbuf (idx_t save, struct stat const *st)
|
||||
|
||||
clear_asan_poison ();
|
||||
|
||||
idx_t readsize = buffer + bufalloc - uword_size - readbuf;
|
||||
readsize -= readsize % pagesize;
|
||||
|
||||
idx_t fillsize;
|
||||
ptrdiff_t fillsize;
|
||||
bool cc = true;
|
||||
|
||||
while (true)
|
||||
{
|
||||
fillsize = safe_read (bufdesc, readbuf, readsize);
|
||||
if (fillsize == SAFE_READ_ERROR)
|
||||
fillsize = safe_read (bufdesc, readbuf, good_readsize);
|
||||
if (fillsize < 0)
|
||||
{
|
||||
fillsize = 0;
|
||||
cc = false;
|
||||
@ -1087,14 +1093,14 @@ static bool count_matches; /* Count matching lines. */
|
||||
static intmax_t max_count; /* Max number of selected
|
||||
lines from an input file. */
|
||||
static bool line_buffered; /* Use line buffering. */
|
||||
static char *label = NULL; /* Fake filename for stdin */
|
||||
static char *label; /* Fake filename for stdin */
|
||||
|
||||
|
||||
/* Internal variables to keep track of byte count, context, etc. */
|
||||
static intmax_t totalcc; /* Total character count before bufbeg. */
|
||||
static char const *lastnl; /* Pointer after last newline counted. */
|
||||
static char *lastout; /* Pointer after last character output;
|
||||
NULL if no character has been output
|
||||
null if no character has been output
|
||||
or if it's conceptually before bufbeg. */
|
||||
static intmax_t outleft; /* Maximum number of selected lines. */
|
||||
static intmax_t pending; /* Pending lines of output.
|
||||
@ -1211,7 +1217,7 @@ print_line_middle (char *beg, char *lim,
|
||||
idx_t match_size;
|
||||
ptrdiff_t match_offset;
|
||||
char *cur;
|
||||
char *mid = NULL;
|
||||
char *mid = nullptr;
|
||||
char *b;
|
||||
|
||||
for (cur = beg;
|
||||
@ -1243,7 +1249,7 @@ print_line_middle (char *beg, char *lim,
|
||||
{
|
||||
char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
|
||||
if (! print_line_head (b, match_size, lim, sep))
|
||||
return NULL;
|
||||
return nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1251,7 +1257,7 @@ print_line_middle (char *beg, char *lim,
|
||||
if (mid)
|
||||
{
|
||||
cur = mid;
|
||||
mid = NULL;
|
||||
mid = nullptr;
|
||||
}
|
||||
fwrite_errno (cur, 1, b - cur);
|
||||
}
|
||||
@ -1315,7 +1321,7 @@ prline (char *beg, char *lim, char sep)
|
||||
? selected_match_color : context_match_color);
|
||||
}
|
||||
else
|
||||
line_color = match_color = NULL; /* Shouldn't be used. */
|
||||
line_color = match_color = nullptr; /* Shouldn't be used. */
|
||||
|
||||
if ((only_matching && matching)
|
||||
|| (color_option && (*line_color || *match_color)))
|
||||
@ -1466,7 +1472,7 @@ grepbuf (char *beg, char const *lim)
|
||||
{
|
||||
idx_t match_size;
|
||||
ptrdiff_t match_offset = execute (compiled_pattern, p, lim - p,
|
||||
&match_size, NULL);
|
||||
&match_size, nullptr);
|
||||
if (match_offset < 0)
|
||||
{
|
||||
if (!out_invert)
|
||||
@ -1481,13 +1487,18 @@ grepbuf (char *beg, char const *lim)
|
||||
break;
|
||||
if (!out_invert || p < b)
|
||||
{
|
||||
if (list_files != LISTFILES_NONE)
|
||||
return 1;
|
||||
char *prbeg = out_invert ? p : b;
|
||||
char *prend = out_invert ? b : endp;
|
||||
prtext (prbeg, prend);
|
||||
if (!outleft || done_on_match)
|
||||
{
|
||||
if (exit_on_match)
|
||||
exit (errseen ? exit_failure : EXIT_SUCCESS);
|
||||
{
|
||||
stdout_errno = -1;
|
||||
exit (EXIT_SUCCESS);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1503,9 +1514,6 @@ grep (int fd, struct stat const *st, bool *ineof)
|
||||
{
|
||||
intmax_t nlines, i;
|
||||
idx_t residue, save;
|
||||
char oldc;
|
||||
char *beg;
|
||||
char *lim;
|
||||
char eol = eolbyte;
|
||||
char nul_zapper = '\0';
|
||||
bool done_on_match_0 = done_on_match;
|
||||
@ -1558,7 +1566,11 @@ grep (int fd, struct stat const *st, bool *ineof)
|
||||
if (binary_files == WITHOUT_MATCH_BINARY_FILES)
|
||||
return 0;
|
||||
if (!count_matches)
|
||||
done_on_match = out_quiet = true;
|
||||
{
|
||||
out_quiet = true;
|
||||
if (max_count == INTMAX_MAX)
|
||||
done_on_match = true;
|
||||
}
|
||||
nlines_first_null = nlines;
|
||||
nul_zapper = eol;
|
||||
skip_nuls = skip_empty_lines;
|
||||
@ -1568,7 +1580,7 @@ grep (int fd, struct stat const *st, bool *ineof)
|
||||
if (lastout)
|
||||
lastout = bufbeg;
|
||||
|
||||
beg = bufbeg + save;
|
||||
char *beg = bufbeg + save;
|
||||
|
||||
/* no more data to scan (eof) except for maybe a residue -> break */
|
||||
if (beg == buflim)
|
||||
@ -1581,16 +1593,9 @@ grep (int fd, struct stat const *st, bool *ineof)
|
||||
|
||||
/* Determine new residue (the length of an incomplete line at the end of
|
||||
the buffer, 0 means there is no incomplete last line). */
|
||||
oldc = beg[-1];
|
||||
beg[-1] = eol;
|
||||
/* If rawmemrchr existed it could be used here, since we have ensured
|
||||
that this use of memrchr is guaranteed never to return NULL. */
|
||||
lim = memrchr (beg - 1, eol, buflim - beg + 1);
|
||||
++lim;
|
||||
beg[-1] = oldc;
|
||||
if (lim == beg)
|
||||
lim = beg - residue;
|
||||
char *last_eol = memrchr (beg, eol, buflim - beg);
|
||||
beg -= residue;
|
||||
char *lim = last_eol ? last_eol + 1 : beg;
|
||||
residue = buflim - lim;
|
||||
|
||||
if (beg < lim)
|
||||
@ -1776,19 +1781,19 @@ drain_input (int fd, struct stat const *st)
|
||||
{
|
||||
#ifdef SPLICE_F_MOVE
|
||||
/* Should be faster, since it need not copy data to user space. */
|
||||
nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
|
||||
INITIAL_BUFSIZE, SPLICE_F_MOVE);
|
||||
nbytes = splice (fd, nullptr, STDOUT_FILENO, nullptr,
|
||||
good_readsize, SPLICE_F_MOVE);
|
||||
if (0 <= nbytes || errno != EINVAL)
|
||||
{
|
||||
while (0 < nbytes)
|
||||
nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
|
||||
INITIAL_BUFSIZE, SPLICE_F_MOVE);
|
||||
nbytes = splice (fd, nullptr, STDOUT_FILENO, nullptr,
|
||||
good_readsize, SPLICE_F_MOVE);
|
||||
return nbytes == 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
while ((nbytes = safe_read (fd, buffer, bufalloc)))
|
||||
if (nbytes == SAFE_READ_ERROR)
|
||||
if (nbytes < 0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -1864,8 +1869,8 @@ grepdesc (int desc, bool command_line)
|
||||
suppressible_error (errno);
|
||||
|
||||
fts_arg[0] = (char *) filename;
|
||||
fts_arg[1] = NULL;
|
||||
fts = fts_open (fts_arg, opts, NULL);
|
||||
fts_arg[1] = nullptr;
|
||||
fts = fts_open (fts_arg, opts, nullptr);
|
||||
|
||||
if (!fts)
|
||||
xalloc_die ();
|
||||
@ -2060,10 +2065,11 @@ Context control:\n\
|
||||
-U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
|
||||
\n"));
|
||||
printf (_("\
|
||||
When FILE is '-', read standard input. With no FILE, read '.' if\n\
|
||||
recursive, '-' otherwise. With fewer than two FILEs, assume -h.\n\
|
||||
Exit status is 0 if any line is selected, 1 otherwise;\n\
|
||||
if any error occurs and -q is not given, the exit status is 2.\n"));
|
||||
When FILE is '-', read standard input. If no FILE is given, read standard\n\
|
||||
input, but with -r, recursively search the working directory instead. With\n\
|
||||
fewer than two FILEs, assume -h. Exit status is 0 if any line is selected,\n\
|
||||
1 otherwise; if any error occurs and -q is not given, the exit status is 2.\n"
|
||||
));
|
||||
emit_bug_reporting_address ();
|
||||
}
|
||||
exit (status);
|
||||
@ -2132,7 +2138,7 @@ get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
|
||||
while (true)
|
||||
{
|
||||
opt = getopt_long (argc, (char **) argv, short_options,
|
||||
long_options, NULL);
|
||||
long_options, nullptr);
|
||||
if (! c_isdigit (opt))
|
||||
break;
|
||||
|
||||
@ -2185,14 +2191,14 @@ parse_grep_colors (void)
|
||||
char *val;
|
||||
|
||||
p = getenv ("GREP_COLORS"); /* Plural! */
|
||||
if (p == NULL || *p == '\0')
|
||||
if (!p || *p == '\0')
|
||||
return;
|
||||
|
||||
/* Work off a writable copy. */
|
||||
q = xstrdup (p);
|
||||
|
||||
name = q;
|
||||
val = NULL;
|
||||
val = nullptr;
|
||||
/* From now on, be well-formed or you're gone. */
|
||||
for (;;)
|
||||
if (*q == ':' || *q == '\0')
|
||||
@ -2214,7 +2220,7 @@ parse_grep_colors (void)
|
||||
if (c == '\0')
|
||||
return;
|
||||
name = q;
|
||||
val = NULL;
|
||||
val = nullptr;
|
||||
}
|
||||
else if (*q == '=')
|
||||
{
|
||||
@ -2223,7 +2229,7 @@ parse_grep_colors (void)
|
||||
*q++ = '\0'; /* Terminate name. */
|
||||
val = q; /* Can be the empty string. */
|
||||
}
|
||||
else if (val == NULL)
|
||||
else if (!val)
|
||||
q++; /* Accumulate name. */
|
||||
else if (*q == ';' || c_isdigit (*q))
|
||||
q++; /* Accumulate val. Protect the terminal from being sent crap. */
|
||||
@ -2235,7 +2241,7 @@ parse_grep_colors (void)
|
||||
static bool
|
||||
contains_encoding_error (char const *pat, idx_t patlen)
|
||||
{
|
||||
mbstate_t mbs = { 0 };
|
||||
mbstate_t mbs; mbszero (&mbs);
|
||||
ptrdiff_t charlen;
|
||||
|
||||
for (idx_t i = 0; i < patlen; i += charlen)
|
||||
@ -2261,12 +2267,12 @@ setup_ok_fold (void)
|
||||
continue;
|
||||
|
||||
int ok = 1;
|
||||
wchar_t folded[CASE_FOLDED_BUFSIZE];
|
||||
char32_t folded[CASE_FOLDED_BUFSIZE];
|
||||
for (int n = case_folded_counterparts (wi, folded); 0 <= --n; )
|
||||
{
|
||||
char buf[MB_LEN_MAX];
|
||||
mbstate_t s = { 0 };
|
||||
if (wcrtomb (buf, folded[n], &s) != 1)
|
||||
mbstate_t s; mbszero (&s);
|
||||
if (c32rtomb (buf, folded[n], &s) != 1)
|
||||
{
|
||||
ok = -1;
|
||||
break;
|
||||
@ -2291,8 +2297,8 @@ fgrep_icase_charlen (char const *pat, idx_t patlen, mbstate_t *mbs)
|
||||
if (localeinfo.sbctowc[pat0] != WEOF)
|
||||
return ok_fold[pat0];
|
||||
|
||||
wchar_t wc;
|
||||
size_t wn = mbrtowc (&wc, pat, patlen, mbs);
|
||||
char32_t wc;
|
||||
size_t wn = mbrtoc32 (&wc, pat, patlen, mbs);
|
||||
|
||||
/* If PAT starts with an encoding error, Fcompile does not work. */
|
||||
if (MB_LEN_MAX < wn)
|
||||
@ -2301,7 +2307,7 @@ fgrep_icase_charlen (char const *pat, idx_t patlen, mbstate_t *mbs)
|
||||
/* PAT starts with a multibyte character. Fcompile works if the
|
||||
character has no case folded counterparts and toupper translates
|
||||
none of its encoding's bytes. */
|
||||
wchar_t folded[CASE_FOLDED_BUFSIZE];
|
||||
char32_t folded[CASE_FOLDED_BUFSIZE];
|
||||
if (case_folded_counterparts (wc, folded))
|
||||
return -1;
|
||||
for (idx_t i = wn; 0 < --i; )
|
||||
@ -2321,7 +2327,7 @@ fgrep_icase_charlen (char const *pat, idx_t patlen, mbstate_t *mbs)
|
||||
static bool
|
||||
fgrep_icase_available (char const *pat, idx_t patlen)
|
||||
{
|
||||
mbstate_t mbs = {0,};
|
||||
mbstate_t mbs; mbszero (&mbs);
|
||||
|
||||
for (idx_t i = 0; i < patlen; )
|
||||
{
|
||||
@ -2341,7 +2347,7 @@ fgrep_to_grep_pattern (char **keys_p, idx_t *len_p)
|
||||
{
|
||||
idx_t len = *len_p;
|
||||
char *keys = *keys_p;
|
||||
mbstate_t mb_state = { 0 };
|
||||
mbstate_t mb_state; mbszero (&mb_state);
|
||||
char *new_keys = xnmalloc (len + 1, 2);
|
||||
char *p = new_keys;
|
||||
|
||||
@ -2393,7 +2399,7 @@ try_fgrep_pattern (int matcher, char *keys, idx_t *len_p)
|
||||
char *new_keys = ximalloc (len + 1);
|
||||
char *p = new_keys;
|
||||
char const *q = keys;
|
||||
mbstate_t mb_state = { 0 };
|
||||
mbstate_t mb_state; mbszero (&mb_state);
|
||||
|
||||
while (len != 0)
|
||||
{
|
||||
@ -2461,7 +2467,7 @@ try_fgrep_pattern (int matcher, char *keys, idx_t *len_p)
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
char *keys = NULL;
|
||||
char *keys = nullptr;
|
||||
idx_t keycc = 0, keyalloc = 0;
|
||||
int matcher = -1;
|
||||
int opt;
|
||||
@ -2493,13 +2499,14 @@ main (int argc, char **argv)
|
||||
#endif
|
||||
#if defined ENABLE_NLS
|
||||
bindtextdomain (PACKAGE, LOCALEDIR);
|
||||
bindtextdomain ("gnulib", GNULIB_LOCALEDIR);
|
||||
textdomain (PACKAGE);
|
||||
#endif
|
||||
|
||||
init_localeinfo (&localeinfo);
|
||||
|
||||
atexit (clean_up_stdout);
|
||||
c_stack_action (NULL);
|
||||
c_stack_action (nullptr);
|
||||
|
||||
last_recursive = 0;
|
||||
|
||||
@ -2571,11 +2578,6 @@ main (int argc, char **argv)
|
||||
binary = true;
|
||||
break;
|
||||
|
||||
case 'u':
|
||||
/* Obsolete option; it had no effect; FIXME: remove in 2023 */
|
||||
error (0, 0, _("warning: --unix-byte-offsets (-u) is obsolete"));
|
||||
break;
|
||||
|
||||
case 'V':
|
||||
show_version = true;
|
||||
break;
|
||||
@ -2699,7 +2701,6 @@ main (int argc, char **argv)
|
||||
|
||||
case 'q':
|
||||
exit_on_match = true;
|
||||
exit_failure = 0;
|
||||
break;
|
||||
|
||||
case 'R':
|
||||
@ -2827,9 +2828,12 @@ main (int argc, char **argv)
|
||||
if (show_version)
|
||||
{
|
||||
version_etc (stdout, getprogname (), PACKAGE_NAME, VERSION,
|
||||
(char *) NULL);
|
||||
(char *) nullptr);
|
||||
puts (_("Written by Mike Haertel and others; see\n"
|
||||
"<https://git.savannah.gnu.org/cgit/grep.git/tree/AUTHORS>."));
|
||||
#if HAVE_LIBPCRE
|
||||
Pprint_version ();
|
||||
#endif
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
@ -2894,9 +2898,10 @@ main (int argc, char **argv)
|
||||
if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
|
||||
{
|
||||
count_matches = false;
|
||||
done_on_match = true;
|
||||
if (max_count == INTMAX_MAX)
|
||||
done_on_match = true;
|
||||
}
|
||||
out_quiet = count_matches | done_on_match;
|
||||
out_quiet = count_matches | done_on_match | exit_on_match;
|
||||
|
||||
if (out_after < 0)
|
||||
out_after = default_context;
|
||||
@ -2918,7 +2923,7 @@ main (int argc, char **argv)
|
||||
{
|
||||
/* Legacy. */
|
||||
char *userval = getenv ("GREP_COLOR");
|
||||
if (userval != NULL && *userval != '\0')
|
||||
if (userval && *userval)
|
||||
for (char *q = userval; *q == ';' || c_isdigit (*q); q++)
|
||||
if (!q[1])
|
||||
{
|
||||
@ -2977,8 +2982,8 @@ main (int argc, char **argv)
|
||||
/* We need one byte prior and one after. */
|
||||
char eolbytes[3] = { 0, eolbyte, 0 };
|
||||
idx_t match_size;
|
||||
skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
|
||||
&match_size, NULL) == 0)
|
||||
skip_empty_lines = (!execute (compiled_pattern, eolbytes + 1, 1,
|
||||
&match_size, nullptr)
|
||||
== out_invert);
|
||||
|
||||
int num_operands = argc - optind;
|
||||
@ -2998,7 +3003,8 @@ main (int argc, char **argv)
|
||||
if (! (0 < psize && psize <= (IDX_MAX - uword_size) / 2))
|
||||
abort ();
|
||||
pagesize = psize;
|
||||
bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + uword_size;
|
||||
good_readsize = ALIGN_TO (GOOD_READSIZE_MIN, pagesize);
|
||||
bufalloc = good_readsize + pagesize + uword_size;
|
||||
buffer = ximalloc (bufalloc);
|
||||
|
||||
if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
|
||||
@ -3011,20 +3017,20 @@ main (int argc, char **argv)
|
||||
}
|
||||
else if (directories == RECURSE_DIRECTORIES && 0 < last_recursive)
|
||||
{
|
||||
static char *const cwd_only[] = { (char *) ".", NULL };
|
||||
static char *const cwd_only[] = { (char *) ".", nullptr };
|
||||
files = cwd_only;
|
||||
omit_dot_slash = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
static char *const stdin_only[] = { (char *) "-", NULL };
|
||||
static char *const stdin_only[] = { (char *) "-", nullptr };
|
||||
files = stdin_only;
|
||||
}
|
||||
|
||||
bool status = true;
|
||||
do
|
||||
status &= grep_command_line_arg (*files++);
|
||||
while (*files != NULL);
|
||||
while (*files);
|
||||
|
||||
return errseen ? EXIT_TROUBLE : status;
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* grep.h - interface to grep driver for searching subroutines.
|
||||
Copyright (C) 1992, 1998, 2001, 2007, 2009-2023 Free Software Foundation,
|
||||
Copyright (C) 1992, 1998, 2001, 2007, 2009-2026 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -13,9 +13,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GREP_GREP_H
|
||||
#define GREP_GREP_H 1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* kwsearch.c - searching subroutines using kwset for grep.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2023 Free Software Foundation, Inc.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,14 +12,12 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written August 1992 by Mike Haertel. */
|
||||
|
||||
#include <config.h>
|
||||
#include "search.h"
|
||||
#include <search.h>
|
||||
|
||||
/* A compiled -F pattern list. */
|
||||
|
||||
@ -50,7 +48,7 @@ void *
|
||||
Fcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
{
|
||||
kwset_t kwset;
|
||||
char *buf = NULL;
|
||||
char *buf = nullptr;
|
||||
idx_t bufalloc = 0;
|
||||
|
||||
kwset = kwsinit (true);
|
||||
@ -71,7 +69,7 @@ Fcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
{
|
||||
free (buf);
|
||||
bufalloc = len;
|
||||
buf = xpalloc (NULL, &bufalloc, 2, -1, 1);
|
||||
buf = xpalloc (nullptr, &bufalloc, 2, -1, 1);
|
||||
buf[0] = eolbyte;
|
||||
}
|
||||
memcpy (buf + 1, p, len);
|
||||
@ -96,7 +94,7 @@ Fcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
kwsearch->words = words;
|
||||
kwsearch->pattern = pattern;
|
||||
kwsearch->size = size;
|
||||
kwsearch->re = NULL;
|
||||
kwsearch->re = nullptr;
|
||||
return kwsearch;
|
||||
}
|
||||
|
||||
@ -151,7 +149,7 @@ Fexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
|
||||
goto success_in_beg_and_len;
|
||||
if (match_lines)
|
||||
{
|
||||
len += start_ptr == NULL;
|
||||
len += !start_ptr;
|
||||
goto success_in_beg_and_len;
|
||||
}
|
||||
if (! match_words)
|
||||
@ -199,7 +197,7 @@ Fexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
|
||||
end = buf + size;
|
||||
|
||||
if (0 <= EGexecute (kwsearch->re, beg, end - beg,
|
||||
match_size, NULL))
|
||||
match_size, nullptr))
|
||||
goto success_match_words;
|
||||
beg = end - 1;
|
||||
break;
|
||||
|
||||
912
src/kwset.c
912
src/kwset.c
@ -1,912 +0,0 @@
|
||||
/* kwset.c - search for any of a set of keywords.
|
||||
Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009-2023 Free Software
|
||||
Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
|
||||
/* Written August 1989 by Mike Haertel. */
|
||||
|
||||
/* For more on the Aho-Corasick and Boyer-Moore algorithms,
|
||||
as well as other algorithms that might help improve performance,
|
||||
see the grep manual's "Performance" chapter. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include "kwset.h"
|
||||
|
||||
#include <stdckdint.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "system.h"
|
||||
#include "memchr2.h"
|
||||
#include "obstack.h"
|
||||
#include "xalloc.h"
|
||||
#include "verify.h"
|
||||
|
||||
#define obstack_chunk_alloc xmalloc
|
||||
#define obstack_chunk_free free
|
||||
|
||||
/* Return CH converted by to_uchar.  The callers in this file use the
   result as an index into per-byte tables.  */
static unsigned char
U (char ch)
{
  unsigned char uch = to_uchar (ch);
  return uch;
}
|
||||
|
||||
/* One node of the balanced binary tree that stores the labeled edges
   leaving a trie node.  Nodes are ordered by LABEL.  */
struct tree
{
  /* Left child (smaller labels).  This MUST remain the first member:
     kwsincr casts the address of a trie node's LINKS pointer to
     'struct tree *' and stores through its llink.  */
  struct tree *llink;

  /* Right child (larger labels).  */
  struct tree *rlink;

  /* Trie node that this edge leads to.  */
  struct trie *trie;

  /* Byte labeling this edge.  */
  unsigned char label;

  /* Difference in depth between the two subtrees; kwsincr decrements
     it for growth on the left and increments it for growth on the
     right.  */
  char balance;
};
|
||||
|
||||
/* Node of a trie representing a set of keywords. */
|
||||
struct trie
|
||||
{
|
||||
/* If an accepting node, this is either 2*W + 1 where W is the word
|
||||
index, or is -1 if Aho-Corasick is in use and FAIL
|
||||
specifies where to look for more info. If not an accepting node,
|
||||
this is zero. */
|
||||
ptrdiff_t accepting;
|
||||
|
||||
struct tree *links; /* Tree of edges leaving this node. */
|
||||
struct trie *parent; /* Parent of this node. */
|
||||
struct trie *next; /* List of all trie nodes in level order. */
|
||||
struct trie *fail; /* Aho-Corasick failure function. */
|
||||
idx_t depth; /* Depth of this node from the root. */
|
||||
idx_t shift; /* Shift function for search failures. */
|
||||
idx_t maxshift; /* Max shift of self and descendants. */
|
||||
};
|
||||
|
||||
/* Structure returned opaquely to the caller, containing everything. */
|
||||
struct kwset
|
||||
{
|
||||
struct obstack obstack; /* Obstack for node allocation. */
|
||||
idx_t words; /* Number of words in the trie. */
|
||||
struct trie *trie; /* The trie itself. */
|
||||
idx_t mind; /* Minimum depth of an accepting node. */
|
||||
unsigned char delta[NCHAR]; /* Delta table for rapid search. */
|
||||
struct trie *next[NCHAR]; /* Table of children of the root. */
|
||||
char *target; /* Target string if there's only one. */
|
||||
idx_t *shift; /* Used in Boyer-Moore search for one
|
||||
string. */
|
||||
char const *trans; /* Character translation table. */
|
||||
|
||||
/* This helps to match a terminal byte, which is the first byte
|
||||
for Aho-Corasick, and the last byte for Boyer-More. If all the
|
||||
patterns have the same terminal byte (after translation via TRANS
|
||||
if TRANS is nonnull), then this is that byte as an unsigned char.
|
||||
Otherwise this is -1 if there is disagreement among the strings
|
||||
about terminal bytes, and -2 if there are no terminal bytes and
|
||||
no disagreement because all the patterns are empty. */
|
||||
int gc1;
|
||||
|
||||
/* This helps to match a terminal byte. If 0 <= GC1HELP, B is
|
||||
terminal when B == GC1 || B == GC1HELP (note that GC1 == GCHELP
|
||||
is common here). This is typically faster than evaluating
|
||||
to_uchar (TRANS[B]) == GC1. */
|
||||
int gc1help;
|
||||
|
||||
/* If the string has two or more bytes, this is the penultimate byte,
|
||||
after translation via TRANS if TRANS is nonnull. This variable
|
||||
is used only by Boyer-Moore. */
|
||||
char gc2;
|
||||
|
||||
/* kwsexec implementation. */
|
||||
ptrdiff_t (*kwsexec) (kwset_t, char const *, idx_t, struct kwsmatch *, bool);
|
||||
};
|
||||
|
||||
/* Return C transliterated through TRANS.  When TRANS is null, C is
   returned unchanged.  */
static inline char
tr (char const *trans, char c)
{
  if (!trans)
    return c;
  return trans[U (c)];
}
|
||||
|
||||
static ptrdiff_t acexec (kwset_t, char const *, idx_t,
|
||||
struct kwsmatch *, bool);
|
||||
static ptrdiff_t bmexec (kwset_t, char const *, idx_t,
|
||||
struct kwsmatch *, bool);
|
||||
|
||||
/* Return a newly allocated keyword set. A nonnull TRANS specifies a
|
||||
table of character translations to be applied to all pattern and
|
||||
search text. */
|
||||
kwset_t
|
||||
kwsalloc (char const *trans)
|
||||
{
|
||||
struct kwset *kwset = xmalloc (sizeof *kwset);
|
||||
|
||||
obstack_init (&kwset->obstack);
|
||||
kwset->words = 0;
|
||||
kwset->trie = obstack_alloc (&kwset->obstack, sizeof *kwset->trie);
|
||||
kwset->trie->accepting = 0;
|
||||
kwset->trie->links = NULL;
|
||||
kwset->trie->parent = NULL;
|
||||
kwset->trie->next = NULL;
|
||||
kwset->trie->fail = NULL;
|
||||
kwset->trie->depth = 0;
|
||||
kwset->trie->shift = 0;
|
||||
kwset->mind = IDX_MAX;
|
||||
kwset->target = NULL;
|
||||
kwset->trans = trans;
|
||||
kwset->kwsexec = acexec;
|
||||
|
||||
return kwset;
|
||||
}
|
||||
|
||||
/* Number of slots in the path arrays that kwsincr keeps while
   descending a node's edge tree.  As an upper bound it is valid for
   CHAR_BIT >= 4 and exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }.
   The value equals CHAR_BIT + CHAR_BIT / 2.  */
enum { DEPTH_SIZE = 3 * CHAR_BIT / 2 };
|
||||
|
||||
/* Add the given string to the contents of the keyword set. */
|
||||
void
|
||||
kwsincr (kwset_t kwset, char const *text, idx_t len)
|
||||
{
|
||||
assume (0 <= len);
|
||||
struct trie *trie = kwset->trie;
|
||||
char const *trans = kwset->trans;
|
||||
bool reverse = kwset->kwsexec == bmexec;
|
||||
|
||||
if (reverse)
|
||||
text += len;
|
||||
|
||||
/* Descend the trie (built of keywords) character-by-character,
|
||||
installing new nodes when necessary. */
|
||||
while (len--)
|
||||
{
|
||||
unsigned char uc = reverse ? *--text : *text++;
|
||||
unsigned char label = trans ? trans[uc] : uc;
|
||||
|
||||
/* Descend the tree of outgoing links for this trie node,
|
||||
looking for the current character and keeping track
|
||||
of the path followed. */
|
||||
struct tree *cur = trie->links;
|
||||
struct tree *links[DEPTH_SIZE];
|
||||
enum { L, R } dirs[DEPTH_SIZE];
|
||||
links[0] = (struct tree *) &trie->links;
|
||||
dirs[0] = L;
|
||||
idx_t depth = 1;
|
||||
|
||||
while (cur && label != cur->label)
|
||||
{
|
||||
links[depth] = cur;
|
||||
if (label < cur->label)
|
||||
dirs[depth++] = L, cur = cur->llink;
|
||||
else
|
||||
dirs[depth++] = R, cur = cur->rlink;
|
||||
}
|
||||
|
||||
/* The current character doesn't have an outgoing link at
|
||||
this trie node, so build a new trie node and install
|
||||
a link in the current trie node's tree. */
|
||||
if (!cur)
|
||||
{
|
||||
cur = obstack_alloc (&kwset->obstack, sizeof *cur);
|
||||
cur->llink = NULL;
|
||||
cur->rlink = NULL;
|
||||
cur->trie = obstack_alloc (&kwset->obstack, sizeof *cur->trie);
|
||||
cur->trie->accepting = 0;
|
||||
cur->trie->links = NULL;
|
||||
cur->trie->parent = trie;
|
||||
cur->trie->next = NULL;
|
||||
cur->trie->fail = NULL;
|
||||
cur->trie->depth = trie->depth + 1;
|
||||
cur->trie->shift = 0;
|
||||
cur->label = label;
|
||||
cur->balance = 0;
|
||||
|
||||
/* Install the new tree node in its parent. */
|
||||
if (dirs[--depth] == L)
|
||||
links[depth]->llink = cur;
|
||||
else
|
||||
links[depth]->rlink = cur;
|
||||
|
||||
/* Back up the tree fixing the balance flags. */
|
||||
while (depth && !links[depth]->balance)
|
||||
{
|
||||
if (dirs[depth] == L)
|
||||
--links[depth]->balance;
|
||||
else
|
||||
++links[depth]->balance;
|
||||
--depth;
|
||||
}
|
||||
|
||||
/* Rebalance the tree by pointer rotations if necessary. */
|
||||
if (depth && ((dirs[depth] == L && --links[depth]->balance)
|
||||
|| (dirs[depth] == R && ++links[depth]->balance)))
|
||||
{
|
||||
struct tree *t, *r, *l, *rl, *lr;
|
||||
|
||||
switch (links[depth]->balance)
|
||||
{
|
||||
case (char) -2:
|
||||
switch (dirs[depth + 1])
|
||||
{
|
||||
case L:
|
||||
r = links[depth], t = r->llink, rl = t->rlink;
|
||||
t->rlink = r, r->llink = rl;
|
||||
t->balance = r->balance = 0;
|
||||
break;
|
||||
case R:
|
||||
r = links[depth], l = r->llink, t = l->rlink;
|
||||
rl = t->rlink, lr = t->llink;
|
||||
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
|
||||
l->balance = t->balance != 1 ? 0 : -1;
|
||||
r->balance = t->balance != (char) -1 ? 0 : 1;
|
||||
t->balance = 0;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (dirs[depth + 1])
|
||||
{
|
||||
case R:
|
||||
l = links[depth], t = l->rlink, lr = t->llink;
|
||||
t->llink = l, l->rlink = lr;
|
||||
t->balance = l->balance = 0;
|
||||
break;
|
||||
case L:
|
||||
l = links[depth], r = l->rlink, t = r->llink;
|
||||
lr = t->llink, rl = t->rlink;
|
||||
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
|
||||
l->balance = t->balance != 1 ? 0 : -1;
|
||||
r->balance = t->balance != (char) -1 ? 0 : 1;
|
||||
t->balance = 0;
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
||||
if (dirs[depth - 1] == L)
|
||||
links[depth - 1]->llink = t;
|
||||
else
|
||||
links[depth - 1]->rlink = t;
|
||||
}
|
||||
}
|
||||
|
||||
trie = cur->trie;
|
||||
}
|
||||
|
||||
/* Mark the node finally reached as accepting, encoding the
|
||||
index number of this word in the keyword set so far. */
|
||||
if (!trie->accepting)
|
||||
trie->accepting = 2 * kwset->words + 1;
|
||||
++kwset->words;
|
||||
|
||||
/* Keep track of the longest and shortest string of the keyword set. */
|
||||
if (trie->depth < kwset->mind)
|
||||
kwset->mind = trie->depth;
|
||||
}
|
||||
|
||||
idx_t
|
||||
kwswords (kwset_t kwset)
|
||||
{
|
||||
return kwset->words;
|
||||
}
|
||||
|
||||
/* Enqueue the trie nodes referenced from the given tree in the
|
||||
given queue. */
|
||||
static void
|
||||
enqueue (struct tree *tree, struct trie **last)
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
enqueue (tree->llink, last);
|
||||
enqueue (tree->rlink, last);
|
||||
(*last) = (*last)->next = tree->trie;
|
||||
}
|
||||
|
||||
/* Compute the Aho-Corasick failure function for the trie nodes referenced
|
||||
from the given tree, given the failure function for their parent as
|
||||
well as a last resort failure node. */
|
||||
static void
|
||||
treefails (struct tree const *tree, struct trie const *fail,
|
||||
struct trie *recourse, bool reverse)
|
||||
{
|
||||
struct tree *cur;
|
||||
|
||||
if (!tree)
|
||||
return;
|
||||
|
||||
treefails (tree->llink, fail, recourse, reverse);
|
||||
treefails (tree->rlink, fail, recourse, reverse);
|
||||
|
||||
/* Find, in the chain of fails going back to the root, the first
|
||||
node that has a descendant on the current label. */
|
||||
while (fail)
|
||||
{
|
||||
cur = fail->links;
|
||||
while (cur && tree->label != cur->label)
|
||||
if (tree->label < cur->label)
|
||||
cur = cur->llink;
|
||||
else
|
||||
cur = cur->rlink;
|
||||
if (cur)
|
||||
{
|
||||
tree->trie->fail = cur->trie;
|
||||
if (!reverse && cur->trie->accepting && !tree->trie->accepting)
|
||||
tree->trie->accepting = -1;
|
||||
return;
|
||||
}
|
||||
fail = fail->fail;
|
||||
}
|
||||
|
||||
tree->trie->fail = recourse;
|
||||
}
|
||||
|
||||
/* Set delta entries for the links of the given tree such that
|
||||
the preexisting delta value is larger than the current depth. */
|
||||
static void
|
||||
treedelta (struct tree const *tree, idx_t depth, unsigned char delta[])
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
treedelta (tree->llink, depth, delta);
|
||||
treedelta (tree->rlink, depth, delta);
|
||||
if (depth < delta[tree->label])
|
||||
delta[tree->label] = depth;
|
||||
}
|
||||
|
||||
/* Return true if A has every label in B. */
|
||||
static bool _GL_ATTRIBUTE_PURE
|
||||
hasevery (struct tree const *a, struct tree const *b)
|
||||
{
|
||||
if (!b)
|
||||
return true;
|
||||
if (!hasevery (a, b->llink))
|
||||
return false;
|
||||
if (!hasevery (a, b->rlink))
|
||||
return false;
|
||||
while (a && b->label != a->label)
|
||||
if (b->label < a->label)
|
||||
a = a->llink;
|
||||
else
|
||||
a = a->rlink;
|
||||
return !!a;
|
||||
}
|
||||
|
||||
/* Compute a vector, indexed by character code, of the trie nodes
|
||||
referenced from the given tree. */
|
||||
static void
|
||||
treenext (struct tree const *tree, struct trie *next[])
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
treenext (tree->llink, next);
|
||||
treenext (tree->rlink, next);
|
||||
next[tree->label] = tree->trie;
|
||||
}
|
||||
|
||||
/* Prepare a built keyword set for use. */
|
||||
void
|
||||
kwsprep (kwset_t kwset)
|
||||
{
|
||||
char const *trans = kwset->trans;
|
||||
unsigned char deltabuf[NCHAR];
|
||||
unsigned char *delta = trans ? deltabuf : kwset->delta;
|
||||
struct trie *curr, *last;
|
||||
|
||||
/* Use Boyer-Moore if just one pattern, Aho-Corasick otherwise. */
|
||||
bool reverse = kwset->words == 1;
|
||||
|
||||
if (reverse)
|
||||
{
|
||||
kwset_t new_kwset;
|
||||
|
||||
/* Enqueue the immediate descendants in the level order queue. */
|
||||
for (curr = last = kwset->trie; curr; curr = curr->next)
|
||||
enqueue (curr->links, &last);
|
||||
|
||||
/* Looking for just one string. Extract it from the trie. */
|
||||
kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
|
||||
curr = kwset->trie;
|
||||
for (idx_t i = 0; i < kwset->mind; i++)
|
||||
{
|
||||
kwset->target[i] = curr->links->label;
|
||||
curr = curr->next;
|
||||
}
|
||||
|
||||
new_kwset = kwsalloc (kwset->trans);
|
||||
new_kwset->kwsexec = bmexec;
|
||||
kwsincr (new_kwset, kwset->target, kwset->mind);
|
||||
obstack_free (&kwset->obstack, NULL);
|
||||
*kwset = *new_kwset;
|
||||
free (new_kwset);
|
||||
}
|
||||
|
||||
/* Initial values for the delta table; will be changed later. The
|
||||
delta entry for a given character is the smallest depth of any
|
||||
node at which an outgoing edge is labeled by that character. */
|
||||
memset (delta, MIN (kwset->mind, UCHAR_MAX), sizeof deltabuf);
|
||||
|
||||
/* Traverse the nodes of the trie in level order, simultaneously
|
||||
computing the delta table, failure function, and shift function. */
|
||||
for (curr = last = kwset->trie; curr; curr = curr->next)
|
||||
{
|
||||
/* Enqueue the immediate descendants in the level order queue. */
|
||||
enqueue (curr->links, &last);
|
||||
|
||||
/* Update the delta table for the descendants of this node. */
|
||||
treedelta (curr->links, curr->depth, delta);
|
||||
|
||||
/* Compute the failure function for the descendants of this node. */
|
||||
treefails (curr->links, curr->fail, kwset->trie, reverse);
|
||||
|
||||
if (reverse)
|
||||
{
|
||||
curr->shift = kwset->mind;
|
||||
curr->maxshift = kwset->mind;
|
||||
|
||||
/* Update the shifts at each node in the current node's chain
|
||||
of fails back to the root. */
|
||||
struct trie *fail;
|
||||
for (fail = curr->fail; fail; fail = fail->fail)
|
||||
{
|
||||
/* If the current node has some outgoing edge that the fail
|
||||
doesn't, then the shift at the fail should be no larger
|
||||
than the difference of their depths. */
|
||||
if (!hasevery (fail->links, curr->links))
|
||||
if (curr->depth - fail->depth < fail->shift)
|
||||
fail->shift = curr->depth - fail->depth;
|
||||
|
||||
/* If the current node is accepting then the shift at the
|
||||
fail and its descendants should be no larger than the
|
||||
difference of their depths. */
|
||||
if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
|
||||
fail->maxshift = curr->depth - fail->depth;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (reverse)
|
||||
{
|
||||
/* Traverse the trie in level order again, fixing up all nodes whose
|
||||
shift exceeds their inherited maxshift. */
|
||||
for (curr = kwset->trie->next; curr; curr = curr->next)
|
||||
{
|
||||
if (curr->maxshift > curr->parent->maxshift)
|
||||
curr->maxshift = curr->parent->maxshift;
|
||||
if (curr->shift > curr->maxshift)
|
||||
curr->shift = curr->maxshift;
|
||||
}
|
||||
}
|
||||
|
||||
/* Create a vector, indexed by character code, of the outgoing links
|
||||
from the root node. Accumulate GC1 and GC1HELP. */
|
||||
struct trie *nextbuf[NCHAR];
|
||||
struct trie **next = trans ? nextbuf : kwset->next;
|
||||
memset (next, 0, sizeof nextbuf);
|
||||
treenext (kwset->trie->links, next);
|
||||
int gc1 = -2;
|
||||
int gc1help = -1;
|
||||
for (int i = 0; i < NCHAR; i++)
|
||||
{
|
||||
int ti = i;
|
||||
if (trans)
|
||||
{
|
||||
ti = U(trans[i]);
|
||||
kwset->next[i] = next[ti];
|
||||
}
|
||||
if (kwset->next[i])
|
||||
{
|
||||
if (gc1 < -1)
|
||||
{
|
||||
gc1 = ti;
|
||||
gc1help = i;
|
||||
}
|
||||
else if (gc1 == ti)
|
||||
gc1help = gc1help == ti ? i : -1;
|
||||
else if (i == ti && gc1 == gc1help)
|
||||
gc1help = i;
|
||||
else
|
||||
gc1 = -1;
|
||||
}
|
||||
}
|
||||
kwset->gc1 = gc1;
|
||||
kwset->gc1help = gc1help;
|
||||
|
||||
if (reverse)
|
||||
{
|
||||
/* Looking for just one string. Extract it from the trie. */
|
||||
kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
|
||||
curr = kwset->trie;
|
||||
for (idx_t i = kwset->mind; 0 < i; i--)
|
||||
{
|
||||
kwset->target[i - 1] = curr->links->label;
|
||||
curr = curr->next;
|
||||
}
|
||||
|
||||
if (kwset->mind > 1)
|
||||
{
|
||||
/* Looking for the delta2 shift that might be made after a
|
||||
backwards match has failed. Extract it from the trie. */
|
||||
kwset->shift
|
||||
= obstack_alloc (&kwset->obstack,
|
||||
sizeof *kwset->shift * (kwset->mind - 1));
|
||||
curr = kwset->trie->next;
|
||||
for (idx_t i = 0; i < kwset->mind - 1; i++)
|
||||
{
|
||||
kwset->shift[i] = curr->shift;
|
||||
curr = curr->next;
|
||||
}
|
||||
|
||||
/* The penultimate byte. */
|
||||
kwset->gc2 = tr (trans, kwset->target[kwset->mind - 2]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Fix things up for any translation table. */
|
||||
if (trans)
|
||||
for (int i = 0; i < NCHAR; ++i)
|
||||
kwset->delta[i] = delta[U(trans[i])];
|
||||
}
|
||||
|
||||
/* Delta2 portion of a Boyer-Moore search. *TP is the string text
|
||||
pointer; it is updated in place. EP is the end of the string text,
|
||||
and SP the end of the pattern. LEN is the pattern length; it must
|
||||
be at least 2. TRANS, if nonnull, is the input translation table.
|
||||
GC1 and GC2 are the last and second-from last bytes of the pattern,
|
||||
transliterated by TRANS; the caller precomputes them for
|
||||
efficiency. If D1 is nonnull, it is a delta1 table for shifting *TP
|
||||
when failing. KWSET->shift says how much to shift. */
|
||||
static inline bool
|
||||
bm_delta2_search (char const **tpp, char const *ep, char const *sp,
|
||||
idx_t len,
|
||||
char const *trans, char gc1, char gc2,
|
||||
unsigned char const *d1, kwset_t kwset)
|
||||
{
|
||||
char const *tp = *tpp;
|
||||
idx_t d = len, skip = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
idx_t i = 2;
|
||||
if (tr (trans, tp[-2]) == gc2)
|
||||
{
|
||||
while (++i <= d)
|
||||
if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
|
||||
break;
|
||||
if (i > d)
|
||||
{
|
||||
for (i = d + skip + 1; i <= len; ++i)
|
||||
if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
|
||||
break;
|
||||
if (i > len)
|
||||
{
|
||||
*tpp = tp - len;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tp += d = kwset->shift[i - 2];
|
||||
if (tp > ep)
|
||||
break;
|
||||
if (tr (trans, tp[-1]) != gc1)
|
||||
{
|
||||
if (d1)
|
||||
tp += d1[U(tp[-1])];
|
||||
break;
|
||||
}
|
||||
skip = i - 1;
|
||||
}
|
||||
|
||||
*tpp = tp;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return the address of the first byte in the buffer S (of size N)
|
||||
that matches the terminal byte specified by KWSET, or NULL if there
|
||||
is no match. KWSET->gc1 should be nonnegative. */
|
||||
static char const *
|
||||
memchr_kwset (char const *s, idx_t n, kwset_t kwset)
|
||||
{
|
||||
char const *slim = s + n;
|
||||
if (kwset->gc1help < 0)
|
||||
{
|
||||
for (; s < slim; s++)
|
||||
if (kwset->next[U(*s)])
|
||||
return s;
|
||||
}
|
||||
else
|
||||
{
|
||||
int small_heuristic = 2;
|
||||
idx_t small_bytes = small_heuristic * sizeof (unsigned long int);
|
||||
while (s < slim)
|
||||
{
|
||||
if (kwset->next[U(*s)])
|
||||
return s;
|
||||
s++;
|
||||
if ((uintptr_t) s % small_bytes == 0)
|
||||
return memchr2 (s, kwset->gc1, kwset->gc1help, slim - s);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Fast Boyer-Moore search (inlinable version). */
|
||||
static inline ptrdiff_t _GL_ATTRIBUTE_PURE
|
||||
bmexec_trans (kwset_t kwset, char const *text, idx_t size)
|
||||
{
|
||||
assume (0 <= size);
|
||||
unsigned char const *d1;
|
||||
char const *ep, *sp, *tp;
|
||||
int d;
|
||||
idx_t len = kwset->mind;
|
||||
char const *trans = kwset->trans;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
if (len > size)
|
||||
return -1;
|
||||
if (len == 1)
|
||||
{
|
||||
tp = memchr_kwset (text, size, kwset);
|
||||
return tp ? tp - text : -1;
|
||||
}
|
||||
|
||||
d1 = kwset->delta;
|
||||
sp = kwset->target + len;
|
||||
tp = text + len;
|
||||
char gc1 = kwset->gc1;
|
||||
char gc2 = kwset->gc2;
|
||||
|
||||
/* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
|
||||
idx_t len12;
|
||||
if (!ckd_mul (&len12, len, 12) && len12 < size)
|
||||
/* 11 is not a bug, the initial offset happens only once. */
|
||||
for (ep = text + size - 11 * len; tp <= ep; )
|
||||
{
|
||||
char const *tp0 = tp;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d != 0)
|
||||
{
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d != 0)
|
||||
{
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d != 0)
|
||||
{
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
|
||||
/* As a heuristic, prefer memchr to seeking by
|
||||
delta1 when the latter doesn't advance much. */
|
||||
int advance_heuristic = 16 * sizeof (long);
|
||||
if (advance_heuristic <= tp - tp0)
|
||||
continue;
|
||||
tp--;
|
||||
tp = memchr_kwset (tp, text + size - tp, kwset);
|
||||
if (! tp)
|
||||
return -1;
|
||||
tp++;
|
||||
if (ep <= tp)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
|
||||
return tp - text;
|
||||
}
|
||||
|
||||
/* Now only a few characters are left to search. Carefully avoid
|
||||
ever producing an out-of-bounds pointer. */
|
||||
ep = text + size;
|
||||
d = d1[U(tp[-1])];
|
||||
while (d <= ep - tp)
|
||||
{
|
||||
d = d1[U((tp += d)[-1])];
|
||||
if (d != 0)
|
||||
continue;
|
||||
if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
|
||||
return tp - text;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Fast Boyer-Moore search. */
|
||||
static ptrdiff_t
|
||||
bmexec (kwset_t kwset, char const *text, idx_t size,
|
||||
struct kwsmatch *kwsmatch, bool longest)
|
||||
{
|
||||
/* Help the compiler inline in two ways, depending on whether
|
||||
kwset->trans is null. */
|
||||
ptrdiff_t ret = (IGNORE_DUPLICATE_BRANCH_WARNING
|
||||
(kwset->trans
|
||||
? bmexec_trans (kwset, text, size)
|
||||
: bmexec_trans (kwset, text, size)));
|
||||
kwsmatch->index = 0;
|
||||
kwsmatch->offset = ret;
|
||||
kwsmatch->size = kwset->mind;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Hairy multiple string search with the Aho-Corasick algorithm.
|
||||
(inlinable version) */
|
||||
static inline ptrdiff_t
|
||||
acexec_trans (kwset_t kwset, char const *text, idx_t len,
|
||||
struct kwsmatch *kwsmatch, bool longest)
|
||||
{
|
||||
struct trie const *trie, *accept;
|
||||
char const *tp, *left, *lim;
|
||||
struct tree const *tree;
|
||||
char const *trans;
|
||||
|
||||
/* Initialize register copies and look for easy ways out. */
|
||||
if (len < kwset->mind)
|
||||
return -1;
|
||||
trans = kwset->trans;
|
||||
trie = kwset->trie;
|
||||
lim = text + len;
|
||||
tp = text;
|
||||
|
||||
if (!trie->accepting)
|
||||
{
|
||||
unsigned char c;
|
||||
int gc1 = kwset->gc1;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (gc1 < 0)
|
||||
{
|
||||
while (! (trie = kwset->next[c = tr (trans, *tp++)]))
|
||||
if (tp >= lim)
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
tp = memchr_kwset (tp, lim - tp, kwset);
|
||||
if (!tp)
|
||||
return -1;
|
||||
c = tr (trans, *tp++);
|
||||
trie = kwset->next[c];
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (trie->accepting)
|
||||
goto match;
|
||||
if (tp >= lim)
|
||||
return -1;
|
||||
c = tr (trans, *tp++);
|
||||
|
||||
for (tree = trie->links; c != tree->label; )
|
||||
{
|
||||
tree = c < tree->label ? tree->llink : tree->rlink;
|
||||
if (! tree)
|
||||
{
|
||||
trie = trie->fail;
|
||||
if (!trie)
|
||||
{
|
||||
trie = kwset->next[c];
|
||||
if (trie)
|
||||
goto have_trie;
|
||||
if (tp >= lim)
|
||||
return -1;
|
||||
goto next_c;
|
||||
}
|
||||
if (trie->accepting)
|
||||
{
|
||||
--tp;
|
||||
goto match;
|
||||
}
|
||||
tree = trie->links;
|
||||
}
|
||||
}
|
||||
trie = tree->trie;
|
||||
have_trie:;
|
||||
}
|
||||
next_c:;
|
||||
}
|
||||
}
|
||||
|
||||
match:
|
||||
accept = trie;
|
||||
while (accept->accepting < 0)
|
||||
accept = accept->fail;
|
||||
left = tp - accept->depth;
|
||||
|
||||
/* Try left-most longest match. */
|
||||
if (longest)
|
||||
{
|
||||
while (tp < lim)
|
||||
{
|
||||
struct trie const *accept1;
|
||||
char const *left1;
|
||||
unsigned char c = tr (trans, *tp++);
|
||||
|
||||
do
|
||||
{
|
||||
tree = trie->links;
|
||||
while (tree && c != tree->label)
|
||||
tree = c < tree->label ? tree->llink : tree->rlink;
|
||||
}
|
||||
while (!tree && (trie = trie->fail) && accept->depth <= trie->depth);
|
||||
|
||||
if (!tree)
|
||||
break;
|
||||
trie = tree->trie;
|
||||
if (trie->accepting)
|
||||
{
|
||||
accept1 = trie;
|
||||
while (accept1->accepting < 0)
|
||||
accept1 = accept1->fail;
|
||||
left1 = tp - accept1->depth;
|
||||
if (left1 <= left)
|
||||
{
|
||||
left = left1;
|
||||
accept = accept1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kwsmatch->index = accept->accepting >> 1;
|
||||
kwsmatch->offset = left - text;
|
||||
kwsmatch->size = accept->depth;
|
||||
|
||||
return left - text;
|
||||
}
|
||||
|
||||
/* Hairy multiple string search with Aho-Corasick algorithm. */
|
||||
static ptrdiff_t
|
||||
acexec (kwset_t kwset, char const *text, idx_t size,
|
||||
struct kwsmatch *kwsmatch, bool longest)
|
||||
{
|
||||
assume (0 <= size);
|
||||
/* Help the compiler inline in two ways, depending on whether
|
||||
kwset->trans is null. */
|
||||
return (IGNORE_DUPLICATE_BRANCH_WARNING
|
||||
(kwset->trans
|
||||
? acexec_trans (kwset, text, size, kwsmatch, longest)
|
||||
: acexec_trans (kwset, text, size, kwsmatch, longest)));
|
||||
}
|
||||
|
||||
/* Find the first instance of a KWSET member in TEXT, which has SIZE bytes.
|
||||
Return the offset (into TEXT) of the first byte of the matching substring,
|
||||
or -1 if no match is found. Upon a match, store details in
|
||||
*KWSMATCH: index of matched keyword, start offset (same as the return
|
||||
value), and length. If LONGEST, find the longest match; otherwise
|
||||
any match will do. */
|
||||
ptrdiff_t
|
||||
kwsexec (kwset_t kwset, char const *text, idx_t size,
|
||||
struct kwsmatch *kwsmatch, bool longest)
|
||||
{
|
||||
return kwset->kwsexec (kwset, text, size, kwsmatch, longest);
|
||||
}
|
||||
|
||||
/* Free the components of the given keyword set. */
|
||||
void
|
||||
kwsfree (kwset_t kwset)
|
||||
{
|
||||
obstack_free (&kwset->obstack, NULL);
|
||||
free (kwset);
|
||||
}
|
||||
46
src/kwset.h
46
src/kwset.h
@ -1,46 +0,0 @@
|
||||
/* kwset.h - header declaring the keyword set library.
|
||||
Copyright (C) 1989, 1998, 2005, 2007, 2009-2023 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
|
||||
/* Written August 1989 by Mike Haertel. */
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <idx.h>
|
||||
|
||||
struct kwsmatch
|
||||
{
|
||||
idx_t index; /* Index number of matching keyword. */
|
||||
idx_t offset; /* Offset of match. */
|
||||
idx_t size; /* Length of match. */
|
||||
};
|
||||
|
||||
#include <arg-nonnull.h>
|
||||
#include <idx.h>
|
||||
|
||||
struct kwset;
|
||||
typedef struct kwset *kwset_t;
|
||||
|
||||
extern kwset_t kwsalloc (char const *);
|
||||
extern void kwsincr (kwset_t, char const *, idx_t);
|
||||
extern idx_t kwswords (kwset_t) _GL_ATTRIBUTE_PURE;
|
||||
extern void kwsprep (kwset_t);
|
||||
extern ptrdiff_t kwsexec (kwset_t, char const *, idx_t,
|
||||
struct kwsmatch *, bool)
|
||||
_GL_ARG_NONNULL ((4));
|
||||
extern void kwsfree (kwset_t);
|
||||
107
src/pcresearch.c
107
src/pcresearch.c
@ -1,5 +1,5 @@
|
||||
/* pcresearch.c - searching subroutines using PCRE for grep.
|
||||
Copyright 2000, 2007, 2009-2023 Free Software Foundation, Inc.
|
||||
Copyright 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,13 +12,11 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include "search.h"
|
||||
#include <search.h>
|
||||
#include "die.h"
|
||||
|
||||
#include <stdckdint.h>
|
||||
@ -35,6 +33,18 @@
|
||||
# define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT
|
||||
# define pcre2_set_depth_limit pcre2_set_recursion_limit
|
||||
#endif
|
||||
#ifndef PCRE2_EXTRA_ASCII_BSD
|
||||
# define PCRE2_EXTRA_ASCII_BSD 0
|
||||
#endif
|
||||
|
||||
/* Use PCRE2_MATCH_INVALID_UTF if supported and not buggy;
|
||||
see <https://github.com/PCRE2Project/pcre2/issues/224>.
|
||||
Assume the bug will be fixed after PCRE2 10.42. */
|
||||
#if defined PCRE2_MATCH_INVALID_UTF && 10 < PCRE2_MAJOR + (42 < PCRE2_MINOR)
|
||||
enum { MATCH_INVALID_UTF = PCRE2_MATCH_INVALID_UTF };
|
||||
#else
|
||||
enum { MATCH_INVALID_UTF = 0 };
|
||||
#endif
|
||||
|
||||
struct pcre_comp
|
||||
{
|
||||
@ -71,6 +81,15 @@ private_free (void *ptr, _GL_UNUSED void *unused)
|
||||
free (ptr);
|
||||
}
|
||||
|
||||
void
|
||||
Pprint_version (void)
|
||||
{
|
||||
char *buf = ximalloc (pcre2_config (PCRE2_CONFIG_VERSION, nullptr));
|
||||
pcre2_config (PCRE2_CONFIG_VERSION, buf);
|
||||
printf (_("\ngrep -P uses PCRE2 %s\n"), buf);
|
||||
free (buf);
|
||||
}
|
||||
|
||||
/* Match the already-compiled PCRE pattern against the data in SUBJECT,
|
||||
of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with
|
||||
options OPTIONS.
|
||||
@ -101,7 +120,7 @@ jit_exec (struct pcre_comp *pc, char const *subject, idx_t search_bytes,
|
||||
xalloc_die ();
|
||||
if (!pc->mcontext)
|
||||
pc->mcontext = pcre2_match_context_create (pc->gcontext);
|
||||
pcre2_jit_stack_assign (pc->mcontext, NULL, pc->jit_stack);
|
||||
pcre2_jit_stack_assign (pc->mcontext, nullptr, pc->jit_stack);
|
||||
}
|
||||
else if (e == PCRE2_ERROR_DEPTHLIMIT)
|
||||
{
|
||||
@ -118,16 +137,11 @@ jit_exec (struct pcre_comp *pc, char const *subject, idx_t search_bytes,
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true if E is an error code for bad UTF-8, and if pcre2_match
|
||||
could return E because PCRE lacks PCRE2_MATCH_INVALID_UTF. */
|
||||
/* Return true if E is an error code for bad UTF-8. */
|
||||
static bool
|
||||
bad_utf8_from_pcre2 (int e)
|
||||
{
|
||||
#ifdef PCRE2_MATCH_INVALID_UTF
|
||||
return false;
|
||||
#else
|
||||
return PCRE2_ERROR_UTF8_ERR21 <= e && e <= PCRE2_ERROR_UTF8_ERR1;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Compile the -P style PATTERN, containing SIZE bytes that are
|
||||
@ -142,7 +156,7 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
char *patlim = pattern + size;
|
||||
struct pcre_comp *pc = ximalloc (sizeof *pc);
|
||||
pcre2_general_context *gcontext = pc->gcontext
|
||||
= pcre2_general_context_create (private_malloc, private_free, NULL);
|
||||
= pcre2_general_context_create (private_malloc, private_free, nullptr);
|
||||
pcre2_compile_context *ccontext = pcre2_compile_context_create (gcontext);
|
||||
|
||||
if (localeinfo.multibyte)
|
||||
@ -153,14 +167,23 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
_("-P supports only unibyte locales on this platform"));
|
||||
if (! localeinfo.using_utf8)
|
||||
die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
|
||||
flags |= (PCRE2_UTF | PCRE2_UCP);
|
||||
|
||||
flags |= PCRE2_UTF;
|
||||
|
||||
/* If supported, consider invalid UTF-8 as a barrier not an error. */
|
||||
flags |= MATCH_INVALID_UTF;
|
||||
|
||||
/* If PCRE2_EXTRA_ASCII_BSD is available, use PCRE2_UCP
|
||||
so that \d does not have the undesirable effect of matching
|
||||
non-ASCII digits. Otherwise (i.e., with PCRE2 10.42 and earlier),
|
||||
escapes like \w have only their ASCII interpretations,
|
||||
but that's better than the confusion that would ensue if \d
|
||||
matched non-ASCII digits. */
|
||||
flags |= PCRE2_EXTRA_ASCII_BSD ? PCRE2_UCP : 0;
|
||||
|
||||
#if 0
|
||||
/* Do not match individual code units but only UTF-8. */
|
||||
flags |= PCRE2_NEVER_BACKSLASH_C;
|
||||
#endif
|
||||
#ifdef PCRE2_MATCH_INVALID_UTF
|
||||
/* Consider invalid UTF-8 as a barrier, instead of error. */
|
||||
flags |= PCRE2_MATCH_INVALID_UTF;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -168,19 +191,23 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
if (rawmemchr (pattern, '\n') != patlim)
|
||||
die (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
|
||||
|
||||
void *re_storage = NULL;
|
||||
#ifdef PCRE2_EXTRA_MATCH_LINE
|
||||
uint32_t extra_options = (PCRE2_EXTRA_ASCII_BSD
|
||||
| (match_lines ? PCRE2_EXTRA_MATCH_LINE : 0));
|
||||
pcre2_set_compile_extra_options (ccontext, extra_options);
|
||||
#endif
|
||||
|
||||
void *re_storage = nullptr;
|
||||
if (match_lines)
|
||||
{
|
||||
#ifdef PCRE2_EXTRA_MATCH_LINE
|
||||
pcre2_set_compile_extra_options (ccontext, PCRE2_EXTRA_MATCH_LINE);
|
||||
#else
|
||||
static char const /* These sizes omit trailing NUL. */
|
||||
xprefix[4] = "^(?:", xsuffix[2] = ")$";
|
||||
idx_t re_size = size + sizeof xprefix + sizeof xsuffix;
|
||||
#ifndef PCRE2_EXTRA_MATCH_LINE
|
||||
static char const *const xprefix = "^(?:";
|
||||
static char const *const xsuffix = ")$";
|
||||
idx_t re_size = size + strlen (xprefix) + strlen (xsuffix);
|
||||
char *re = re_storage = ximalloc (re_size);
|
||||
char *rez = mempcpy (re, xprefix, sizeof xprefix);
|
||||
char *rez = mempcpy (re, xprefix, strlen (xprefix));
|
||||
rez = mempcpy (rez, pattern, size);
|
||||
memcpy (rez, xsuffix, sizeof xsuffix);
|
||||
memcpy (rez, xsuffix, strlen (xsuffix));
|
||||
pattern = re;
|
||||
size = re_size;
|
||||
#endif
|
||||
@ -189,18 +216,20 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
{
|
||||
/* PCRE2_EXTRA_MATCH_WORD is incompatible with grep -w;
|
||||
do things the grep way. */
|
||||
static char const /* These sizes omit trailing NUL. */
|
||||
wprefix[10] = "(?<!\\w)(?:", wsuffix[7] = ")(?!\\w)";
|
||||
idx_t re_size = size + sizeof wprefix + sizeof wsuffix;
|
||||
static char const *const wprefix = "(?<!\\w)(?:";
|
||||
static char const *const wsuffix = ")(?!\\w)";
|
||||
idx_t re_size = size + strlen (wprefix) + strlen (wsuffix);
|
||||
char *re = re_storage = ximalloc (re_size);
|
||||
char *rez = mempcpy (re, wprefix, sizeof wprefix);
|
||||
char *rez = mempcpy (re, wprefix, strlen (wprefix));
|
||||
rez = mempcpy (rez, pattern, size);
|
||||
memcpy (rez, wsuffix, sizeof wsuffix);
|
||||
memcpy (rez, wsuffix, strlen (wsuffix));
|
||||
pattern = re;
|
||||
size = re_size;
|
||||
}
|
||||
|
||||
pcre2_set_character_tables (ccontext, pcre2_maketables (gcontext));
|
||||
if (!localeinfo.multibyte)
|
||||
pcre2_set_character_tables (ccontext, pcre2_maketables (gcontext));
|
||||
|
||||
pc->cre = pcre2_compile ((PCRE2_SPTR) pattern, size, flags,
|
||||
&ec, &e, ccontext);
|
||||
if (!pc->cre)
|
||||
@ -214,15 +243,15 @@ Pcompile (char *pattern, idx_t size, reg_syntax_t ignored, bool exact)
|
||||
free (re_storage);
|
||||
pcre2_compile_context_free (ccontext);
|
||||
|
||||
pc->mcontext = NULL;
|
||||
pc->mcontext = nullptr;
|
||||
pc->data = pcre2_match_data_create_from_pattern (pc->cre, gcontext);
|
||||
|
||||
ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE);
|
||||
if (ec && ec != PCRE2_ERROR_JIT_BADOPTION && ec != PCRE2_ERROR_NOMEMORY)
|
||||
die (EXIT_TROUBLE, 0, _("JIT internal error: %d"), ec);
|
||||
/* Ignore any failure return from pcre2_jit_compile, as that merely
|
||||
means JIT won't be used during matching. */
|
||||
pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE);
|
||||
|
||||
/* The PCRE documentation says that a 32 KiB stack is the default. */
|
||||
pc->jit_stack = NULL;
|
||||
pc->jit_stack = nullptr;
|
||||
pc->jit_stack_size = 32 << 10;
|
||||
|
||||
pc->empty_match[false] = jit_exec (pc, "", 0, 0, PCRE2_NOTBOL);
|
||||
@ -287,7 +316,7 @@ Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
|
||||
|
||||
e = jit_exec (pc, subject, line_end - subject,
|
||||
search_offset, options);
|
||||
if (!bad_utf8_from_pcre2 (e))
|
||||
if (MATCH_INVALID_UTF || !bad_utf8_from_pcre2 (e))
|
||||
break;
|
||||
|
||||
idx_t valid_bytes = pcre2_get_startchar (pc->data);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* search.c - searching subroutines using dfa, kwset and regex for grep.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2023 Free Software Foundation, Inc.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GREP_SEARCH_H
|
||||
#define GREP_SEARCH_H 1
|
||||
@ -24,7 +22,6 @@
|
||||
#include <sys/types.h>
|
||||
#include <stdint.h>
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "system.h"
|
||||
@ -65,6 +62,7 @@ extern ptrdiff_t Fexecute (void *, char const *, idx_t, idx_t *, char const *);
|
||||
/* pcresearch.c */
|
||||
extern void *Pcompile (char *, idx_t, reg_syntax_t, bool);
|
||||
extern ptrdiff_t Pexecute (void *, char const *, idx_t, idx_t *, char const *);
|
||||
extern void Pprint_version (void);
|
||||
|
||||
/* grep.c */
|
||||
extern struct localeinfo localeinfo;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* searchutils.c - helper subroutines for grep's matchers.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2023 Free Software Foundation, Inc.
|
||||
Copyright 1992, 1998, 2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,15 +12,15 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#define SEARCH_INLINE _GL_EXTERN_INLINE
|
||||
#define SYSTEM_INLINE _GL_EXTERN_INLINE
|
||||
#include "search.h"
|
||||
#include <search.h>
|
||||
|
||||
#include <uchar.h>
|
||||
|
||||
/* For each byte B, sbwordchar[B] is true if B is a single-byte
|
||||
character that is a word constituent, and is false otherwise. */
|
||||
@ -30,7 +30,7 @@ static bool sbwordchar[NCHAR];
|
||||
static bool
|
||||
wordchar (wint_t wc)
|
||||
{
|
||||
return wc == L'_' || iswalnum (wc);
|
||||
return wc == L'_' || c32isalnum (wc);
|
||||
}
|
||||
|
||||
void
|
||||
@ -43,7 +43,7 @@ wordinit (void)
|
||||
kwset_t
|
||||
kwsinit (bool mb_trans)
|
||||
{
|
||||
char *trans = NULL;
|
||||
char *trans = nullptr;
|
||||
|
||||
if (match_icase && (MB_CUR_MAX == 1 || mb_trans))
|
||||
{
|
||||
@ -113,7 +113,7 @@ mb_goback (char const **mb_start, idx_t *mbclen, char const *cur,
|
||||
|
||||
if (long_enough)
|
||||
{
|
||||
mbstate_t mbs = { 0 };
|
||||
mbstate_t mbs; mbszero (&mbs);
|
||||
ptrdiff_t clen = imbrlen (cur - i, end - (cur - i), &mbs);
|
||||
if (0 <= clen)
|
||||
{
|
||||
@ -129,7 +129,7 @@ mb_goback (char const **mb_start, idx_t *mbclen, char const *cur,
|
||||
{
|
||||
/* In non-UTF-8 encodings, to find character boundaries one must
|
||||
in general scan forward from the start of the buffer. */
|
||||
mbstate_t mbs = { 0 };
|
||||
mbstate_t mbs; mbszero (&mbs);
|
||||
ptrdiff_t clen;
|
||||
|
||||
do
|
||||
@ -162,27 +162,27 @@ mb_goback (char const **mb_start, idx_t *mbclen, char const *cur,
|
||||
static idx_t
|
||||
wordchars_count (char const *buf, char const *end, bool countall)
|
||||
{
|
||||
idx_t n = 0;
|
||||
mbstate_t mbs = { 0 };
|
||||
while (n < end - buf)
|
||||
mbstate_t mbs; mbszero (&mbs);
|
||||
char const *p = buf;
|
||||
while (p < end)
|
||||
{
|
||||
unsigned char b = buf[n];
|
||||
unsigned char b = *p;
|
||||
if (sbwordchar[b])
|
||||
n++;
|
||||
p++;
|
||||
else if (localeinfo.sbclen[b] != -2)
|
||||
break;
|
||||
else
|
||||
{
|
||||
wchar_t wc = 0;
|
||||
size_t wcbytes = mbrtowc (&wc, buf + n, end - buf - n, &mbs);
|
||||
char32_t wc = 0;
|
||||
size_t wcbytes = mbrtoc32 (&wc, p, end - p, &mbs);
|
||||
if (!wordchar (wc))
|
||||
break;
|
||||
n += wcbytes + !wcbytes;
|
||||
p += wcbytes + !wcbytes;
|
||||
}
|
||||
if (!countall)
|
||||
break;
|
||||
}
|
||||
return n;
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
/* Examine the start of BUF for the longest prefix containing just
|
||||
@ -214,6 +214,6 @@ wordchar_prev (char const *buf, char const *cur, char const *end)
|
||||
if (! localeinfo.multibyte || localeinfo.using_utf8 & ~(b >> 7))
|
||||
return sbwordchar[b];
|
||||
char const *p = buf;
|
||||
cur -= mb_goback (&p, NULL, cur, end);
|
||||
cur -= mb_goback (&p, nullptr, cur, end);
|
||||
return wordchar_next (cur, end);
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Portability cruft. Include after config.h and sys/types.h.
|
||||
Copyright 1996, 1998-2000, 2007, 2009-2023 Free Software Foundation, Inc.
|
||||
Copyright 1996, 1998-2000, 2007, 2009-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GREP_SYSTEM_H
|
||||
#define GREP_SYSTEM_H 1
|
||||
|
||||
15
tests/100k-entries
Executable file
15
tests/100k-entries
Executable file
@ -0,0 +1,15 @@
|
||||
#!/bin/sh
|
||||
# This would make grep-3.11 fail with ENOTSUP and exit 2.
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
expensive_
|
||||
|
||||
fail=0
|
||||
|
||||
mkdir t || framework_failure_
|
||||
(cd t && seq 100000|xargs touch) || framework_failure_
|
||||
|
||||
returns_ 1 grep -r x t > out 2> err
|
||||
compare /dev/null out || fail=1
|
||||
compare /dev/null err || fail=1
|
||||
|
||||
Exit $fail
|
||||
@ -1,7 +1,7 @@
|
||||
package Coreutils;
|
||||
# This is a testing framework.
|
||||
|
||||
# Copyright (C) 1998-2015, 2017-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 1998-2015, 2017-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
package CuSkip;
|
||||
# Skip a test: emit diag to log and to stderr, and exit 77
|
||||
|
||||
# Copyright (C) 2011-2015, 2017-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2011-2015, 2017-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
package CuTmpdir;
|
||||
# create, then chdir into a temporary sub-directory
|
||||
|
||||
# Copyright (C) 2007-2015, 2017-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2007-2015, 2017-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
## Process this file with automake to create Makefile.in
|
||||
# Copyright 1997-1998, 2005-2023 Free Software Foundation, Inc.
|
||||
# Copyright 1997-1998, 2005-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -41,18 +41,27 @@ AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS)
|
||||
|
||||
# Tell the linker to omit references to unused shared libraries.
|
||||
AM_LDFLAGS = $(IGNORE_UNUSED_LIBRARIES_CFLAGS)
|
||||
LDADD = ../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a
|
||||
LDADD = ../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a \
|
||||
$(HARD_LOCALE_LIB) $(LIBC32CONV) $(LIBCSTACK) \
|
||||
$(LIBSIGSEGV) $(LIBUNISTRING) $(MBRTOWC_LIB) $(SETLOCALE_NULL_LIB) \
|
||||
$(LIBTHREAD)
|
||||
|
||||
# The triple-backref test is expected to fail with both the system
|
||||
# matcher (i.e., with glibc) and with the included matcher.
|
||||
# Both matchers need to be fixed.
|
||||
# FIXME-2015: Remove this once the glibc and gnulib bugs are fixed.
|
||||
# FIXME-2025: Remove this once the glibc and gnulib bugs are fixed.
|
||||
XFAIL_TESTS = triple-backref
|
||||
|
||||
# The glibc-infloop test is expected to fail with both the system
|
||||
# matcher (i.e., with glibc) and with the included matcher.
|
||||
# Both matchers need to be fixed.
|
||||
# FIXME-2025: Remove this once the glibc and gnulib bugs are fixed.
|
||||
XFAIL_TESTS += glibc-infloop
|
||||
|
||||
# Equivalence classes are only supported when using the system
|
||||
# matcher (which means only with glibc).
|
||||
# The included matcher needs to be fixed.
|
||||
# FIXME-2015: Remove this once the gnulib bug is fixed.
|
||||
# FIXME-2025: Remove this once the gnulib bug is fixed.
|
||||
if USE_INCLUDED_REGEX
|
||||
XFAIL_TESTS += equiv-classes
|
||||
else
|
||||
@ -62,6 +71,7 @@ else
|
||||
endif
|
||||
|
||||
TESTS = \
|
||||
100k-entries \
|
||||
backref \
|
||||
backref-alt \
|
||||
backref-multibyte-slow \
|
||||
@ -108,6 +118,7 @@ TESTS = \
|
||||
fillbuf-long-line \
|
||||
fmbtest \
|
||||
foad1 \
|
||||
glibc-infloop \
|
||||
grep-dev-null \
|
||||
grep-dev-null-out \
|
||||
grep-dir \
|
||||
@ -139,6 +150,7 @@ TESTS = \
|
||||
options \
|
||||
pcre \
|
||||
pcre-abort \
|
||||
pcre-ascii-digits \
|
||||
pcre-context \
|
||||
pcre-count \
|
||||
pcre-infloop \
|
||||
@ -147,6 +159,7 @@ TESTS = \
|
||||
pcre-jitstack \
|
||||
pcre-o \
|
||||
pcre-utf8 \
|
||||
pcre-utf8-bug224 \
|
||||
pcre-utf8-w \
|
||||
pcre-w \
|
||||
pcre-wx-backref \
|
||||
@ -177,11 +190,13 @@ TESTS = \
|
||||
unibyte-bracket-expr \
|
||||
unibyte-negated-circumflex \
|
||||
utf8-bracket \
|
||||
version-pcre \
|
||||
warn-char-classes \
|
||||
word-delim-multibyte \
|
||||
word-multi-file \
|
||||
word-multibyte \
|
||||
write-error-msg \
|
||||
y2038-vs-32-bit \
|
||||
yesno \
|
||||
z-anchor-newline
|
||||
|
||||
@ -252,6 +267,7 @@ TESTS_ENVIRONMENT = \
|
||||
srcdir='$(srcdir)' \
|
||||
top_srcdir='$(top_srcdir)' \
|
||||
CC='$(CC)' \
|
||||
CONFIG_HEADER='$(abs_top_builddir)/$(CONFIG_INCLUDE)' \
|
||||
GREP_TEST_NAME=`echo $$tst|sed 's,^\./,,;s,/,-,g'` \
|
||||
MAKE=$(MAKE) \
|
||||
MALLOC_PERTURB_=$(MALLOC_PERTURB_) \
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test for back-references and other things.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test for a bug in glibc's regex code as of 2015-09-19.
|
||||
#
|
||||
# Copyright 2015-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2015-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# This once failed to match: echo . | grep '\.'
|
||||
#
|
||||
# Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2020-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Ensure that \s and \S work with repetition operators.
|
||||
#
|
||||
# Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2013-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Ensure that neither \s nor \S matches an invalid multibyte character.
|
||||
#
|
||||
# Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2013-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test for the "binary file ... matches" diagnostic.
|
||||
#
|
||||
# Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2020-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Regression test for GNU grep.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Regression test for GNU grep.
|
||||
#
|
||||
# Copyright 2016-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2016-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Check that case folding works even with titlecase and similarly odd chars.
|
||||
|
||||
# Copyright 2014-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Check that GREP_COLOR elicits a warning.
|
||||
|
||||
# Copyright 2022-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2022-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Test that newline is counted correctly even when the transition
|
||||
# table is rebuilt.
|
||||
|
||||
# Copyright 2014-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Exercise the final reachable code in dfa.c's match_mb_charset.
|
||||
|
||||
# Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2012-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Trigger a heap overrun in grep-2.6..grep-2.8.
|
||||
|
||||
# Copyright (C) 2011-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2011-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test whether "grep '.'" matches invalid UTF-8 byte sequences.
|
||||
#
|
||||
# Copyright 2019-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2019-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# test that the empty file means no pattern
|
||||
# and an empty pattern means match all.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Exercise bugs in grep-2.13 with -i, -n and an RE of ^$ in a multi-byte locale.
|
||||
#
|
||||
# Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2012-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test grep's behavior on encoding errors.
|
||||
#
|
||||
# Copyright 2015-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2015-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# -*- sh -*-
|
||||
# Check environment variables for sane values while testing.
|
||||
|
||||
# Copyright (C) 2000-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2000-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Regression test for GNU grep.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test for false matches in grep 2.19..2.26 in multibyte, non-UTF8 locales
|
||||
#
|
||||
# Copyright (C) 2016-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2016-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# With multiple matches, grep -Fo could print a shorter one.
|
||||
# This bug affected grep versions 2.26 through 2.27.
|
||||
#
|
||||
# Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2017-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
# grep -F -f pattern_file file
|
||||
# grep -G -f pattern_file file
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
# file or line number from which the offending regular expression came.
|
||||
# With 2.26, now, each such diagnostic has a "FILENAME:LINENO: " prefix.
|
||||
|
||||
# Copyright (C) 2016-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2016-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#! /bin/sh
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
@ -10,7 +10,7 @@
|
||||
cz=cs_CZ.UTF-8
|
||||
|
||||
# If cs_CZ.UTF-8 locale doesn't work, skip this test.
|
||||
LC_ALL=$cz locale -k LC_CTYPE 2>/dev/null | grep -q charmap.*UTF-8 \
|
||||
test "`LC_ALL=$cz locale charmap 2>/dev/null`" = UTF-8 \
|
||||
|| skip_ this system lacks the $cz locale
|
||||
|
||||
# If matching is done in single-byte mode, skip this test too
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test various combinations of command-line options.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
@ -150,7 +150,7 @@ Exit $failures
|
||||
# The rest of this file is meant to be executed under this locale.
|
||||
LC_ALL=cs_CZ.UTF-8; export LC_ALL
|
||||
# If the UTF-8 locale doesn't work, skip these tests silently.
|
||||
locale -k LC_CTYPE 2>/dev/null | grep -q "charmap.*UTF-8" || Exit $failures
|
||||
test "`locale charmap 2>/dev/null`" = UTF-8 || Exit $failures
|
||||
|
||||
# Test character class erroneously matching a '[' character.
|
||||
grep_test "[/" "" "[[:alpha:]]" -E
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/* Auxiliary program to detect support for a locale.
|
||||
Copyright 2010-2023 Free Software Foundation, Inc.
|
||||
Copyright 2010-2026 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -12,9 +12,7 @@
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
||||
02110-1301, USA. */
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
#include <locale.h>
|
||||
|
||||
30
tests/glibc-infloop
Executable file
30
tests/glibc-infloop
Executable file
@ -0,0 +1,30 @@
|
||||
#!/bin/sh
|
||||
# This would infloop when using glibc's regex at least until glibc-2.36.
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
|
||||
require_timeout_
|
||||
require_en_utf8_locale_
|
||||
|
||||
fail=0
|
||||
|
||||
cat <<\EOF > glibc-check.c
|
||||
#include <features.h>
|
||||
#ifdef __GLIBC__
|
||||
int ok;
|
||||
#else
|
||||
# error "not glibc"
|
||||
#endif
|
||||
EOF
|
||||
$CC -c glibc-check.c && glibc=1 || glibc=0
|
||||
|
||||
grep '^#define USE_INCLUDED_REGEX 1' "$CONFIG_HEADER" \
|
||||
&& included_regex=1 || included_regex=0
|
||||
|
||||
case $glibc:$included_regex in
|
||||
0:0) skip_ 'runs only with glibc or when built with the included regex'
|
||||
esac
|
||||
|
||||
echo a > in || framework_failure_
|
||||
timeout 2 env LC_ALL=en_US.UTF-8 grep -E -w '((()|a)|())*' in || fail=1
|
||||
|
||||
Exit $fail
|
||||
@ -2,7 +2,7 @@
|
||||
# Test for this performance regression:
|
||||
# grep-3.5 and 3.6 would take O(N^2) time for some sets of input regexps.
|
||||
|
||||
# Copyright 2020-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2020-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@ -34,6 +34,9 @@ while :; do
|
||||
small_ms=$(LC_ALL=C user_time_ 1 grep --file=in empty) || fail=1
|
||||
test $small_ms -ge 200 && break
|
||||
n_pat=$(expr $n_pat '*' 2)
|
||||
case $n_pat:$small_ms in
|
||||
640000:0) skip_ 'user_time_ appears always to report 0 elapsed ms';;
|
||||
esac
|
||||
done
|
||||
|
||||
# Now, search for those same digits mapped to A-J.
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Make sure all of these programs work properly
|
||||
# when invoked with --help or --version.
|
||||
|
||||
# Copyright (C) 2000-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2000-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Exercise high-bit-set unibyte-in-[...]-range bug.
|
||||
|
||||
# Copyright (C) 2011-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2011-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
706
tests/init.sh
706
tests/init.sh
@ -1,706 +0,0 @@
|
||||
# source this file; set up for tests
|
||||
|
||||
# Copyright (C) 2009-2023 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# Using this file in a test
|
||||
# =========================
|
||||
#
|
||||
# The typical skeleton of a test looks like this:
|
||||
#
|
||||
# #!/bin/sh
|
||||
# . "${srcdir=.}/init.sh"; path_prepend_ .
|
||||
# Execute some commands.
|
||||
# Note that these commands are executed in a subdirectory, therefore you
|
||||
# need to prepend "../" to relative filenames in the build directory.
|
||||
# Note that the "path_prepend_ ." is useful only if the body of your
|
||||
# test invokes programs residing in the initial directory.
|
||||
# For example, if the programs you want to test are in src/, and this test
|
||||
# script is named tests/test-1, then you would use "path_prepend_ ../src",
|
||||
# or perhaps export PATH='$(abs_top_builddir)/src$(PATH_SEPARATOR)'"$$PATH"
|
||||
# to all tests via automake's TESTS_ENVIRONMENT.
|
||||
# Set the exit code 0 for success, 77 for skipped, or 1 or other for failure.
|
||||
# Use the skip_ and fail_ functions to print a diagnostic and then exit
|
||||
# with the corresponding exit code.
|
||||
# Exit $?
|
||||
|
||||
# Executing a test that uses this file
|
||||
# ====================================
|
||||
#
|
||||
# Running a single test:
|
||||
# $ make check TESTS=test-foo.sh
|
||||
#
|
||||
# Running a single test, with verbose output:
|
||||
# $ make check TESTS=test-foo.sh VERBOSE=yes
|
||||
#
|
||||
# Running a single test, keeping the temporary directory:
|
||||
# $ make check TESTS=test-foo.sh KEEP=yes
|
||||
#
|
||||
# Running a single test, with single-stepping:
|
||||
# 1. Go into a sub-shell:
|
||||
# $ bash
|
||||
# 2. Set relevant environment variables from TESTS_ENVIRONMENT in the
|
||||
# Makefile:
|
||||
# $ export srcdir=../../tests # this is an example
|
||||
# 3. Execute the commands from the test, copy&pasting them one by one:
|
||||
# $ . "$srcdir/init.sh"; path_prepend_ .
|
||||
# ...
|
||||
# 4. Finally
|
||||
# $ exit
|
||||
|
||||
# =============================================================================
|
||||
# Elementary diagnostics
|
||||
|
||||
ME_=`expr "./$0" : '.*/\(.*\)$'`
|
||||
|
||||
# Prepare PATH_SEPARATOR.
|
||||
# The user is always right.
|
||||
if test "${PATH_SEPARATOR+set}" != set; then
|
||||
# Determine PATH_SEPARATOR by trying to find /bin/sh in a PATH which
|
||||
# contains only /bin. Note that ksh looks also at the FPATH variable,
|
||||
# so we have to set that as well for the test.
|
||||
PATH_SEPARATOR=:
|
||||
(PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 \
|
||||
&& { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 \
|
||||
|| PATH_SEPARATOR=';'
|
||||
}
|
||||
fi
|
||||
|
||||
# Tests must call 'Exit STATUS' instead of 'exit STATUS': cleanup is done
# from a trap, and the exit status has to be carried through that handler.
# errexit is switched off first so that the OSF1/Tru64 sh bug is not
# triggered inside this function.  The subshell sets $? to STATUS just
# before the real exit.
Exit ()
{
  set +e
  (exit $1)
  exit $1
}
|
||||
|
||||
# Print warnings (e.g., about skipped and failed tests) to this file number.
|
||||
# Override by defining to say, 9, in init.cfg, and putting say,
|
||||
# export ...ENVVAR_SETTINGS...; $(SHELL) 9>&2
|
||||
# in the definition of TESTS_ENVIRONMENT in your tests/Makefile.am file.
|
||||
# This is useful when using automake's parallel tests mode, to print
|
||||
# the reason for skip/failure to console, rather than to the .log files.
|
||||
: ${stderr_fileno_=2}
|
||||
|
||||
# Print all arguments, joined by single spaces, as one diagnostic on stderr.
# When $stderr_fileno_ is not 2, additionally send the first line of the
# diagnostic to that file descriptor.
# Joining via "$*" only gives spaces when IFS starts with ' '; if it does
# not, the work is redone in a subshell where IFS has been set to ' '.
warn_ ()
{
  case $IFS in
    ' '*) ;;
    *) (IFS=' '; warn_ "$@")
       return;;
  esac

  printf '%s\n' "$*" >&2
  if test $stderr_fileno_ = 2; then
    :
  else
    printf '%s\n' "$*" | sed 1q >&$stderr_fileno_
  fi
}
|
||||
# Emit a "failed test" diagnostic through warn_, then leave with status 1.
fail_ ()
{
  warn_ "$ME_: failed test: $@"
  Exit 1
}
|
||||
# Emit a "skipped test" diagnostic through warn_, then leave with status 77.
skip_ ()
{
  warn_ "$ME_: skipped test: $@"
  Exit 77
}
|
||||
# Emit a "hard error" diagnostic through warn_, then leave with status 99.
fatal_ ()
{
  warn_ "$ME_: hard error: $@"
  Exit 99
}
|
||||
# Emit a "set-up failure" diagnostic through warn_, then leave with status 99.
framework_failure_ ()
{
  warn_ "$ME_: set-up failure: $@"
  Exit 99
}
|
||||
|
||||
# =============================================================================
|
||||
# Ensure the shell supports modern syntax.
|
||||
|
||||
# Sanitize this shell to POSIX mode, if possible.
|
||||
DUALCASE=1; export DUALCASE
|
||||
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
|
||||
emulate sh
|
||||
NULLCMD=:
|
||||
alias -g '${1+"$@"}'='"$@"'
|
||||
setopt NO_GLOB_SUBST
|
||||
else
|
||||
case `(set -o) 2>/dev/null` in
|
||||
*posix*) set -o posix ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# We require $(...) support unconditionally.
|
||||
# We require that the printf built-in work correctly regarding octal escapes;
|
||||
# this eliminates /bin/sh on AIX 7.2.
|
||||
# We require non-surprising "local" semantics (this eliminates dash).
|
||||
# This takes the admittedly draconian step of eliminating dash, because the
|
||||
# assignment tab=$(printf '\t') works fine, yet preceding it with "local "
|
||||
# transforms it into an assignment that sets the variable to the empty string.
|
||||
# That is too counter-intuitive, and can lead to subtle run-time malfunction.
|
||||
# The example below is less subtle in that with dash, it evokes the run-time
|
||||
# exception "dash: 1: local: 1: bad variable name".
|
||||
# We require a few additional shell features only when $EXEEXT is nonempty,
|
||||
# in order to support automatic $EXEEXT emulation:
|
||||
# - hyphen-containing alias names
|
||||
# - we prefer to use ${var#...} substitution, rather than having
|
||||
# to work around lack of support for that feature.
|
||||
# The following code attempts to find a shell with support for these features.
|
||||
# If the current shell passes the test, we're done. Otherwise, test other
|
||||
# shells until we find one that passes. If one is found, re-exec it.
|
||||
# If no acceptable shell is found, skip the current test.
|
||||
#
|
||||
# The "...set -x; P=1 true 2>err..." test is to disqualify any shell that
|
||||
# emits "P=1" into err, as /bin/sh from SunOS 5.11 and OpenBSD 4.7 do.
|
||||
#
|
||||
# Use "9" to indicate success (rather than 0), in case some shell acts
|
||||
# like Solaris 10's /bin/sh but exits successfully instead of with status 2.
|
||||
|
||||
# Eval this code in a subshell to determine a shell's suitability.
|
||||
# 10 - passes all tests; ok to use
|
||||
# 9 - ok, but enabling "set -x" corrupts app stderr; prefer higher score
|
||||
# ? - not ok
|
||||
gl_shell_test_script_='
|
||||
test $(echo y) = y || exit 1
|
||||
LC_ALL=en_US.UTF-8 printf "\\351" 2>/dev/null \
|
||||
| LC_ALL=C tr "\\351" x | LC_ALL=C grep "^x$" > /dev/null \
|
||||
|| exit 1
|
||||
printf "\\351" 2>/dev/null \
|
||||
| LC_ALL=C tr "\\351" x | LC_ALL=C grep "^x$" > /dev/null \
|
||||
|| exit 1
|
||||
f_local_() { local v=1; }; f_local_ || exit 1
|
||||
f_dash_local_fail_() { local t=$(printf " 1"); }; f_dash_local_fail_
|
||||
score_=10
|
||||
if test "$VERBOSE" = yes; then
|
||||
test -n "$( (exec 3>&1; set -x; P=1 true 2>&3) 2> /dev/null)" && score_=9
|
||||
fi
|
||||
test -z "$EXEEXT" && exit $score_
|
||||
shopt -s expand_aliases
|
||||
alias a-b="echo zoo"
|
||||
v=abx
|
||||
test ${v%x} = ab \
|
||||
&& test ${v#a} = bx \
|
||||
&& test $(a-b) = zoo \
|
||||
&& exit $score_
|
||||
'
|
||||
|
||||
if test "x$1" = "x--no-reexec"; then
|
||||
shift
|
||||
else
|
||||
# Assume a working shell. Export to subshells (setup_ needs this).
|
||||
gl_set_x_corrupts_stderr_=false
|
||||
export gl_set_x_corrupts_stderr_
|
||||
|
||||
# Record the first marginally acceptable shell.
|
||||
marginal_=
|
||||
|
||||
# Search for a shell that meets our requirements.
|
||||
for re_shell_ in __current__ "${CONFIG_SHELL:-no_shell}" \
|
||||
/bin/sh bash dash zsh pdksh fail
|
||||
do
|
||||
test "$re_shell_" = no_shell && continue
|
||||
|
||||
# If we've made it all the way to the sentinel, "fail" without
|
||||
# finding even a marginal shell, skip this test.
|
||||
if test "$re_shell_" = fail; then
|
||||
test -z "$marginal_" && skip_ failed to find an adequate shell
|
||||
re_shell_=$marginal_
|
||||
break
|
||||
fi
|
||||
|
||||
# When testing the current shell, simply "eval" the test code.
|
||||
# Otherwise, run it via $re_shell_ -c ...
|
||||
if test "$re_shell_" = __current__; then
|
||||
# 'eval'ing this code makes Solaris 10's /bin/sh exit with
|
||||
# $? set to 2. It does not evaluate any of the code after the
|
||||
# "unexpected" first '('. Thus, we must run it in a subshell.
|
||||
( eval "$gl_shell_test_script_" ) > /dev/null 2>&1
|
||||
else
|
||||
"$re_shell_" -c "$gl_shell_test_script_" 2>/dev/null
|
||||
fi
|
||||
|
||||
st_=$?
|
||||
|
||||
# $re_shell_ works just fine. Use it.
|
||||
if test $st_ = 10; then
|
||||
gl_set_x_corrupts_stderr_=false
|
||||
break
|
||||
fi
|
||||
|
||||
# If this is our first marginally acceptable shell, remember it.
|
||||
if test "$st_:$marginal_" = 9: ; then
|
||||
marginal_="$re_shell_"
|
||||
gl_set_x_corrupts_stderr_=true
|
||||
fi
|
||||
done
|
||||
|
||||
if test "$re_shell_" != __current__; then
|
||||
# Found a usable shell. Preserve -v and -x.
|
||||
case $- in
|
||||
*v*x* | *x*v*) opts_=-vx ;;
|
||||
*v*) opts_=-v ;;
|
||||
*x*) opts_=-x ;;
|
||||
*) opts_= ;;
|
||||
esac
|
||||
re_shell=$re_shell_
|
||||
export re_shell
|
||||
exec "$re_shell_" $opts_ "$0" --no-reexec "$@"
|
||||
echo "$ME_: exec failed" 1>&2
|
||||
exit 127
|
||||
fi
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# Ensure the shell behaves reasonably.
|
||||
|
||||
# If this is bash, turn off all aliases.
|
||||
test -n "$BASH_VERSION" && unalias -a
|
||||
|
||||
# Note that when supporting $EXEEXT (transparently mapping from PROG_NAME to
|
||||
# PROG_NAME.exe), we want to support hyphen-containing names like test-acos.
|
||||
# That is part of the shell-selection test above. Why use aliases rather
|
||||
# than functions? Because support for hyphen-containing aliases is more
|
||||
# widespread than that for hyphen-containing function names.
|
||||
test -n "$EXEEXT" && test -n "$BASH_VERSION" && shopt -s expand_aliases
|
||||
|
||||
# =============================================================================
|
||||
# Creating a temporary directory (needed by the core test framework)
|
||||
|
||||
# Create a temporary directory, much like mktemp -d does.
|
||||
# Written by Jim Meyering.
|
||||
#
|
||||
# Usage: mktempd_ /tmp phoey.XXXXXXXXXX
|
||||
#
|
||||
# First, try to use the mktemp program.
|
||||
# Failing that, we'll roll our own mktemp-like function:
|
||||
# - try to get random bytes from /dev/urandom, mapping them to file-name bytes
|
||||
# - failing that, generate output from a combination of quickly-varying
|
||||
# sources and awk.
|
||||
# - try to create the desired directory.
|
||||
# - make only $MAX_TRIES_ attempts
|
||||
|
||||
# Helper for mktempd_: print $1 pseudo-random bytes drawn from a-zA-Z0-9.
# Reads /dev/urandom when it is readable; otherwise hashes the output of a
# few quickly-varying commands with awk.
rand_bytes_ ()
{
  n_=$1

  # Maybe try openssl rand -base64 $n_prime_|tr '+/=\012' abcd first?
  # But if they have openssl, they probably have mktemp, too.

  chars_=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
  dev_rand_=/dev/urandom

  if test -r "$dev_rand_"; then
    # Every byte outside $chars_ is translated into the replacement set.
    # Note: 256-length($chars_) == 194; 3 copies of $chars_ is 186 + 8 = 194.
    dd ibs=$n_ count=1 if=$dev_rand_ 2>/dev/null \
      | LC_ALL=C tr -c $chars_ 01234567$chars_$chars_$chars_
  else
    # Fall back on quickly-varying sources + awk.
    # Limit awk program to 7th Edition Unix so that it works even on
    # Solaris 10.
    (date; date +%N; free; who -a; w; ps auxww; ps -ef) 2>&1 | awk '
BEGIN {
  n = '"$n_"'
  for (i = 0; i < 256; i++)
    ordinal[sprintf ("%c", i)] = i
}
{
  for (i = 1; i <= length; i++)
    a[ai++ % n] += ordinal[substr ($0, i, 1)]
}
END {
  chars = "'"$chars_"'"
  charslen = length (chars)
  for (i = 0; i < n; i++)
    printf "%s", substr (chars, a[i] % charslen + 1, 1)
  printf "\n"
}
'
  fi
}
|
||||
|
||||
# Usage: mktempd_ DIR TEMPLATE
#
# Create a new directory inside DIR whose name is TEMPLATE with its trailing
# run of X's replaced, and print that directory's name on stdout.
#   DIR       destination directory; must not end in a slash unless it is
#             exactly "/" or "//".
#   TEMPLATE  file name ending in at least four X's.
# The mktemp program is tried first.  Its result is accepted only if it lies
# under DIR, exists as a directory and has mode 0700.  Otherwise up to
# $MAX_TRIES_ names are generated with rand_bytes_ and created with
# "mkdir -m 0700".  On bad arguments, or when every attempt fails, fail_ is
# called with a diagnostic.
mktempd_ ()
{
  case $# in
    2) ;;
    *) fail_ "Usage: mktempd_ DIR TEMPLATE";;
  esac

  destdir_=$1
  template_=$2

  MAX_TRIES_=4

  # Disallow any trailing slash on specified destdir:
  # it would subvert the post-mktemp "case"-based destdir test.
  # For "/" and "//" the directory already ends in a slash, so it is used
  # as the prefix unchanged.  (This must read $destdir_, the variable set
  # above; the unset name $destdir would give an empty prefix, which
  # defeats the location check and makes the fallback path relative.)
  case $destdir_ in
    / | //) destdir_slash_=$destdir_;;
    */) fail_ "invalid destination dir: remove trailing slash(es)";;
    *) destdir_slash_=$destdir_/;;
  esac

  case $template_ in
    *XXXX) ;;
    *) fail_ \
         "invalid template: $template_ (must have a suffix of at least 4 X's)";;
  esac

  # First, try to use mktemp.
  d=`unset TMPDIR; { mktemp -d -t -p "$destdir_" "$template_"; } 2>/dev/null` &&

  # The resulting name must be in the specified directory.
  case $d in "$destdir_slash_"*) :;; *) false;; esac &&

  # It must have created the directory.
  test -d "$d" &&

  # It must have 0700 permissions.  An "S" in the group-execute column
  # (setgid without group execute) is tolerated.
  perms=`ls -dgo "$d" 2>/dev/null` &&
  case $perms in drwx--[-S]---*) :;; *) false;; esac && {
    echo "$d"
    return
  }

  # If we reach this point, we'll have to create a directory manually.

  # Get a copy of the template without its suffix of X's.
  base_template_=`echo "$template_"|sed 's/XX*$//'`

  # Calculate how many X's we've just removed.
  template_length_=`echo "$template_" | wc -c`
  nx_=`echo "$base_template_" | wc -c`
  nx_=`expr $template_length_ - $nx_`

  err_=
  i_=1
  while :; do
    X_=`rand_bytes_ $nx_`
    candidate_dir_="$destdir_slash_$base_template_$X_"
    # Keep mkdir's diagnostic so the last one can be reported on failure.
    err_=`mkdir -m 0700 "$candidate_dir_" 2>&1` \
      && { echo "$candidate_dir_"; return; }
    test $MAX_TRIES_ -le $i_ && break;
    i_=`expr $i_ + 1`
  done
  fail_ "$err_"
}
|
||||
|
||||
# =============================================================================
|
||||
# Core test framework
|
||||
|
||||
# Print the arbitrary prefix ("gt") used to help distinguish test
# directories.
testdir_prefix_ ()
{
  printf '%s' gt
}
|
||||
|
||||
# Set up the environment for the test to run in.
|
||||
setup_ ()
|
||||
{
|
||||
if test "$VERBOSE" = yes; then
|
||||
# Test whether set -x may cause the selected shell to corrupt an
|
||||
# application's stderr. Many do, including zsh-4.3.10 and the /bin/sh
|
||||
# from SunOS 5.11, OpenBSD 4.7 and Irix 6.5.
|
||||
# If enabling verbose output this way would cause trouble, simply
|
||||
# issue a warning and refrain.
|
||||
if $gl_set_x_corrupts_stderr_; then
|
||||
warn_ "using SHELL=$SHELL with 'set -x' corrupts stderr"
|
||||
else
|
||||
set -x
|
||||
fi
|
||||
fi
|
||||
|
||||
initial_cwd_=$PWD
|
||||
|
||||
# Create and enter the temporary directory.
|
||||
pfx_=`testdir_prefix_`
|
||||
test_dir_=`mktempd_ "$initial_cwd_" "$pfx_-$ME_.XXXX"` \
|
||||
|| fail_ "failed to create temporary directory in $initial_cwd_"
|
||||
cd "$test_dir_" || fail_ "failed to cd to temporary directory"
|
||||
# Set variables srcdir, builddir, for the convenience of the test.
|
||||
case $srcdir in
|
||||
/* | ?:*) ;;
|
||||
*) srcdir="../$srcdir" ;;
|
||||
esac
|
||||
builddir=".."
|
||||
export srcdir builddir
|
||||
|
||||
# As autoconf-generated configure scripts do, ensure that IFS
|
||||
# is defined initially, so that saving and restoring $IFS works.
|
||||
gl_init_sh_nl_='
|
||||
'
|
||||
IFS=" "" $gl_init_sh_nl_"
|
||||
|
||||
# This trap statement, along with a trap on 0 below, ensure that the
|
||||
# temporary directory, $test_dir_, is removed upon exit as well as
|
||||
# upon receipt of any of the listed signals.
|
||||
for sig_ in 1 2 3 13 15; do
|
||||
eval "trap 'Exit $(expr $sig_ + 128)' $sig_"
|
||||
done
|
||||
|
||||
# Remove relative and non-accessible directories from PATH, including '.'
|
||||
# and Zero-length entries.
|
||||
saved_IFS="$IFS"
|
||||
IFS=:
|
||||
new_PATH=
|
||||
sep_=
|
||||
for dir in $PATH; do
|
||||
case "$dir" in
|
||||
/*) test -d "$dir/." || continue
|
||||
new_PATH="${new_PATH}${sep_}${dir}"
|
||||
sep_=':';;
|
||||
esac
|
||||
done
|
||||
IFS="$saved_IFS"
|
||||
PATH="$new_PATH"
|
||||
export PATH
|
||||
}
|
||||
|
||||
# Stub run upon trap (regular exit and interrupt).  It does nothing by
# itself; a test may override it with its own function, e.g., to unmount a
# partition or to undo any other global state changes.
cleanup_ ()
{
  :
}
|
||||
|
||||
# Run the user-overridable cleanup_ function, remove the temporary
# directory $test_dir_ unless KEEP=yes, and exit with the value $? had on
# entry.  If the removal fails and that value was 0, exit with 1 instead.
remove_tmp_ ()
{
  __st=$?
  cleanup_
  case $KEEP in
    yes)
      echo "Not removing temporary directory $test_dir_"
      ;;
    *)
      # cd out of the directory we're about to remove
      cd "$initial_cwd_" || cd / || cd /tmp
      chmod -R u+rwx "$test_dir_"
      # A failed removal turns an otherwise successful status into 1.
      rm -rf "$test_dir_" || case $__st in 0) __st=1;; esac
      ;;
  esac
  exit $__st
}
|
||||
|
||||
# =============================================================================
|
||||
# Prepending directories to PATH
|
||||
|
||||
# Usage: find_exe_basenames_ DIR
# Examine every entry of DIR matching *.exe.  If each such name (including
# the DIR part) consists only of the bytes permitted by the case statement
# below, print the names, stripped of directory and ".exe" suffix and
# separated by single spaces, and return 0.  Otherwise print nothing and
# return 1.
find_exe_basenames_ ()
{
  feb_dir_=$1
  feb_fail_=0
  feb_result_=
  feb_sp_=
  for feb_file_ in $feb_dir_/*.exe; do
    # The glob comes back unexpanded when nothing matched (or when a file
    # literally named "*.exe" vanished after expansion): skip it.
    if test "x$feb_file_" = "x$feb_dir_/*.exe" && test ! -f "$feb_file_"; then
      continue
    fi
    # Exempt [.exe, since we can't create a function by that name, yet
    # we can't invoke [ by PATH search anyways due to shell builtins.
    if test "x$feb_file_" = "x$feb_dir_/[.exe"; then
      continue
    fi
    case $feb_file_ in
      *[!-a-zA-Z/0-9_.+]*)
        feb_fail_=1
        break;;
    esac
    # Remove leading file name components as well as the .exe suffix.
    feb_file_=${feb_file_##*/}
    feb_file_=${feb_file_%.exe}
    feb_result_="$feb_result_$feb_sp_$feb_file_"
    feb_sp_=' '
  done
  if test $feb_fail_ = 0; then
    printf %s "$feb_result_"
  fi
  return $feb_fail_
}
|
||||
|
||||
# Usage: create_exe_shims_ DIR
# For each file in DIR named PROG.exe, define an alias PROG that invokes
# PROG.exe.  Nothing is done, and 0 is returned, when $EXEEXT is empty.
# Any $EXEEXT value other than ".exe" is diagnosed and yields 1.  When
# find_exe_basenames_ rejects DIR, the directory is skipped with a message
# on stderr, no alias is defined, and 0 is returned.
create_exe_shims_ ()
{
  if test -z "$EXEEXT"; then
    return 0
  fi
  if test "x$EXEEXT" != x.exe; then
    echo "$0: unexpected \$EXEEXT value: $EXEEXT" 1>&2
    return 1
  fi

  if base_names_=`find_exe_basenames_ $1`; then
    for base_ in $base_names_; do
      alias "$base_"="$base_$EXEEXT"
    done
  else
    echo "$0 (exe_shim): skipping directory: $1" 1>&2
  fi

  return 0
}
|
||||
|
||||
# Usage: path_prepend_ DIR...
# Prepend each DIR to PATH, in argument order (so the last argument ends up
# first), and export PATH.  A DIR that is not absolute is taken relative to
# $initial_cwd_.  An empty DIR, or one containing $PATH_SEPARATOR, is
# rejected via fail_.
path_prepend_ ()
{
  for path_dir_ in "$@"; do
    case $path_dir_ in
      '') fail_ "invalid path dir: '$path_dir_'";;
      /* | ?:*) abs_path_dir_=$path_dir_;;
      *) abs_path_dir_=$initial_cwd_/$path_dir_;;
    esac
    case $abs_path_dir_ in
      *$PATH_SEPARATOR*) fail_ "invalid path dir: '$abs_path_dir_'";;
    esac
    PATH="$abs_path_dir_$PATH_SEPARATOR$PATH"

    # Create an alias, FOO, for each FOO.exe in this directory.
    if create_exe_shims_ "$abs_path_dir_"; then
      :
    else
      fail_ "something failed (above): $abs_path_dir_"
    fi
  done
  export PATH
}
|
||||
|
||||
# =============================================================================
|
||||
# Convenience environment variables for the tests
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# Enable glibc's malloc-perturbing option.
|
||||
# This is useful for exposing code that depends on the fact that
|
||||
# malloc-related functions often return memory that is mostly zeroed.
|
||||
# If you have the time and cycles, use valgrind to do an even better job.
|
||||
: ${MALLOC_PERTURB_=87}
|
||||
export MALLOC_PERTURB_
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# The interpreter for Bourne-shell scripts.
|
||||
# No special standards compatibility requirements.
|
||||
# Some environments, such as Android, don't have /bin/sh.
|
||||
if test -f /bin/sh$EXEEXT; then
|
||||
BOURNE_SHELL=/bin/sh
|
||||
else
|
||||
BOURNE_SHELL=sh
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# Convenience functions for the tests
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Return value checking
|
||||
|
||||
# Usage: returns_ STATUS COMMAND [ARG]...
# Run COMMAND and succeed only if it exits with exactly STATUS.  This is
# meant for checking that a command fails as desired: a plain
# `command ... && fail=1` would not notice, e.g., a segfault, whereas
#   returns_ 1 command ... || fail
# insists on one explicit exit code.
returns_ () {
  # Disable tracing so it doesn't interfere with stderr of the wrapped command
  { set +x; } 2>/dev/null

  local exp_exit="$1"
  shift
  "$@"
  if test $? -eq $exp_exit; then
    ret_=0
  else
    ret_=1
  fi

  # Switch tracing back on only under the conditions checked here.
  case $VERBOSE:$gl_set_x_corrupts_stderr_ in
    yes:false) set -x;;
  esac
  { return $ret_; } 2>/dev/null
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Text file comparison
|
||||
|
||||
# Emit a header similar to that from diff -u; Print the simulated "diff"
|
||||
# command so that the order of arguments is clear. Don't bother with @@ lines.
|
||||
emit_diff_u_header_ ()
|
||||
{
|
||||
printf '%s\n' "diff -u $*" \
|
||||
"--- $1 1970-01-01" \
|
||||
"+++ $2 1970-01-01"
|
||||
}
|
||||
|
||||
# Arrange not to let diff or cmp operate on /dev/null,
# since on some systems (at least OSF/1 5.1), that doesn't work.
# Return 2 unless there are exactly two arguments and the first or the
# second is /dev/null.  When that argument is /dev/null and the other names
# a nonempty file, print a diff-style header followed by the file's lines
# (prefixed with "+" if the first argument is /dev/null, with "-" if the
# second is) and return 1.  Otherwise return 0.
compare_dev_null_ ()
{
  if test $# != 2; then
    return 2
  fi

  if test "x$1" = x/dev/null; then
    if test -s "$2"; then
      emit_diff_u_header_ "$@"
      sed 's/^/+/' "$2"
      return 1
    fi
    return 0
  fi

  if test "x$2" = x/dev/null; then
    if test -s "$1"; then
      emit_diff_u_header_ "$@"
      sed 's/^/-/' "$1"
      return 1
    fi
    return 0
  fi

  return 2
}
|
||||
|
||||
for diff_opt_ in -u -U3 -c '' no; do
|
||||
test "$diff_opt_" != no &&
|
||||
diff_out_=`exec 2>/dev/null
|
||||
LC_ALL=C diff $diff_opt_ "$0" "$0" < /dev/null` &&
|
||||
break
|
||||
done
|
||||
if test "$diff_opt_" != no; then
|
||||
if test -z "$diff_out_"; then
|
||||
compare_ () { LC_ALL=C diff $diff_opt_ "$@"; }
|
||||
else
|
||||
compare_ ()
|
||||
{
|
||||
# If no differences were found, AIX and HP-UX 'diff' produce output
|
||||
# like "No differences encountered". Hide this output.
|
||||
LC_ALL=C diff $diff_opt_ "$@" > diff.out
|
||||
diff_status_=$?
|
||||
test $diff_status_ -eq 0 || cat diff.out || diff_status_=2
|
||||
rm -f diff.out || diff_status_=2
|
||||
return $diff_status_
|
||||
}
|
||||
fi
|
||||
elif cmp -s /dev/null /dev/null 2>/dev/null; then
|
||||
compare_ () { cmp -s "$@"; }
|
||||
else
|
||||
compare_ () { cmp "$@"; }
|
||||
fi
|
||||
|
||||
# Usage: compare EXPECTED ACTUAL
#
# Let compare_dev_null_ look at the arguments first.  Status 0 from it is
# returned as success and status 1 as failure (any diffs have already been
# printed); for any other status the arguments are handed to compare_.
compare ()
{
  # compare_dev_null_ is only ever run as a non-final member of an AND
  # list, never as an unchecked command, so that a failing status does not
  # end the script in a "set -e" environment.
  compare_dev_null_ "$@" && return 0
  case $? in
    1) return 1;;
  esac
  compare_ "$@"
}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# If you want to override the testdir_prefix_ function,
|
||||
# or to add more utility functions, use this file.
|
||||
test -f "$srcdir/init.cfg" \
|
||||
&& . "$srcdir/init.cfg"
|
||||
|
||||
# =============================================================================
|
||||
# Set up the environment for the test to run in.
|
||||
|
||||
setup_ "$@"
|
||||
# This trap is here, rather than in the setup_ function, because some
|
||||
# shells run the exit trap at shell function exit, rather than script exit.
|
||||
trap remove_tmp_ EXIT
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Exercise -T.
|
||||
|
||||
# Copyright 2016-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2016-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Regression test for GNU grep.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Evoke a segfault in a hard-to-reach code path of kwset.c.
|
||||
# This bug affected grep versions 2.19 through 2.21.
|
||||
#
|
||||
# Copyright (C) 2015-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2015-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# grep-2.21 would incur a 100x penalty for 10x increase in regexp length
|
||||
|
||||
# Copyright 2015-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2015-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# grep-3.4 would require O(N^2) RSS for N regexps
|
||||
# grep-3.5 requires O(N) in the most common cases.
|
||||
|
||||
# Copyright 2020-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2020-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# grep -F -x -o PAT would print an extra newline for each match.
|
||||
# This would fail for grep-2.19 and grep-2.20.
|
||||
|
||||
# Copyright 2014-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -12,4 +12,20 @@ echo x > exp || framework_failure_
|
||||
yes x | timeout 10 grep -m1 x > out || fail=1
|
||||
compare exp out || fail=1
|
||||
|
||||
# Make sure -m2 stops reading even when output is /dev/null.
|
||||
# In grep 3.11, it would continue reading.
|
||||
printf 'x\nx\nx\n' >in || framework_failure_
|
||||
(grep -m2 x >/dev/null && head -n1) <in >out || fail=1
|
||||
compare exp out || fail=1
|
||||
|
||||
# The following two tests would fail before v3.11-70
|
||||
echo x > in || framework_failure_
|
||||
echo in > exp || framework_failure_
|
||||
grep -l -m1 . in > out || fail=1
|
||||
compare exp out || fail=1
|
||||
|
||||
# Ensure that this prints nothing and exits successfully.
|
||||
grep -q -m1 . in > out || fail=1
|
||||
compare /dev/null out || fail=1
|
||||
|
||||
Exit $fail
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Trigger a bug in the DFA matcher.
|
||||
# This would fail for grep-2.20.
|
||||
|
||||
# Copyright 2014-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# grep would sometimes read beyond end of input, when using a non-UTF8
|
||||
# multibyte locale.
|
||||
|
||||
# Copyright 2014-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Test for a performance regression with -Fw and a non-UTF8 multibyte locale.
|
||||
|
||||
# Copyright 2019-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2019-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# The 200x is on an Intel i7-based system.
|
||||
# On an AMD FX-4100, it would take up to 2500x longer.
|
||||
|
||||
# Copyright 2014-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# grep -Fw could false-match when using a non-UTF8 multibyte locale.
|
||||
|
||||
# Copyright 2019-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2019-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test whether \s matches SP and UTF-8 multi-byte white space characters.
|
||||
#
|
||||
# Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2013-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Test a pattern of multiple begin or end line constraints.
|
||||
# This would mistakenly print a line when using grep-2.19.
|
||||
|
||||
# Copyright 2014-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# Test NUL bytes in patterns and data.
|
||||
|
||||
# Copyright 2014-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Test for POSIX options for grep
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Simple PCRE tests.
|
||||
#
|
||||
# Copyright (C) 2001, 2006, 2009-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2001, 2006, 2009-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# Show that grep handles PCRE2_ERROR_MATCHLIMIT.
|
||||
# In grep-2.8, it would abort.
|
||||
#
|
||||
# Copyright (C) 2011-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2011-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
55
tests/pcre-ascii-digits
Executable file
55
tests/pcre-ascii-digits
Executable file
@ -0,0 +1,55 @@
|
||||
#!/bin/sh
|
||||
# Ensure that grep -P's \d matches only the 10 ASCII digits.
|
||||
# With grep-3.9, \d would match, e.g., the multibyte Arabic digits.
|
||||
# The same applied to \D.
|
||||
#
|
||||
# Copyright (C) 2023-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
# notice and this notice are preserved.
|
||||
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
require_en_utf8_locale_
|
||||
LC_ALL=en_US.UTF-8
|
||||
export LC_ALL
|
||||
require_pcre_
|
||||
|
||||
echo . | grep -qP '(*UTF).' 2>/dev/null \
|
||||
|| skip_ 'PCRE unicode support is compiled out'
|
||||
echo 0 | grep -qP '(?aD)\d' \
|
||||
|| skip_ 'PCRE 10.42 and older lack (?aD)'
|
||||
|
||||
fail=0
|
||||
|
||||
# $ printf %s ٠١٢٣٤٥٦٧٨٩|od -An -to1 -w10 |sed 's/ /\\/g'; : arabic digits
|
||||
# \331\240\331\241\331\242\331\243\331\244
|
||||
# \331\245\331\246\331\247\331\250\331\251
|
||||
printf '\331\240\331\241\331\242\331\243\331\244' > in || framework_failure_
|
||||
printf '\331\245\331\246\331\247\331\250\331\251' >> in || framework_failure_
|
||||
printf '\n' >> in || framework_failure_
|
||||
|
||||
# Ensure that \d matches no Arabic-Indic digits.
|
||||
returns_ 1 grep -P '\d' in > out || fail=1
|
||||
compare /dev/null out || fail=1
|
||||
|
||||
# Ensure that ^\D+$ matches all the Arabic-Indic digits.
|
||||
grep -P '^\D+$' in > out || fail=1
|
||||
compare in out || fail=1
|
||||
|
||||
# When built with PCRE2 10.43 and newer, one may use (?aD) and (?-aD)
|
||||
# to toggle between modes. (?aD) is the default (making \d == [0-9]).
|
||||
# (?-aD) relaxes \d, making it match "all" digits.
|
||||
# Use mixed digits as input: Arabic-Indic digit zero and ASCII 4.
|
||||
printf '\331\2404\n' > in2 || framework_failure_
|
||||
|
||||
returns_ 1 grep -P '\d\d' in2 > out || fail=1
|
||||
compare /dev/null out || fail=1
|
||||
|
||||
grep -P '(?-aD)\d(?aD)\d' in2 > out || fail=1
|
||||
compare in2 out || fail=1
|
||||
|
||||
returns_ 1 grep -P '\d(?-aD)\d' in2 > out || fail=1
|
||||
compare /dev/null out || fail=1
|
||||
|
||||
Exit $fail
|
||||
@ -2,7 +2,7 @@
|
||||
# grep -P and grep -Pc would produce inconsistent results.
|
||||
# This bug affected grep versions 2.21 through 2.22.
|
||||
#
|
||||
# Copyright (C) 2015-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2015-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# With some versions of libpcre, apparently including 8.35,
|
||||
# the following would trigger an infinite loop in its match function.
|
||||
|
||||
# Copyright 2014-2023 Free Software Foundation, Inc.
|
||||
# Copyright 2014-2026 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#! /bin/sh
|
||||
# Ensure that grep -oaP doesn't infloop for invalid multi-byte input
|
||||
#
|
||||
# Copyright (C) 2015-2023 Free Software Foundation, Inc.
|
||||
# Copyright (C) 2015-2026 Free Software Foundation, Inc.
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification,
|
||||
# are permitted in any medium without royalty provided the copyright
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user