mirror of
https://https.git.savannah.gnu.org/git/findutils.git
synced 2026-01-26 15:39:06 +00:00
regexprops: don't mention regex dialects we're not going to document.
* lib/regextype.c (get_regex_type_synonym): don't return regex dialect Y as a synonym of dialect X, if we're not in fact going to include X. Accept a CONTEXT parameter in order to identify this situation. This ensures that the bug fixed in commit e2c673cbcdc325a3a2e9dd02169bb4a42c61bc48 stays fixed for any permutation of regex_map. * lib/regextype.h: update prototype of get_regex_type_synonym. * lib/regexprops.c (describe_all): Pass the new context parameter. * doc/regexprops.texi: regenerate this file.
This commit is contained in:
parent
e2c673cbcd
commit
1b53838ddf
@ -11,15 +11,15 @@
|
||||
|
||||
@menu
|
||||
* findutils-default regular expression syntax::
|
||||
* posix-awk regular expression syntax::
|
||||
* posix-basic regular expression syntax::
|
||||
* posix-egrep regular expression syntax::
|
||||
* posix-extended regular expression syntax::
|
||||
* awk regular expression syntax::
|
||||
* egrep regular expression syntax::
|
||||
* emacs regular expression syntax::
|
||||
* gnu-awk regular expression syntax::
|
||||
* grep regular expression syntax::
|
||||
* posix-awk regular expression syntax::
|
||||
* awk regular expression syntax::
|
||||
* posix-basic regular expression syntax::
|
||||
* posix-egrep regular expression syntax::
|
||||
* egrep regular expression syntax::
|
||||
* posix-extended regular expression syntax::
|
||||
@end menu
|
||||
|
||||
@node findutils-default regular expression syntax
|
||||
@ -113,334 +113,6 @@ The character @samp{$} only represents the end of a string when it appears:
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node posix-awk regular expression syntax
|
||||
@subsection @samp{posix-awk} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character except the null character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item +
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item ?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item \+
|
||||
matches a @samp{+}.
|
||||
@item \?
|
||||
matches a @samp{?}.
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} can be used to quote the following character. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
|
||||
|
||||
|
||||
Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
||||
|
||||
The alternation operator is @samp{|}.
|
||||
|
||||
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
||||
|
||||
|
||||
@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{(}
|
||||
|
||||
@item After the alternation operator @samp{|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Intervals are specified by @samp{@{} and @samp{@}}.
|
||||
Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}.
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node posix-basic regular expression syntax
|
||||
@subsection @samp{posix-basic} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character except the null character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item \+
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item \?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item + and ?
|
||||
match themselves.
|
||||
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are supported:
|
||||
@enumerate
|
||||
|
||||
@item @samp{\w} matches a character within a word
|
||||
|
||||
@item @samp{\W} matches a character which is not within a word
|
||||
|
||||
@item @samp{\<} matches the beginning of a word
|
||||
|
||||
@item @samp{\>} matches the end of a word
|
||||
|
||||
@item @samp{\b} matches a word boundary
|
||||
|
||||
@item @samp{\B} matches characters which are not a word boundary
|
||||
|
||||
@item @samp{\`} matches the beginning of the whole input
|
||||
|
||||
@item @samp{\'} matches the end of the whole input
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Grouping is performed with backslashes followed by parentheses @samp{\(}, @samp{\)}. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{\(}.
|
||||
|
||||
The alternation operator is @samp{\|}.
|
||||
|
||||
The character @samp{^} only represents the beginning of a string when it appears:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{\(}
|
||||
|
||||
|
||||
@item After the alternation operator @samp{\|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
The character @samp{$} only represents the end of a string when it appears:
|
||||
@enumerate
|
||||
|
||||
@item At the end of a regular expression
|
||||
|
||||
@item Before a close-group, signified by @samp{\)}
|
||||
|
||||
@item Before the alternation operator @samp{\|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
@samp{\*}, @samp{\+} and @samp{\?} are special at any point in a regular expression except:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{\(}
|
||||
|
||||
@item After the alternation operator @samp{\|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Intervals are specified by @samp{\@{} and @samp{\@}}.
|
||||
Invalid intervals such as @samp{a\@{1z} are not accepted.
|
||||
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node posix-egrep regular expression syntax
|
||||
@subsection @samp{posix-egrep} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item +
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item ?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item \+
|
||||
matches a @samp{+}.
|
||||
@item \?
|
||||
matches a @samp{?}.
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are supported:
|
||||
@enumerate
|
||||
|
||||
@item @samp{\w} matches a character within a word
|
||||
|
||||
@item @samp{\W} matches a character which is not within a word
|
||||
|
||||
@item @samp{\<} matches the beginning of a word
|
||||
|
||||
@item @samp{\>} matches the end of a word
|
||||
|
||||
@item @samp{\b} matches a word boundary
|
||||
|
||||
@item @samp{\B} matches characters which are not a word boundary
|
||||
|
||||
@item @samp{\`} matches the beginning of the whole input
|
||||
|
||||
@item @samp{\'} matches the end of the whole input
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
||||
|
||||
The alternation operator is @samp{|}.
|
||||
|
||||
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
||||
|
||||
|
||||
The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.
|
||||
|
||||
|
||||
Intervals are specified by @samp{@{} and @samp{@}}.
|
||||
Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}.
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node posix-extended regular expression syntax
|
||||
@subsection @samp{posix-extended} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character except the null character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item +
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item ?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item \+
|
||||
matches a @samp{+}.
|
||||
@item \?
|
||||
matches a @samp{?}.
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are supported:
|
||||
@enumerate
|
||||
|
||||
@item @samp{\w} matches a character within a word
|
||||
|
||||
@item @samp{\W} matches a character which is not within a word
|
||||
|
||||
@item @samp{\<} matches the beginning of a word
|
||||
|
||||
@item @samp{\>} matches the end of a word
|
||||
|
||||
@item @samp{\b} matches a word boundary
|
||||
|
||||
@item @samp{\B} matches characters which are not a word boundary
|
||||
|
||||
@item @samp{\`} matches the beginning of the whole input
|
||||
|
||||
@item @samp{\'} matches the end of the whole input
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
||||
|
||||
The alternation operator is @samp{|}.
|
||||
|
||||
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
||||
|
||||
|
||||
@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{(}
|
||||
|
||||
@item After the alternation operator @samp{|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Intervals are specified by @samp{@{} and @samp{@}}.
|
||||
Invalid intervals such as @samp{a@{1z} are not accepted.
|
||||
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node awk regular expression syntax
|
||||
@subsection @samp{awk} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character except the null character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item +
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item ?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item \+
|
||||
matches a @samp{+}.
|
||||
@item \?
|
||||
matches a @samp{?}.
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} can be used to quote the following character. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
|
||||
|
||||
|
||||
Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit matches that digit.
|
||||
|
||||
The alternation operator is @samp{|}.
|
||||
|
||||
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
||||
|
||||
|
||||
@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{(}
|
||||
|
||||
@item After the alternation operator @samp{|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node egrep regular expression syntax
|
||||
@subsection @samp{egrep} regular expression syntax
|
||||
This is a synonym for posix-egrep.
|
||||
@node emacs regular expression syntax
|
||||
@subsection @samp{emacs} regular expression syntax
|
||||
|
||||
@ -699,3 +371,331 @@ Invalid intervals such as @samp{a\@{1z} are not accepted.
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node posix-awk regular expression syntax
|
||||
@subsection @samp{posix-awk} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character except the null character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item +
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item ?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item \+
|
||||
matches a @samp{+}.
|
||||
@item \?
|
||||
matches a @samp{?}.
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} can be used to quote the following character. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
|
||||
|
||||
|
||||
Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
||||
|
||||
The alternation operator is @samp{|}.
|
||||
|
||||
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
||||
|
||||
|
||||
@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{(}
|
||||
|
||||
@item After the alternation operator @samp{|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Intervals are specified by @samp{@{} and @samp{@}}.
|
||||
Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}.
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node awk regular expression syntax
|
||||
@subsection @samp{awk} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character except the null character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item +
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item ?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item \+
|
||||
matches a @samp{+}.
|
||||
@item \?
|
||||
matches a @samp{?}.
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} can be used to quote the following character. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are not supported and so @samp{\w}, @samp{\W}, @samp{\<}, @samp{\>}, @samp{\b}, @samp{\B}, @samp{\`}, and @samp{\'} match @samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.
|
||||
|
||||
|
||||
Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit matches that digit.
|
||||
|
||||
The alternation operator is @samp{|}.
|
||||
|
||||
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
||||
|
||||
|
||||
@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{(}
|
||||
|
||||
@item After the alternation operator @samp{|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node posix-basic regular expression syntax
|
||||
@subsection @samp{posix-basic} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character except the null character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item \+
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item \?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item + and ?
|
||||
match themselves.
|
||||
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are supported:
|
||||
@enumerate
|
||||
|
||||
@item @samp{\w} matches a character within a word
|
||||
|
||||
@item @samp{\W} matches a character which is not within a word
|
||||
|
||||
@item @samp{\<} matches the beginning of a word
|
||||
|
||||
@item @samp{\>} matches the end of a word
|
||||
|
||||
@item @samp{\b} matches a word boundary
|
||||
|
||||
@item @samp{\B} matches characters which are not a word boundary
|
||||
|
||||
@item @samp{\`} matches the beginning of the whole input
|
||||
|
||||
@item @samp{\'} matches the end of the whole input
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Grouping is performed with backslashes followed by parentheses @samp{\(}, @samp{\)}. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{\(}.
|
||||
|
||||
The alternation operator is @samp{\|}.
|
||||
|
||||
The character @samp{^} only represents the beginning of a string when it appears:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{\(}
|
||||
|
||||
|
||||
@item After the alternation operator @samp{\|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
The character @samp{$} only represents the end of a string when it appears:
|
||||
@enumerate
|
||||
|
||||
@item At the end of a regular expression
|
||||
|
||||
@item Before a close-group, signified by @samp{\)}
|
||||
|
||||
@item Before the alternation operator @samp{\|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
@samp{\*}, @samp{\+} and @samp{\?} are special at any point in a regular expression except:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{\(}
|
||||
|
||||
@item After the alternation operator @samp{\|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Intervals are specified by @samp{\@{} and @samp{\@}}.
|
||||
Invalid intervals such as @samp{a\@{1z} are not accepted.
|
||||
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node posix-egrep regular expression syntax
|
||||
@subsection @samp{posix-egrep} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item +
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item ?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item \+
|
||||
matches a @samp{+}.
|
||||
@item \?
|
||||
matches a @samp{?}.
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are supported:
|
||||
@enumerate
|
||||
|
||||
@item @samp{\w} matches a character within a word
|
||||
|
||||
@item @samp{\W} matches a character which is not within a word
|
||||
|
||||
@item @samp{\<} matches the beginning of a word
|
||||
|
||||
@item @samp{\>} matches the end of a word
|
||||
|
||||
@item @samp{\b} matches a word boundary
|
||||
|
||||
@item @samp{\B} matches characters which are not a word boundary
|
||||
|
||||
@item @samp{\`} matches the beginning of the whole input
|
||||
|
||||
@item @samp{\'} matches the end of the whole input
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
||||
|
||||
The alternation operator is @samp{|}.
|
||||
|
||||
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
||||
|
||||
|
||||
The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.
|
||||
|
||||
|
||||
Intervals are specified by @samp{@{} and @samp{@}}.
|
||||
Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\@{1}.
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@node egrep regular expression syntax
|
||||
@subsection @samp{egrep} regular expression syntax
|
||||
This is a synonym for posix-egrep.
|
||||
@node posix-extended regular expression syntax
|
||||
@subsection @samp{posix-extended} regular expression syntax
|
||||
|
||||
|
||||
The character @samp{.} matches any single character except the null character.
|
||||
|
||||
|
||||
@table @samp
|
||||
|
||||
@item +
|
||||
indicates that the regular expression should match one or more occurrences of the previous atom or regexp.
|
||||
@item ?
|
||||
indicates that the regular expression should match zero or one occurrence of the previous atom or regexp.
|
||||
@item \+
|
||||
matches a @samp{+}.
|
||||
@item \?
|
||||
matches a @samp{?}.
|
||||
@end table
|
||||
|
||||
|
||||
Bracket expressions are used to match ranges of characters. Bracket expressions where the range is backward, for example @samp{[z-a]}, are invalid. Within square brackets, @samp{\} is taken literally. Character classes are supported; for example @samp{[[:digit:]]} will match a single decimal digit.
|
||||
|
||||
|
||||
GNU extensions are supported:
|
||||
@enumerate
|
||||
|
||||
@item @samp{\w} matches a character within a word
|
||||
|
||||
@item @samp{\W} matches a character which is not within a word
|
||||
|
||||
@item @samp{\<} matches the beginning of a word
|
||||
|
||||
@item @samp{\>} matches the end of a word
|
||||
|
||||
@item @samp{\b} matches a word boundary
|
||||
|
||||
@item @samp{\B} matches characters which are not a word boundary
|
||||
|
||||
@item @samp{\`} matches the beginning of the whole input
|
||||
|
||||
@item @samp{\'} matches the end of the whole input
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Grouping is performed with parentheses @samp{()}. An unmatched @samp{)} matches just itself. A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis @samp{(}.
|
||||
|
||||
The alternation operator is @samp{|}.
|
||||
|
||||
The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified.
|
||||
|
||||
|
||||
@samp{*}, @samp{+} and @samp{?} are special at any point in a regular expression except the following places, where they are not allowed:
|
||||
@enumerate
|
||||
|
||||
@item At the beginning of a regular expression
|
||||
|
||||
@item After an open-group, signified by @samp{(}
|
||||
|
||||
@item After the alternation operator @samp{|}
|
||||
|
||||
@end enumerate
|
||||
|
||||
|
||||
Intervals are specified by @samp{@{} and @samp{@}}.
|
||||
Invalid intervals such as @samp{a@{1z} are not accepted.
|
||||
|
||||
|
||||
The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.
|
||||
|
||||
|
||||
@ -558,7 +558,7 @@ describe_all (const char *contextname,
|
||||
if (NULL == next)
|
||||
next = "";
|
||||
begin_subsection (name, next, previous, up);
|
||||
parent = get_regex_type_synonym (i);
|
||||
parent = get_regex_type_synonym (i, context);
|
||||
if (parent >= 0)
|
||||
{
|
||||
content ("This is a synonym for ");
|
||||
|
||||
@ -56,19 +56,17 @@ struct tagRegexTypeMap
|
||||
struct tagRegexTypeMap regex_map[] =
|
||||
{
|
||||
{ "findutils-default", CONTEXT_FINDUTILS, RE_SYNTAX_EMACS|RE_DOT_NEWLINE },
|
||||
|
||||
{ "posix-awk", CONTEXT_ALL, RE_SYNTAX_POSIX_AWK },
|
||||
{ "posix-basic", CONTEXT_ALL, RE_SYNTAX_POSIX_BASIC },
|
||||
{ "posix-egrep", CONTEXT_ALL, RE_SYNTAX_POSIX_EGREP },
|
||||
{ "posix-extended", CONTEXT_ALL, RE_SYNTAX_POSIX_EXTENDED },
|
||||
{ "posix-minimal-basic", CONTEXT_GENERIC, RE_SYNTAX_POSIX_MINIMAL_BASIC },
|
||||
|
||||
{ "awk", CONTEXT_ALL, RE_SYNTAX_AWK },
|
||||
{ "ed", CONTEXT_GENERIC, RE_SYNTAX_ED },
|
||||
{ "egrep", CONTEXT_ALL, RE_SYNTAX_EGREP },
|
||||
{ "emacs", CONTEXT_ALL, RE_SYNTAX_EMACS },
|
||||
{ "gnu-awk", CONTEXT_ALL, RE_SYNTAX_GNU_AWK },
|
||||
{ "grep", CONTEXT_ALL, RE_SYNTAX_GREP },
|
||||
{ "posix-awk", CONTEXT_ALL, RE_SYNTAX_POSIX_AWK },
|
||||
{ "awk", CONTEXT_ALL, RE_SYNTAX_AWK },
|
||||
{ "posix-basic", CONTEXT_ALL, RE_SYNTAX_POSIX_BASIC },
|
||||
{ "posix-egrep", CONTEXT_ALL, RE_SYNTAX_POSIX_EGREP },
|
||||
{ "egrep", CONTEXT_ALL, RE_SYNTAX_EGREP },
|
||||
{ "posix-extended", CONTEXT_ALL, RE_SYNTAX_POSIX_EXTENDED },
|
||||
{ "posix-minimal-basic", CONTEXT_GENERIC, RE_SYNTAX_POSIX_MINIMAL_BASIC },
|
||||
{ "sed", CONTEXT_GENERIC, RE_SYNTAX_SED },
|
||||
/* ,{ "posix-common", CONTEXT_GENERIC, _RE_SYNTAX_POSIX_COMMON } */
|
||||
};
|
||||
@ -140,18 +138,26 @@ unsigned int get_regex_type_context (unsigned int ix)
|
||||
}
|
||||
|
||||
int
|
||||
get_regex_type_synonym (unsigned int ix)
|
||||
get_regex_type_synonym (unsigned int ix, unsigned int context)
|
||||
{
|
||||
unsigned i;
|
||||
int flags;
|
||||
|
||||
if (ix >= N_REGEX_MAP_ENTRIES)
|
||||
return -1;
|
||||
|
||||
flags = regex_map[ix].option_val;
|
||||
/* Terminate the loop before we get to IX, so that we always
|
||||
consistently choose the same entry as a synonym (rather than
|
||||
stating that x and y are synonyms of each other). */
|
||||
for (i=0u; i<ix; ++i)
|
||||
{
|
||||
if (flags == regex_map[i].option_val)
|
||||
if ((regex_map[i].context & context) == 0)
|
||||
{
|
||||
/* It is pointless to state that "x is a synonym of y" if we
|
||||
are not in fact going to include y. */
|
||||
continue;
|
||||
}
|
||||
else if (flags == regex_map[i].option_val)
|
||||
{
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -44,10 +44,11 @@ const char * get_regex_type_name(unsigned int ix);
|
||||
*/
|
||||
int get_regex_type_flags(unsigned int ix);
|
||||
|
||||
/* If regular expression type IX (which is a regular expression type index) has
|
||||
* one or more synonyms, return the index of one of them. Otherwise, return -1.
|
||||
/* If regular expression type IX (which is a regular expression type
|
||||
* index) has one or more synonyms which are interesting in context
|
||||
* CONTEXT, return the index of one of them. Otherwise, return -1.
|
||||
*/
|
||||
int get_regex_type_synonym(unsigned int ix);
|
||||
int get_regex_type_synonym(unsigned int ix, unsigned int context);
|
||||
|
||||
/* Returns one of CONTEXT_FINDUTILS, CONTEXT_GENERIC or CONTEXT_ALL.
|
||||
* This identifies whether this regular expression type index is relevant for,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user