mirror of
https://github.com/Perl/perl5.git
synced 2026-01-27 01:44:43 +00:00
regcomp.c: Add ability to take union of a complement
Previous commits have added the ability to the inversion list intersection routine to take the complement of one of its inputs. Likewise, for unions, this will be a frequent paradigm, and it is cheaper to do the complement of an input in the routine than to construct a new temporary that is the desired complement, and throw it away.
This commit is contained in:
parent
3f80b5713f
commit
164173a25b
@ -1378,7 +1378,8 @@ EsMR |IV |invlist_search |NN SV* const invlist|const UV cp
|
||||
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
|
||||
EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i
|
||||
EXpM |void |_invlist_intersection_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** i
|
||||
EXpM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output
|
||||
EXmM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output
|
||||
EXpM |void |_invlist_union_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** output
|
||||
EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result
|
||||
EXpM |void |_invlist_invert|NN SV* const invlist
|
||||
EXpM |void |_invlist_invert_prop|NN SV* const invlist
|
||||
|
||||
2
embed.h
2
embed.h
@ -953,7 +953,7 @@
|
||||
#define _invlist_invert(a) Perl__invlist_invert(aTHX_ a)
|
||||
#define _invlist_invert_prop(a) Perl__invlist_invert_prop(aTHX_ a)
|
||||
#define _invlist_populate_swatch(a,b,c,d) Perl__invlist_populate_swatch(aTHX_ a,b,c,d)
|
||||
#define _invlist_union(a,b,c) Perl__invlist_union(aTHX_ a,b,c)
|
||||
#define _invlist_union_maybe_complement_2nd(a,b,c,d) Perl__invlist_union_maybe_complement_2nd(aTHX_ a,b,c,d)
|
||||
#define _new_invlist(a) Perl__new_invlist(aTHX_ a)
|
||||
#define _swash_inversion_hash(a) Perl__swash_inversion_hash(aTHX_ a)
|
||||
#define _swash_to_invlist(a) Perl__swash_to_invlist(aTHX_ a)
|
||||
|
||||
10
proto.h
10
proto.h
@ -6582,10 +6582,14 @@ PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const
|
||||
__attribute__nonnull__(pTHX_2)
|
||||
__attribute__nonnull__(pTHX_3); */
|
||||
|
||||
PERL_CALLCONV void Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
|
||||
/* PERL_CALLCONV void _invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
|
||||
__attribute__nonnull__(pTHX_2)
|
||||
__attribute__nonnull__(pTHX_3);
|
||||
#define PERL_ARGS_ASSERT__INVLIST_UNION \
|
||||
__attribute__nonnull__(pTHX_3); */
|
||||
|
||||
PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
|
||||
__attribute__nonnull__(pTHX_2)
|
||||
__attribute__nonnull__(pTHX_4);
|
||||
#define PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND \
|
||||
assert(b); assert(output)
|
||||
|
||||
PERL_CALLCONV SV* Perl__new_invlist(pTHX_ IV initial_size)
|
||||
|
||||
57
regcomp.c
57
regcomp.c
@ -6536,12 +6536,14 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
|
||||
}
|
||||
|
||||
void
|
||||
Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
|
||||
Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
|
||||
{
|
||||
/* Take the union of two inversion lists and point <output> to it. *output
|
||||
* should be defined upon input, and if it points to one of the two lists,
|
||||
* the reference count to that list will be decremented. The first list,
|
||||
* <a>, may be NULL, in which case a copy of the second list is returned.
|
||||
* If <complement_b> is TRUE, the union is taken of the complement
|
||||
* (inversion) of <b> instead of b itself.
|
||||
*
|
||||
* The basis for this comes from "Unicode Demystified" Chapter 13 by
|
||||
* Richard Gillam, published by Addison-Wesley, and explained at some
|
||||
@ -6577,7 +6579,7 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
|
||||
*/
|
||||
UV count = 0;
|
||||
|
||||
PERL_ARGS_ASSERT__INVLIST_UNION;
|
||||
PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
|
||||
assert(a != b);
|
||||
|
||||
/* If either one is empty, the union is the other one */
|
||||
@ -6589,6 +6591,9 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
|
||||
}
|
||||
if (*output != b) {
|
||||
*output = invlist_clone(b);
|
||||
if (complement_b) {
|
||||
_invlist_invert(*output);
|
||||
}
|
||||
} /* else *output already = b; */
|
||||
return;
|
||||
}
|
||||
@ -6596,10 +6601,20 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
|
||||
if (*output == b) {
|
||||
SvREFCNT_dec(b);
|
||||
}
|
||||
if (*output != a) {
|
||||
*output = invlist_clone(a);
|
||||
}
|
||||
/* else *output already = a; */
|
||||
|
||||
/* The complement of an empty list is a list that has everything in it,
|
||||
* so the union with <a> includes everything too */
|
||||
if (complement_b) {
|
||||
if (a == *output) {
|
||||
SvREFCNT_dec(a);
|
||||
}
|
||||
*output = _new_invlist(1);
|
||||
_append_range_to_invlist(*output, 0, UV_MAX);
|
||||
}
|
||||
else if (*output != a) {
|
||||
*output = invlist_clone(a);
|
||||
}
|
||||
/* else *output already = a; */
|
||||
return;
|
||||
}
|
||||
|
||||
@ -6607,6 +6622,31 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
|
||||
array_a = invlist_array(a);
|
||||
array_b = invlist_array(b);
|
||||
|
||||
/* If are to take the union of 'a' with the complement of b, set it
|
||||
* up so are looking at b's complement. */
|
||||
if (complement_b) {
|
||||
|
||||
/* To complement, we invert: if the first element is 0, remove it. To
|
||||
* do this, we just pretend the array starts one later, and clear the
|
||||
* flag as we don't have to do anything else later */
|
||||
if (array_b[0] == 0) {
|
||||
array_b++;
|
||||
len_b--;
|
||||
complement_b = FALSE;
|
||||
}
|
||||
else {
|
||||
|
||||
/* But if the first element is not zero, we unshift a 0 before the
|
||||
* array. The data structure reserves a space for that 0 (which
|
||||
* should be a '1' right now), so physical shifting is unneeded,
|
||||
* but temporarily change that element to 0. Before exiting the
|
||||
* routine, we must restore the element to '1' */
|
||||
array_b--;
|
||||
len_b++;
|
||||
array_b[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Size the union for the worst case: that the sets are completely
|
||||
* disjoint */
|
||||
u = _new_invlist(len_a + len_b);
|
||||
@ -6725,6 +6765,11 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
|
||||
SvREFCNT_dec(*output);
|
||||
}
|
||||
|
||||
/* If we've changed b, restore it */
|
||||
if (complement_b) {
|
||||
array_b[0] = 1;
|
||||
}
|
||||
|
||||
*output = u;
|
||||
return;
|
||||
}
|
||||
|
||||
1
regexp.h
1
regexp.h
@ -56,6 +56,7 @@ typedef struct regexp_paren_pair {
|
||||
} regexp_paren_pair;
|
||||
|
||||
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
|
||||
#define _invlist_union(a, b, output) _invlist_union_maybe_complement_2nd(a, b, FALSE, output)
|
||||
#define _invlist_intersection(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, FALSE, output)
|
||||
|
||||
/* Subtracting b from a leaves in a everything that was there that isn't in b,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user