regcomp.c: Add ability to take union of a complement

Previous commits have added the ability to the inversion list
intersection routine to take the complement of one of its inputs.
Likewise, for unions, this will be a frequent paradigm, and it is
cheaper to do the complement of an input in the routine than to
construct a new temporary that is the desired complement, and throw it
away.
This commit is contained in:
Karl Williamson 2012-02-03 11:40:34 -07:00
parent 3f80b5713f
commit 164173a25b
5 changed files with 62 additions and 11 deletions

View File

@ -1378,7 +1378,8 @@ EsMR |IV |invlist_search |NN SV* const invlist|const UV cp
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i
EXpM |void |_invlist_intersection_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** i
EXpM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output
EXmM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output
EXpM |void |_invlist_union_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** output
EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result
EXpM |void |_invlist_invert|NN SV* const invlist
EXpM |void |_invlist_invert_prop|NN SV* const invlist

View File

@ -953,7 +953,7 @@
#define _invlist_invert(a) Perl__invlist_invert(aTHX_ a)
#define _invlist_invert_prop(a) Perl__invlist_invert_prop(aTHX_ a)
#define _invlist_populate_swatch(a,b,c,d) Perl__invlist_populate_swatch(aTHX_ a,b,c,d)
#define _invlist_union(a,b,c) Perl__invlist_union(aTHX_ a,b,c)
#define _invlist_union_maybe_complement_2nd(a,b,c,d) Perl__invlist_union_maybe_complement_2nd(aTHX_ a,b,c,d)
#define _new_invlist(a) Perl__new_invlist(aTHX_ a)
#define _swash_inversion_hash(a) Perl__swash_inversion_hash(aTHX_ a)
#define _swash_to_invlist(a) Perl__swash_to_invlist(aTHX_ a)

10
proto.h
View File

@ -6582,10 +6582,14 @@ PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3); */
PERL_CALLCONV void Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
/* PERL_CALLCONV void _invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3);
#define PERL_ARGS_ASSERT__INVLIST_UNION \
__attribute__nonnull__(pTHX_3); */
PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND \
assert(b); assert(output)
PERL_CALLCONV SV* Perl__new_invlist(pTHX_ IV initial_size)

View File

@ -6536,12 +6536,14 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
}
void
Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
{
/* Take the union of two inversion lists and point <output> to it. *output
* should be defined upon input, and if it points to one of the two lists,
* the reference count to that list will be decremented. The first list,
* <a>, may be NULL, in which case a copy of the second list is returned.
* If <complement_b> is TRUE, the union is taken of the complement
* (inversion) of <b> instead of b itself.
*
* The basis for this comes from "Unicode Demystified" Chapter 13 by
* Richard Gillam, published by Addison-Wesley, and explained at some
@ -6577,7 +6579,7 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
*/
UV count = 0;
PERL_ARGS_ASSERT__INVLIST_UNION;
PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
assert(a != b);
/* If either one is empty, the union is the other one */
@ -6589,6 +6591,9 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
}
if (*output != b) {
*output = invlist_clone(b);
if (complement_b) {
_invlist_invert(*output);
}
} /* else *output already = b; */
return;
}
@ -6596,10 +6601,20 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
if (*output == b) {
SvREFCNT_dec(b);
}
if (*output != a) {
*output = invlist_clone(a);
}
/* else *output already = a; */
/* The complement of an empty list is a list that has everything in it,
* so the union with <a> includes everything too */
if (complement_b) {
if (a == *output) {
SvREFCNT_dec(a);
}
*output = _new_invlist(1);
_append_range_to_invlist(*output, 0, UV_MAX);
}
else if (*output != a) {
*output = invlist_clone(a);
}
/* else *output already = a; */
return;
}
@ -6607,6 +6622,31 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
array_a = invlist_array(a);
array_b = invlist_array(b);
/* If are to take the union of 'a' with the complement of b, set it
* up so are looking at b's complement. */
if (complement_b) {
/* To complement, we invert: if the first element is 0, remove it. To
* do this, we just pretend the array starts one later, and clear the
* flag as we don't have to do anything else later */
if (array_b[0] == 0) {
array_b++;
len_b--;
complement_b = FALSE;
}
else {
/* But if the first element is not zero, we unshift a 0 before the
* array. The data structure reserves a space for that 0 (which
* should be a '1' right now), so physical shifting is unneeded,
* but temporarily change that element to 0. Before exiting the
* routine, we must restore the element to '1' */
array_b--;
len_b++;
array_b[0] = 0;
}
}
/* Size the union for the worst case: that the sets are completely
* disjoint */
u = _new_invlist(len_a + len_b);
@ -6725,6 +6765,11 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
SvREFCNT_dec(*output);
}
/* If we've changed b, restore it */
if (complement_b) {
array_b[0] = 1;
}
*output = u;
return;
}

View File

@ -56,6 +56,7 @@ typedef struct regexp_paren_pair {
} regexp_paren_pair;
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
#define _invlist_union(a, b, output) _invlist_union_maybe_complement_2nd(a, b, FALSE, output)
#define _invlist_intersection(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, FALSE, output)
/* Subtracting b from a leaves in a everything that was there that isn't in b,