[Backport #13671] Fix that "ss" in look-behind causes syntax error

Fixes k-takata/Onigmo#92.

This fix was ported from oniguruma:
257082dac8

b1a5445fbe
This commit is contained in:
K.Takata 2019-01-25 18:54:41 +09:00 committed by Takashi Kokubun
parent badb360009
commit 3150a1d989
3 changed files with 46 additions and 16 deletions

View File

@ -3301,6 +3301,14 @@ setup_subexp_call(Node* node, ScanEnv* env)
}
#endif
/* Single-bit flags combined into the `state` value handed down while the
   pattern tree is set up; the visible callers OR IN_NOT and IN_LOOK_BEHIND
   into the state passed to setup_tree() for look-behind targets.
   They are defined here, ahead of expand_case_fold_string(), because that
   function tests IN_LOOK_BEHIND on the state it receives. */
#define IN_ALT (1<<0)
#define IN_NOT (1<<1)
#define IN_REPEAT (1<<2)
#define IN_VAR_REPEAT (1<<3)
#define IN_CALL (1<<4)
#define IN_RECCALL (1<<5)
/* set while processing the target of a look-behind anchor */
#define IN_LOOK_BEHIND (1<<6)
/* divide different length alternatives in look-behind.
(?<=A|B) ==> (?<=A)|(?<=B)
(?<!A|B) ==> (?<!A)(?<!B)
@ -3597,24 +3605,29 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
return ONIGERR_MEMORY;
}
static int
expand_case_fold_string(Node* node, regex_t* reg)
{
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
static int
expand_case_fold_string(Node* node, regex_t* reg, int state)
{
int r, n, len, alt_num;
int varlen = 0;
int is_in_look_behind;
UChar *start, *end, *p;
Node *top_root, *root, *snode, *prev_node;
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
StrNode* sn = NSTR(node);
StrNode* sn;
if (NSTRING_IS_AMBIG(node)) return 0;
sn = NSTR(node);
start = sn->s;
end = sn->end;
if (start >= end) return 0;
is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
r = 0;
top_root = root = prev_node = snode = NULL_NODE;
alt_num = 1;
@ -3630,7 +3643,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
len = enclen(reg->enc, p, end);
varlen = is_case_fold_variable_len(n, items, len);
if (n == 0 || varlen == 0) {
if (n == 0 || varlen == 0 || is_in_look_behind) {
if (IS_NULL(snode)) {
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
onig_node_free(top_root);
@ -3889,13 +3902,6 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
}
#endif
#define IN_ALT (1<<0)
#define IN_NOT (1<<1)
#define IN_REPEAT (1<<2)
#define IN_VAR_REPEAT (1<<3)
#define IN_CALL (1<<4)
#define IN_RECCALL (1<<5)
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
2. expand ignore-case in char class.
@ -3937,7 +3943,7 @@ restart:
case NT_STR:
if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
r = expand_case_fold_string(node, reg);
r = expand_case_fold_string(node, reg, state);
}
break;
@ -4180,7 +4186,7 @@ restart:
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
if (NTYPE(node) != NT_ANCHOR) goto restart;
r = setup_tree(an->target, reg, state, env);
r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
if (r != 0) return r;
r = setup_look_behind(node, reg, env);
}
@ -4193,7 +4199,8 @@ restart:
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
if (NTYPE(node) != NT_ANCHOR) goto restart;
r = setup_tree(an->target, reg, (state | IN_NOT), env);
r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
env);
if (r != 0) return r;
r = setup_look_behind(node, reg, env);
}

View File

@ -112,7 +112,7 @@ describe "Literal Regexps" do
/foo.(?<=\d)/.match("fooA foo1").to_a.should == ["foo1"]
end
ruby_bug "#13671", ""..."3.6" do # https://bugs.ruby-lang.org/issues/13671
ruby_bug "#13671", ""..."3.4.8" do # https://bugs.ruby-lang.org/issues/13671
it "handles a lookbehind with ss characters" do
r = Regexp.new("(?<!dss)", Regexp::IGNORECASE)
r.should =~ ""

View File

@ -1615,6 +1615,29 @@ class TestRegexp < Test::Unit::TestCase
assert_raise(RegexpError, bug12418){ Regexp.new('(0?0|(?(5)||)|(?(5)||))?') }
end
def test_ss_in_look_behind
  # Case-insensitive handling of "ss" and the sharp-s / long-s characters,
  # both outside and inside a negative look-behind.
  #
  # Each key is a pattern; each value lists [subject, expected positions]
  # pairs that are forwarded unchanged to assert_match_at, in the order
  # written here.
  # NOTE(review): the expected positions look like character offsets (a
  # lone "\u00df" is expected at [0, 1]) -- confirm against assert_match_at.
  expectations = {
    "(?i:ss)" => [
      ["ss", [[0, 2]]],
      ["Ss", [[0, 2]]],
      ["SS", [[0, 2]]],
      ["\u017fS", [[0, 2]]], # LATIN SMALL LETTER LONG S
      ["s\u017f", [[0, 2]]],
      ["\u00df", [[0, 1]]],  # LATIN SMALL LETTER SHARP S
      ["\u1e9e", [[0, 1]]],  # LATIN CAPITAL LETTER SHARP S
    ],
    "(?i:xssy)" => [
      ["xssy", [[0, 4]]],
      ["xSsy", [[0, 4]]],
      ["xSSy", [[0, 4]]],
      ["x\u017fSy", [[0, 4]]],
      ["xs\u017fy", [[0, 4]]],
      ["x\u00dfy", [[0, 3]]],
      ["x\u1e9ey", [[0, 3]]],
    ],
    "(?i:\u00df)" => [
      ["ss", [[0, 2]]],
      ["SS", [[0, 2]]],
    ],
    "(?i:[\u00df])" => [
      ["ss", [[0, 2]]],
      ["SS", [[0, 2]]],
    ],
    # Issue #92: "ss" inside a look-behind used to be rejected at compile time.
    "(?i)(?<!ss)\u2728" => [
      ["qq\u2728", [[2, 3]]],
    ],
    "(?i)(?<!xss)\u2728" => [
      ["qq\u2728", [[2, 3]]],
    ],
  }

  expectations.each do |pattern, samples|
    samples.each do |subject, positions|
      assert_match_at(pattern, subject, positions)
    end
  end
end
def test_options_in_look_behind
assert_nothing_raised {
assert_match_at("(?<=(?i)ab)cd", "ABcd", [[2,4]])