mirror of
https://github.com/ruby/ruby.git
synced 2026-01-27 04:24:23 +00:00
[Backport #13671] Fix that "ss" in look-behind causes syntax error
Fixes k-takata/Onigmo#92. This fix was ported from oniguruma:257082dac8b1a5445fbe
This commit is contained in:
parent
badb360009
commit
3150a1d989
37
regcomp.c
37
regcomp.c
@@ -3301,6 +3301,14 @@ setup_subexp_call(Node* node, ScanEnv* env)
|
||||
}
|
||||
#endif
|
||||
|
||||
#define IN_ALT (1<<0)
|
||||
#define IN_NOT (1<<1)
|
||||
#define IN_REPEAT (1<<2)
|
||||
#define IN_VAR_REPEAT (1<<3)
|
||||
#define IN_CALL (1<<4)
|
||||
#define IN_RECCALL (1<<5)
|
||||
#define IN_LOOK_BEHIND (1<<6)
|
||||
|
||||
/* divide different length alternatives in look-behind.
|
||||
(?<=A|B) ==> (?<=A)|(?<=B)
|
||||
(?<!A|B) ==> (?<!A)(?<!B)
|
||||
@@ -3597,24 +3605,29 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
|
||||
return ONIGERR_MEMORY;
|
||||
}
|
||||
|
||||
static int
|
||||
expand_case_fold_string(Node* node, regex_t* reg)
|
||||
{
|
||||
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
|
||||
|
||||
static int
|
||||
expand_case_fold_string(Node* node, regex_t* reg, int state)
|
||||
{
|
||||
int r, n, len, alt_num;
|
||||
int varlen = 0;
|
||||
int is_in_look_behind;
|
||||
UChar *start, *end, *p;
|
||||
Node *top_root, *root, *snode, *prev_node;
|
||||
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
||||
StrNode* sn = NSTR(node);
|
||||
StrNode* sn;
|
||||
|
||||
if (NSTRING_IS_AMBIG(node)) return 0;
|
||||
|
||||
sn = NSTR(node);
|
||||
|
||||
start = sn->s;
|
||||
end = sn->end;
|
||||
if (start >= end) return 0;
|
||||
|
||||
is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
|
||||
|
||||
r = 0;
|
||||
top_root = root = prev_node = snode = NULL_NODE;
|
||||
alt_num = 1;
|
||||
@@ -3630,7 +3643,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
|
||||
len = enclen(reg->enc, p, end);
|
||||
|
||||
varlen = is_case_fold_variable_len(n, items, len);
|
||||
if (n == 0 || varlen == 0) {
|
||||
if (n == 0 || varlen == 0 || is_in_look_behind) {
|
||||
if (IS_NULL(snode)) {
|
||||
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
||||
onig_node_free(top_root);
|
||||
@@ -3889,13 +3902,6 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
|
||||
}
|
||||
#endif
|
||||
|
||||
#define IN_ALT (1<<0)
|
||||
#define IN_NOT (1<<1)
|
||||
#define IN_REPEAT (1<<2)
|
||||
#define IN_VAR_REPEAT (1<<3)
|
||||
#define IN_CALL (1<<4)
|
||||
#define IN_RECCALL (1<<5)
|
||||
|
||||
/* setup_tree does the following work.
|
||||
1. check empty loop. (set qn->target_empty_info)
|
||||
2. expand ignore-case in char class.
|
||||
@@ -3937,7 +3943,7 @@ restart:
|
||||
|
||||
case NT_STR:
|
||||
if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
|
||||
r = expand_case_fold_string(node, reg);
|
||||
r = expand_case_fold_string(node, reg, state);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -4180,7 +4186,7 @@ restart:
|
||||
if (r < 0) return r;
|
||||
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
||||
if (NTYPE(node) != NT_ANCHOR) goto restart;
|
||||
r = setup_tree(an->target, reg, state, env);
|
||||
r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
|
||||
if (r != 0) return r;
|
||||
r = setup_look_behind(node, reg, env);
|
||||
}
|
||||
@@ -4193,7 +4199,8 @@ restart:
|
||||
if (r < 0) return r;
|
||||
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
||||
if (NTYPE(node) != NT_ANCHOR) goto restart;
|
||||
r = setup_tree(an->target, reg, (state | IN_NOT), env);
|
||||
r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
|
||||
env);
|
||||
if (r != 0) return r;
|
||||
r = setup_look_behind(node, reg, env);
|
||||
}
|
||||
|
||||
@@ -112,7 +112,7 @@ describe "Literal Regexps" do
|
||||
/foo.(?<=\d)/.match("fooA foo1").to_a.should == ["foo1"]
|
||||
end
|
||||
|
||||
ruby_bug "#13671", ""..."3.6" do # https://bugs.ruby-lang.org/issues/13671
|
||||
ruby_bug "#13671", ""..."3.4.8" do # https://bugs.ruby-lang.org/issues/13671
|
||||
it "handles a lookbehind with ss characters" do
|
||||
r = Regexp.new("(?<!dss)", Regexp::IGNORECASE)
|
||||
r.should =~ "✨"
|
||||
|
||||
@@ -1615,6 +1615,29 @@ class TestRegexp < Test::Unit::TestCase
|
||||
assert_raise(RegexpError, bug12418){ Regexp.new('(0?0|(?(5)||)|(?(5)||))?') }
|
||||
end
|
||||
|
||||
def test_ss_in_look_behind
|
||||
assert_match_at("(?i:ss)", "ss", [[0, 2]])
|
||||
assert_match_at("(?i:ss)", "Ss", [[0, 2]])
|
||||
assert_match_at("(?i:ss)", "SS", [[0, 2]])
|
||||
assert_match_at("(?i:ss)", "\u017fS", [[0, 2]]) # LATIN SMALL LETTER LONG S
|
||||
assert_match_at("(?i:ss)", "s\u017f", [[0, 2]])
|
||||
assert_match_at("(?i:ss)", "\u00df", [[0, 1]]) # LATIN SMALL LETTER SHARP S
|
||||
assert_match_at("(?i:ss)", "\u1e9e", [[0, 1]]) # LATIN CAPITAL LETTER SHARP S
|
||||
assert_match_at("(?i:xssy)", "xssy", [[0, 4]])
|
||||
assert_match_at("(?i:xssy)", "xSsy", [[0, 4]])
|
||||
assert_match_at("(?i:xssy)", "xSSy", [[0, 4]])
|
||||
assert_match_at("(?i:xssy)", "x\u017fSy", [[0, 4]])
|
||||
assert_match_at("(?i:xssy)", "xs\u017fy", [[0, 4]])
|
||||
assert_match_at("(?i:xssy)", "x\u00dfy", [[0, 3]])
|
||||
assert_match_at("(?i:xssy)", "x\u1e9ey", [[0, 3]])
|
||||
assert_match_at("(?i:\u00df)", "ss", [[0, 2]])
|
||||
assert_match_at("(?i:\u00df)", "SS", [[0, 2]])
|
||||
assert_match_at("(?i:[\u00df])", "ss", [[0, 2]])
|
||||
assert_match_at("(?i:[\u00df])", "SS", [[0, 2]])
|
||||
assert_match_at("(?i)(?<!ss)\u2728", "qq\u2728", [[2, 3]]) # Issue #92
|
||||
assert_match_at("(?i)(?<!xss)\u2728", "qq\u2728", [[2, 3]])
|
||||
end
|
||||
|
||||
def test_options_in_look_behind
|
||||
assert_nothing_raised {
|
||||
assert_match_at("(?<=(?i)ab)cd", "ABcd", [[2,4]])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user