8137240: Negative lookahead in RegEx breaks backreference

Fri, 24 Jun 2016 12:39:42 +0200

author
hannesw
date
Fri, 24 Jun 2016 12:39:42 +0200
changeset 1841
d95a6070758d
parent 1840
68e45b084515
child 1842
29f97057e4e1

8137240: Negative lookahead in RegEx breaks backreference
Reviewed-by: mhaupt

src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java file | annotate | diff | comparison | revisions
test/script/basic/JDK-8137240.js file | annotate | diff | comparison | revisions
     1.1 --- a/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Mon Jun 20 11:44:29 2016 +0200
     1.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Fri Jun 24 12:39:42 2016 +0200
     1.3 @@ -80,8 +80,17 @@
     1.4              this.negLookaheadLevel = negLookaheadLevel;
     1.5          }
     1.6  
     1.7 -        boolean isContained(final int group, final int level) {
     1.8 -            return group == this.negLookaheadGroup && level >= this.negLookaheadLevel;
     1.9 +        /**
    1.10 +         * Returns true if this Capture can be referenced from the position specified by the
    1.11 +         * group and level parameters. This is the case if either the group is not within
    1.12 +         * a negative lookahead, or the position of the referrer is in the same negative lookahead.
    1.13 +         *
    1.14 +         * @param group current negative lookahead group
    1.15 +         * @param level current negative lookahead level
    1.16 +         * @return true if this capture group can be referenced from the given position
    1.17 +         */
    1.18 +        boolean canBeReferencedFrom(final int group, final int level) {
    1.19 +            return this.negLookaheadLevel == 0 || (group == this.negLookaheadGroup && level >= this.negLookaheadLevel);
    1.20          }
    1.21  
    1.22      }
    1.23 @@ -671,8 +680,9 @@
    1.24  
    1.25                  } else if (decimalValue <= caps.size()) {
    1.26                      //  Captures inside a negative lookahead are undefined when referenced from the outside.
    1.27 -                    if (!caps.get(decimalValue - 1).isContained(negLookaheadGroup, negLookaheadLevel)) {
    1.28 -                        // Reference to capture in negative lookahead, omit from output buffer.
    1.29 +                    final Capture capture = caps.get(decimalValue - 1);
    1.30 +                    if (!capture.canBeReferencedFrom(negLookaheadGroup, negLookaheadLevel)) {
    1.31 +                        // Outside reference to capture in negative lookahead, omit from output buffer.
    1.32                          sb.setLength(sb.length() - 1);
    1.33                      } else {
    1.34                          // Append backreference to output buffer.
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/test/script/basic/JDK-8137240.js	Fri Jun 24 12:39:42 2016 +0200
     2.3 @@ -0,0 +1,44 @@
     2.4 +/*
     2.5 + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
     2.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.7 + *
     2.8 + * This code is free software; you can redistribute it and/or modify it
     2.9 + * under the terms of the GNU General Public License version 2 only, as
    2.10 + * published by the Free Software Foundation.
    2.11 + *
    2.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    2.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    2.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    2.15 + * version 2 for more details (a copy is included in the LICENSE file that
    2.16 + * accompanied this code).
    2.17 + *
    2.18 + * You should have received a copy of the GNU General Public License version
    2.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    2.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    2.21 + *
    2.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    2.23 + * or visit www.oracle.com if you need additional information or have any
    2.24 + * questions.
    2.25 + */
    2.26 +
    2.27 +/**
    2.28 + * JDK-8137240: Negative lookahead in RegEx breaks backreference
    2.29 + *
    2.30 + * @test
    2.31 + * @run
    2.32 + */
    2.33 +
    2.34 +
    2.35 +Assert.assertEquals('aa'.replace(/(a)(?!b)\1/gm, 'c'), 'c');
    2.36 +
    2.37 +var result = 'aa'.match(/(a)(?!b)\1/);
    2.38 +Assert.assertTrue(result.length === 2);
    2.39 +Assert.assertTrue(result[0] === 'aa');
    2.40 +Assert.assertTrue(result[1] === 'a');
    2.41 +
    2.42 +result = 'aa'.match(/(a)(?!(b))\2(a)/);
    2.43 +Assert.assertTrue(result.length === 4);
    2.44 +Assert.assertTrue(result[0] === 'aa');
    2.45 +Assert.assertTrue(result[1] === 'a');
    2.46 +Assert.assertTrue(result[2] === undefined);
    2.47 +Assert.assertTrue(result[3] === 'a');

mercurial