8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines

Fri, 05 Apr 2013 19:50:10 +0200

author
hannesw
date
Fri, 05 Apr 2013 19:50:10 +0200
changeset 179
1c29dc809de2
parent 178
050fd5696bcb
child 180
437861485ffa

8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
Reviewed-by: jlaskey, lagergren

src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java file | annotate | diff | comparison | revisions
test/script/basic/JDK-8009230.js file | annotate | diff | comparison | revisions
test/script/basic/JDK-8009230.js.EXPECTED file | annotate | diff | comparison | revisions
     1.1 --- a/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Thu Apr 04 18:32:00 2013 +0200
     1.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Fri Apr 05 19:50:10 2013 +0200
     1.3 @@ -26,11 +26,10 @@
     1.4  package jdk.nashorn.internal.runtime.regexp;
     1.5  
     1.6  import java.util.HashMap;
     1.7 -import java.util.LinkedHashSet;
     1.8 +import java.util.Iterator;
     1.9  import java.util.LinkedList;
    1.10  import java.util.List;
    1.11  import java.util.Map;
    1.12 -import java.util.Set;
    1.13  import java.util.regex.PatternSyntaxException;
    1.14  
    1.15  import jdk.nashorn.internal.parser.Lexer;
    1.16 @@ -58,7 +57,7 @@
    1.17      private final List<Capture> caps = new LinkedList<>();
    1.18  
    1.19      /** Forward references to capturing parenthesis to be resolved later.*/
    1.20 -    private final Set<Integer> forwardReferences = new LinkedHashSet<>();
    1.21 +    private final LinkedList<Integer> forwardReferences = new LinkedList<>();
    1.22  
    1.23      /** Current level of zero-width negative lookahead assertions. */
    1.24      private int negativeLookaheadLevel;
    1.25 @@ -104,10 +103,20 @@
    1.26              return;
    1.27          }
    1.28  
    1.29 -        for (final Integer ref : forwardReferences) {
    1.30 -            if (ref.intValue() > caps.size()) {
    1.31 -                neverMatches = true;
    1.32 -                break;
    1.33 +        Iterator<Integer> iterator = forwardReferences.descendingIterator();
    1.34 +        while (iterator.hasNext()) {
    1.35 +            final int pos = iterator.next();
    1.36 +            final int num = iterator.next();
    1.37 +            if (num > caps.size()) {
    1.38 +                // A reference to a non-existing group should never match. However, if it is smaller than 8,
    1.39 +                // convert it to an octal escape (emitted as \x0N) to be compatible with other engines.
    1.40 +                if (num < 8) {
    1.41 +                    String escape = "\\x0" + num;
    1.42 +                    sb.insert(pos, escape);
    1.43 +                } else {
    1.44 +                    neverMatches = true;
    1.45 +                    break;
    1.46 +                }
    1.47              }
    1.48          }
    1.49  
    1.50 @@ -402,6 +411,10 @@
    1.51              if (ch0 == '}') {
    1.52                  pop('}');
    1.53                  commit(1);
    1.54 +            } else {
    1.55 +                // Bad quantifier should be rejected but is accepted by all major engines
    1.56 +                restart(startIn, startOut);
    1.57 +                return false;
    1.58              }
    1.59  
    1.60              return true;
    1.61 @@ -637,7 +650,16 @@
    1.62              throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException
    1.63          }
    1.64          // ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here.
    1.65 -        if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
    1.66 +        if (ch0 == 'c') {
    1.67 +            // Ignore invalid control letter escape if within a character class
    1.68 +            if (inCharClass && ch1 != ']') {
    1.69 +                sb.setLength(sb.length() - 1);
    1.70 +                skip(2);
    1.71 +                return true;
    1.72 +            } else {
    1.73 +                sb.append('\\'); // Treat invalid \c control sequence as \\c
    1.74 +            }
    1.75 +        } else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
    1.76              sb.setLength(sb.length() - 1);
    1.77          }
    1.78          return commit(1);
    1.79 @@ -677,8 +699,9 @@
    1.80                      // Forward reference to a capture group. Forward references are always undefined so we
    1.81                      // can omit it from the output buffer. Additionally, if the capture group does not exist
    1.82                      // the whole regexp becomes invalid, so register the reference for later processing.
    1.83 +                    sb.setLength(sb.length() - 1);
    1.84                      forwardReferences.add(num);
    1.85 -                    sb.setLength(sb.length() - 1);
    1.86 +                    forwardReferences.add(sb.length());
    1.87                      skip(1);
    1.88                      return true;
    1.89                  }
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/test/script/basic/JDK-8009230.js	Fri Apr 05 19:50:10 2013 +0200
     2.3 @@ -0,0 +1,93 @@
     2.4 +/*
     2.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     2.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.7 + *
     2.8 + * This code is free software; you can redistribute it and/or modify it
     2.9 + * under the terms of the GNU General Public License version 2 only, as
    2.10 + * published by the Free Software Foundation.
    2.11 + *
    2.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    2.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    2.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    2.15 + * version 2 for more details (a copy is included in the LICENSE file that
    2.16 + * accompanied this code).
    2.17 + *
    2.18 + * You should have received a copy of the GNU General Public License version
    2.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    2.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    2.21 + *
    2.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    2.23 + * or visit www.oracle.com if you need additional information or have any
    2.24 + * questions.
    2.25 + */
    2.26 +
    2.27 +/**
    2.28 + * JDK-8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
    2.29 + *
    2.30 + * @test
    2.31 + * @run
    2.32 + */
    2.33 +
    2.34 +
    2.35 +// Invalid ControlEscape/IdentityEscape character treated as literal.
    2.36 +print(/\z/.exec("z"));  // Invalid escape, same as /z/
    2.37 +// Incomplete/Invalid ControlEscape treated as "\\c"
    2.38 +print(/\c/.exec("\\c"));  // same as /\\c/
    2.39 +print(/\c2/.exec("\\c2"));  // same as /\\c2/
    2.40 +print(/\C/.exec("C"));  // same as /C/
    2.41 +print(/\C2/.exec("C2"));  // same as /C2/
    2.42 +// Incomplete HexEscapeSequence escape treated as "x".
    2.43 +print(/\x/.exec("x"));  // incomplete x-escape
    2.44 +print(/\x1/.exec("x1"));  // incomplete x-escape
    2.45 +print(/\x1z/.exec("x1z"));  // incomplete x-escape
    2.46 +// Incomplete UnicodeEscapeSequence escape treated as "u".
    2.47 +print(/\u/.exec("u"));  // incomplete u-escape
    2.48 +print(/\uz/.exec("uz"));  // incomplete u-escape
    2.49 +print(/\u1/.exec("u1"));  // incomplete u-escape
    2.50 +print(/\u1z/.exec("u1z"));  // incomplete u-escape
    2.51 +print(/\u12/.exec("u12"));  // incomplete u-escape
    2.52 +print(/\u12z/.exec("u12z"));  // incomplete u-escape
    2.53 +print(/\u123/.exec("u123"));  // incomplete u-escape
    2.54 +print(/\u123z/.exec("u123z"));  // incomplete u-escape
    2.55 +// Bad quantifier range:
    2.56 +print(/x{z/.exec("x{z"));  // same as /x\{z/
    2.57 +print(/x{1z/.exec("x{1z"));  // same as /x\{1z/
    2.58 +print(/x{1,z/.exec("x{1,z"));  // same as /x\{1,z/
    2.59 +print(/x{1,2z/.exec("x{1,2z"));  // same as /x\{1,2z/
    2.60 +print(/x{10000,20000z/.exec("x{10000,20000z"));  // same as /x\{10000,20000z/
    2.61 +// Note: determining that a quantifier is invalid requires arbitrary lookahead,
    2.62 +// except in Mozilla, which limits the numbers.
    2.63 +
    2.64 +// Zero-initialized Octal escapes.
    2.65 +/\012/;    // same as /\x0a/
    2.66 +
    2.67 +// Nonexisting back-references smaller than 8 treated as octal escapes:
    2.68 +print(/\5/.exec("\u0005"));  // same as /\x05/
    2.69 +print(/\7/.exec("\u0007"));  // same as /\x07/
    2.70 +print(/\8/.exec("\u0008"));  // does not match
    2.71 +
    2.72 +// Invalid PatternCharacter accepted unescaped
    2.73 +print(/]/.exec("]"));
    2.74 +print(/{/.exec("{"));
    2.75 +print(/}/.exec("}"));
    2.76 +
    2.77 +// Bad escapes also inside CharacterClass.
    2.78 +print(/[\z]/.exec("z"));
    2.79 +print(/[\c]/.exec("c"));
    2.80 +print(/[\c2]/.exec("c"));
    2.81 +print(/[\x]/.exec("x"));
    2.82 +print(/[\x1]/.exec("x1"));
    2.83 +print(/[\x1z]/.exec("x1z"));
    2.84 +print(/[\u]/.exec("u"));
    2.85 +print(/[\uz]/.exec("u"));
    2.86 +print(/[\u1]/.exec("u"));
    2.87 +print(/[\u1z]/.exec("u"));
    2.88 +print(/[\u12]/.exec("u"));
    2.89 +print(/[\u12z]/.exec("u"));
    2.90 +print(/[\u123]/.exec("u"));
    2.91 +print(/[\u123z]/.exec("u"));
    2.92 +print(/[\012]/.exec("0"));
    2.93 +print(/[\5]/.exec("5"));
    2.94 +// And in addition:
    2.95 +print(/[\B]/.exec("B"));
    2.96 +print(/()()[\2]/.exec(""));  // \2 is a valid backreference outside a character class, but must not act as one inside it.
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/test/script/basic/JDK-8009230.js.EXPECTED	Fri Apr 05 19:50:10 2013 +0200
     3.3 @@ -0,0 +1,45 @@
     3.4 +z
     3.5 +\c
     3.6 +\c2
     3.7 +C
     3.8 +C2
     3.9 +x
    3.10 +x1
    3.11 +x1z
    3.12 +u
    3.13 +uz
    3.14 +u1
    3.15 +u1z
    3.16 +u12
    3.17 +u12z
    3.18 +u123
    3.19 +u123z
    3.20 +x{z
    3.21 +x{1z
    3.22 +x{1,z
    3.23 +x{1,2z
    3.24 +x{10000,20000z
    3.25 +
    3.26 +
    3.27 +null
    3.28 +]
    3.29 +{
    3.30 +}
    3.31 +z
    3.32 +c
    3.33 +null
    3.34 +x
    3.35 +x
    3.36 +x
    3.37 +u
    3.38 +u
    3.39 +u
    3.40 +u
    3.41 +u
    3.42 +u
    3.43 +u
    3.44 +u
    3.45 +null
    3.46 +null
    3.47 +B
    3.48 +null

mercurial