Fri, 05 Apr 2013 19:50:10 +0200
8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
Reviewed-by: jlaskey, lagergren
1.1 --- a/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java Thu Apr 04 18:32:00 2013 +0200 1.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java Fri Apr 05 19:50:10 2013 +0200 1.3 @@ -26,11 +26,10 @@ 1.4 package jdk.nashorn.internal.runtime.regexp; 1.5 1.6 import java.util.HashMap; 1.7 -import java.util.LinkedHashSet; 1.8 +import java.util.Iterator; 1.9 import java.util.LinkedList; 1.10 import java.util.List; 1.11 import java.util.Map; 1.12 -import java.util.Set; 1.13 import java.util.regex.PatternSyntaxException; 1.14 1.15 import jdk.nashorn.internal.parser.Lexer; 1.16 @@ -58,7 +57,7 @@ 1.17 private final List<Capture> caps = new LinkedList<>(); 1.18 1.19 /** Forward references to capturing parenthesis to be resolved later.*/ 1.20 - private final Set<Integer> forwardReferences = new LinkedHashSet<>(); 1.21 + private final LinkedList<Integer> forwardReferences = new LinkedList<>(); 1.22 1.23 /** Current level of zero-width negative lookahead assertions. */ 1.24 private int negativeLookaheadLevel; 1.25 @@ -104,10 +103,20 @@ 1.26 return; 1.27 } 1.28 1.29 - for (final Integer ref : forwardReferences) { 1.30 - if (ref.intValue() > caps.size()) { 1.31 - neverMatches = true; 1.32 - break; 1.33 + Iterator<Integer> iterator = forwardReferences.descendingIterator(); 1.34 + while (iterator.hasNext()) { 1.35 + final int pos = iterator.next(); 1.36 + final int num = iterator.next(); 1.37 + if (num > caps.size()) { 1.38 + // Non-existing reference should never match, if smaller than 8 convert to octal escape 1.39 + // to be compatible with other engines. 
1.40 + if (num < 8) { 1.41 + String escape = "\\x0" + num; 1.42 + sb.insert(pos, escape); 1.43 + } else { 1.44 + neverMatches = true; 1.45 + break; 1.46 + } 1.47 } 1.48 } 1.49 1.50 @@ -402,6 +411,10 @@ 1.51 if (ch0 == '}') { 1.52 pop('}'); 1.53 commit(1); 1.54 + } else { 1.55 + // Bad quantifier should be rejected but is accepted by all major engines 1.56 + restart(startIn, startOut); 1.57 + return false; 1.58 } 1.59 1.60 return true; 1.61 @@ -637,7 +650,16 @@ 1.62 throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException 1.63 } 1.64 // ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here. 1.65 - if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) { 1.66 + if (ch0 == 'c') { 1.67 + // Ignore invalid control letter escape if within a character class 1.68 + if (inCharClass && ch1 != ']') { 1.69 + sb.setLength(sb.length() - 1); 1.70 + skip(2); 1.71 + return true; 1.72 + } else { 1.73 + sb.append('\\'); // Treat invalid \c control sequence as \\c 1.74 + } 1.75 + } else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) { 1.76 sb.setLength(sb.length() - 1); 1.77 } 1.78 return commit(1); 1.79 @@ -677,8 +699,9 @@ 1.80 // Forward reference to a capture group. Forward references are always undefined so we 1.81 // can omit it from the output buffer. Additionally, if the capture group does not exist 1.82 // the whole regexp becomes invalid, so register the reference for later processing. 1.83 + sb.setLength(sb.length() - 1); 1.84 forwardReferences.add(num); 1.85 - sb.setLength(sb.length() - 1); 1.86 + forwardReferences.add(sb.length()); 1.87 skip(1); 1.88 return true; 1.89 }
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/test/script/basic/JDK-8009230.js Fri Apr 05 19:50:10 2013 +0200 2.3 @@ -0,0 +1,93 @@ 2.4 +/* 2.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 2.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 2.7 + * 2.8 + * This code is free software; you can redistribute it and/or modify it 2.9 + * under the terms of the GNU General Public License version 2 only, as 2.10 + * published by the Free Software Foundation. 2.11 + * 2.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 2.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 2.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 2.15 + * version 2 for more details (a copy is included in the LICENSE file that 2.16 + * accompanied this code). 2.17 + * 2.18 + * You should have received a copy of the GNU General Public License version 2.19 + * 2 along with this work; if not, write to the Free Software Foundation, 2.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 2.21 + * 2.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 2.23 + * or visit www.oracle.com if you need additional information or have any 2.24 + * questions. 2.25 + */ 2.26 + 2.27 +/** 2.28 + * JDK-8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines 2.29 + * 2.30 + * @test 2.31 + * @run 2.32 + */ 2.33 + 2.34 + 2.35 +// Invalid ControlEscape/IdentityEscape character treated as literal. 2.36 +print(/\z/.exec("z")); // Invalid escape, same as /z/ 2.37 +// Incomplete/Invalid ControlEscape treated as "\\c" 2.38 +print(/\c/.exec("\\c")); // same as /\\c/ 2.39 +print(/\c2/.exec("\\c2")); // same as /\\c2/ 2.40 +print(/\C/.exec("C")); // same as /C/ 2.41 +print(/\C2/.exec("C2")); // same as /C2/ 2.42 +// Incomplete HexEscapeSequence escape treated as "x". 
2.43 +print(/\x/.exec("x")); // incomplete x-escape 2.44 +print(/\x1/.exec("x1")); // incomplete x-escape 2.45 +print(/\x1z/.exec("x1z")); // incomplete x-escape 2.46 +// Incomplete UnicodeEscapeSequence escape treated as "u". 2.47 +print(/\u/.exec("u")); // incomplete u-escape 2.48 +print(/\uz/.exec("uz")); // incomplete u-escape 2.49 +print(/\u1/.exec("u1")); // incomplete u-escape 2.50 +print(/\u1z/.exec("u1z")); // incomplete u-escape 2.51 +print(/\u12/.exec("u12")); // incomplete u-escape 2.52 +print(/\u12z/.exec("u12z")); // incomplete u-escape 2.53 +print(/\u123/.exec("u123")); // incomplete u-escape 2.54 +print(/\u123z/.exec("u123z")); // incomplete u-escape 2.55 +// Bad quantifier range: 2.56 +print(/x{z/.exec("x{z")); // same as /x\{z/ 2.57 +print(/x{1z/.exec("x{1z")); // same as /x\{1z/ 2.58 +print(/x{1,z/.exec("x{1,z")); // same as /x\{1,z/ 2.59 +print(/x{1,2z/.exec("x{1,2z")); // same as /x\{1,2z/ 2.60 +print(/x{10000,20000z/.exec("x{10000,20000z")); // same as /x\{10000,20000z/ 2.61 +// Notice: It needs arbitrary lookahead to determine the invalidity, 2.62 +// except Mozilla that limits the numbers. 2.63 + 2.64 +// Zero-initialized Octal escapes. 2.65 +/\012/; // same as /\x0a/ 2.66 + 2.67 +// Nonexisting back-references smaller than 8 treated as octal escapes: 2.68 +print(/\5/.exec("\u0005")); // same as /\x05/ 2.69 +print(/\7/.exec("\u0007")); // same as /\x07/ 2.70 +print(/\8/.exec("\u0008")); // does not match 2.71 + 2.72 +// Invalid PatternCharacter accepted unescaped 2.73 +print(/]/.exec("]")); 2.74 +print(/{/.exec("{")); 2.75 +print(/}/.exec("}")); 2.76 + 2.77 +// Bad escapes also inside CharacterClass. 
2.78 +print(/[\z]/.exec("z")); 2.79 +print(/[\c]/.exec("c")); 2.80 +print(/[\c2]/.exec("c")); 2.81 +print(/[\x]/.exec("x")); 2.82 +print(/[\x1]/.exec("x1")); 2.83 +print(/[\x1z]/.exec("x1z")); 2.84 +print(/[\u]/.exec("u")); 2.85 +print(/[\uz]/.exec("u")); 2.86 +print(/[\u1]/.exec("u")); 2.87 +print(/[\u1z]/.exec("u")); 2.88 +print(/[\u12]/.exec("u")); 2.89 +print(/[\u12z]/.exec("u")); 2.90 +print(/[\u123]/.exec("u")); 2.91 +print(/[\u123z]/.exec("u")); 2.92 +print(/[\012]/.exec("0")); 2.93 +print(/[\5]/.exec("5")); 2.94 +// And in addition: 2.95 +print(/[\B]/.exec("B")); 2.96 +print(/()()[\2]/.exec("")); // A backreference that would be valid elsewhere is not treated as one inside a character class.
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/test/script/basic/JDK-8009230.js.EXPECTED Fri Apr 05 19:50:10 2013 +0200 3.3 @@ -0,0 +1,45 @@ 3.4 +z 3.5 +\c 3.6 +\c2 3.7 +C 3.8 +C2 3.9 +x 3.10 +x1 3.11 +x1z 3.12 +u 3.13 +uz 3.14 +u1 3.15 +u1z 3.16 +u12 3.17 +u12z 3.18 +u123 3.19 +u123z 3.20 +x{z 3.21 +x{1z 3.22 +x{1,z 3.23 +x{1,2z 3.24 +x{10000,20000z 3.25 + 3.26 + 3.27 +null 3.28 +] 3.29 +{ 3.30 +} 3.31 +z 3.32 +c 3.33 +null 3.34 +x 3.35 +x 3.36 +x 3.37 +u 3.38 +u 3.39 +u 3.40 +u 3.41 +u 3.42 +u 3.43 +u 3.44 +u 3.45 +null 3.46 +null 3.47 +B 3.48 +null