Mon, 26 Aug 2013 15:59:41 +0200
8023650: Regexp m flag does not recognize CRNL or CR
Reviewed-by: jlaskey, lagergren
1.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java Fri Aug 23 12:20:19 2013 -0300 1.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java Mon Aug 26 15:59:41 2013 +0200 1.3 @@ -26,7 +26,6 @@ 1.4 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol; 1.5 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol; 1.6 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion; 1.7 -import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isCrnl; 1.8 import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine; 1.9 1.10 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 1.11 @@ -500,7 +499,7 @@ 1.12 1.13 private void opAnyChar() { 1.14 if (s >= range) {opFail(); return;} 1.15 - if (chars[s] == EncodingHelper.NEW_LINE) {opFail(); return;} 1.16 + if (isNewLine(chars[s])) {opFail(); return;} 1.17 s++; 1.18 sprev = sbegin; // break; 1.19 } 1.20 @@ -538,7 +537,7 @@ 1.21 while (s < range) { 1.22 char b = chars[s]; 1.23 if (c == b) pushAlt(ip + 1, s, sprev); 1.24 - if (b == EncodingHelper.NEW_LINE) {opFail(); return;} 1.25 + if (isNewLine(b)) {opFail(); return;} 1.26 sprev = s; 1.27 s++; 1.28 } 1.29 @@ -617,7 +616,7 @@ 1.30 if (s == str) { 1.31 if (isNotBol(msaOptions)) opFail(); 1.32 return; 1.33 - } else if (EncodingHelper.isNewLine(chars, sprev, end) && s != end) { 1.34 + } else if (isNewLine(chars, sprev, end) && s != end) { 1.35 return; 1.36 } 1.37 opFail(); 1.38 @@ -626,7 +625,7 @@ 1.39 private void opEndLine() { 1.40 if (s == end) { 1.41 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 1.42 - if (str == end || !EncodingHelper.isNewLine(chars, sprev, end)) { 1.43 + if (str == end || !isNewLine(chars, sprev, end)) { 1.44 if (isNotEol(msaOptions)) opFail(); 1.45 } 1.46 return; 1.47 @@ -634,7 +633,7 @@ 1.48 if (isNotEol(msaOptions)) opFail(); 1.49 return; 1.50 } 1.51 - } else if (isNewLine(chars, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end))) { 1.52 + } else if (isNewLine(chars, s, end)) { 1.53 return; 1.54 } 1.55 opFail(); 1.56 @@ -653,9 +652,6 @@ 1.57 } 1.58 } else if (isNewLine(chars, s, end) && s + 1 == end) { 1.59 return; 1.60 - } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end)) { 1.61 - int ss = s + 2; 1.62 - if (ss == end) return; 1.63 } 1.64 opFail(); 1.65 }
2.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java Fri Aug 23 12:20:19 2013 -0300 2.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java Mon Aug 26 15:59:41 2013 +0200 2.3 @@ -29,7 +29,6 @@ 2.4 final int INTERNAL_ENC_CASE_FOLD_MULTI_CHAR = (1<<30); 2.5 final int ENC_CASE_FOLD_MIN = INTERNAL_ENC_CASE_FOLD_MULTI_CHAR; 2.6 final int ENC_CASE_FOLD_DEFAULT = ENC_CASE_FOLD_MIN; 2.7 - final boolean USE_CRNL_AS_LINE_TERMINATOR = false; 2.8 2.9 final boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */ 2.10 final boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true; /* /\n$/ =~ "\n" */
3.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java Fri Aug 23 12:20:19 2013 -0300 3.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java Mon Aug 26 15:59:41 2013 +0200 3.3 @@ -24,10 +24,12 @@ 3.4 3.5 import java.util.Arrays; 3.6 3.7 -public class EncodingHelper { 3.8 +public final class EncodingHelper { 3.9 3.10 - public final static char NEW_LINE = 0xa; 3.11 - public final static char RETURN = 0xd; 3.12 + final static int NEW_LINE = 0x000a; 3.13 + final static int RETURN = 0x000d; 3.14 + final static int LINE_SEPARATOR = 0x2028; 3.15 + final static int PARAGRAPH_SEPARATOR = 0x2029; 3.16 3.17 final static char[] EMPTYCHARS = new char[0]; 3.18 final static int[][] codeRanges = new int[15][]; 3.19 @@ -64,15 +66,11 @@ 3.20 } 3.21 3.22 public static boolean isNewLine(int code) { 3.23 - return code == NEW_LINE; 3.24 + return code == NEW_LINE || code == RETURN || code == LINE_SEPARATOR || code == PARAGRAPH_SEPARATOR; 3.25 } 3.26 3.27 public static boolean isNewLine(char[] chars, int p, int end) { 3.28 - return p < end && chars[p] == NEW_LINE; 3.29 - } 3.30 - 3.31 - public static boolean isCrnl(char[] chars, int p, int end) { 3.32 - return p + 1 < end && chars[p] == RETURN && chars[p + 1] == NEW_LINE; 3.33 + return p < end && isNewLine(chars[p]); 3.34 } 3.35 3.36 // Encoding.prevCharHead 3.37 @@ -194,7 +192,7 @@ 3.38 int type; 3.39 switch (ctype) { 3.40 case CharacterType.NEWLINE: 3.41 - return code == EncodingHelper.NEW_LINE; 3.42 + return isNewLine(code); 3.43 case CharacterType.ALPHA: 3.44 return (1 << Character.getType(code) & CharacterType.ALPHA_MASK) != 0; 3.45 case CharacterType.BLANK:
4.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java Fri Aug 23 12:20:19 2013 -0300 4.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java Mon Aug 26 15:59:41 2013 +0200 4.3 @@ -732,7 +732,7 @@ 4.4 if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE); 4.5 break; 4.6 case '$': 4.7 - if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE); 4.8 + if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE); 4.9 break; 4.10 case '[': 4.11 if (syntax.opBracketCC()) token.type = TokenType.CC_CC_OPEN;
5.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java Fri Aug 23 12:20:19 2013 -0300 5.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java Mon Aug 26 15:59:41 2013 +0200 5.3 @@ -141,7 +141,7 @@ 5.4 continue retry; 5.5 } 5.6 } 5.7 - } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) { 5.8 + } else if (!EncodingHelper.isNewLine(chars, p, end)) { 5.9 //if () break; 5.10 // goto retry_gate; 5.11 pprev = p; 5.12 @@ -226,7 +226,7 @@ 5.13 continue retry; 5.14 } 5.15 } 5.16 - } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) { 5.17 + } else if (!EncodingHelper.isNewLine(chars, p, end)) { 5.18 p = EncodingHelper.prevCharHead(adjrange, p); 5.19 if (p == -1) return false; 5.20 continue retry; 5.21 @@ -330,12 +330,6 @@ 5.22 maxSemiEnd = end; 5.23 if (EncodingHelper.isNewLine(chars, preEnd, end)) { 5.24 minSemiEnd = preEnd; 5.25 - if (Config.USE_CRNL_AS_LINE_TERMINATOR) { 5.26 - preEnd = EncodingHelper.stepBack(str, preEnd, 1); 5.27 - if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) { 5.28 - minSemiEnd = preEnd; 5.29 - } 5.30 - } 5.31 if (minSemiEnd > str && start <= minSemiEnd) { 5.32 // !goto end_buf;! 5.33 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
6.1 --- a/src/jdk/nashorn/tools/Shell.java Fri Aug 23 12:20:19 2013 -0300 6.2 +++ b/src/jdk/nashorn/tools/Shell.java Mon Aug 26 15:59:41 2013 +0200 6.3 @@ -445,7 +445,7 @@ 6.4 continue; 6.5 } 6.6 6.7 - if (res != null && res != ScriptRuntime.UNDEFINED) { 6.8 + if (res != ScriptRuntime.UNDEFINED) { 6.9 err.println(JSType.toString(res)); 6.10 } 6.11 }
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/test/script/basic/JDK-8023650.js Mon Aug 26 15:59:41 2013 +0200 7.3 @@ -0,0 +1,109 @@ 7.4 +/* 7.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 7.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 7.7 + * 7.8 + * This code is free software; you can redistribute it and/or modify it 7.9 + * under the terms of the GNU General Public License version 2 only, as 7.10 + * published by the Free Software Foundation. 7.11 + * 7.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 7.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 7.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 7.15 + * version 2 for more details (a copy is included in the LICENSE file that 7.16 + * accompanied this code). 7.17 + * 7.18 + * You should have received a copy of the GNU General Public License version 7.19 + * 2 along with this work; if not, write to the Free Software Foundation, 7.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 7.21 + * 7.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 7.23 + * or visit www.oracle.com if you need additional information or have any 7.24 + * questions. 7.25 + */ 7.26 + 7.27 +/** 7.28 + * JDK-8023650: Regexp m flag does not recognize CRNL or CR 7.29 + * 7.30 + * @test 7.31 + * @run 7.32 + */ 7.33 + 7.34 +if (!/^Connection: close$/m.test('\r\n\r\nConnection: close\r\n\r\n')) { 7.35 + throw new Error(); 7.36 +} 7.37 + 7.38 +if (!/^Connection: close$/m.test('\n\nConnection: close\n\n')) { 7.39 + throw new Error(); 7.40 +} 7.41 + 7.42 +if (!/^Connection: close$/m.test('\r\rConnection: close\r\r')) { 7.43 + throw new Error(); 7.44 +} 7.45 + 7.46 +if (!/^Connection: close$/m.test('\u2028\u2028Connection: close\u2028\u2028')) { 7.47 + throw new Error(); 7.48 +} 7.49 + 7.50 +if (!/^Connection: close$/m.test('\u2029\u2029Connection: close\u2029\u2029')) { 7.51 + throw new Error(); 7.52 +} 7.53 + 7.54 +var result = /a(.*)/.exec("a\r"); 7.55 +if (!result || result[0] != 'a' || result[1] != '') { 7.56 + throw new Error(); 7.57 +} 7.58 + 7.59 +result = /a(.*)/m.exec("a\r"); 7.60 +if (!result || result[0] != 'a' || result[1] != '') { 7.61 + throw new Error(); 7.62 +} 7.63 + 7.64 +result = /a(.*)/.exec("a\n"); 7.65 +if (!result || result[0] != 'a' || result[1] != '') { 7.66 + throw new Error(); 7.67 +} 7.68 + 7.69 +result = /a(.*)/m.exec("a\n"); 7.70 +if (!result || result[0] != 'a' || result[1] != '') { 7.71 + throw new Error(); 7.72 +} 7.73 + 7.74 +result = /a(.*)/.exec("a\r\n"); 7.75 +if (!result || result[0] != 'a' || result[1] != '') { 7.76 + throw new Error(); 7.77 +} 7.78 + 7.79 +result = /a(.*)/m.exec("a\r\n"); 7.80 +if (!result || result[0] != 'a' || result[1] != '') { 7.81 + throw new Error(); 7.82 +} 7.83 + 7.84 +result = /a(.*)/.exec("a\u2028"); 7.85 +if (!result || result[0] != 'a' || result[1] != '') { 7.86 + throw new Error(); 7.87 +} 7.88 + 7.89 +result = /a(.*)/m.exec("a\u2029"); 7.90 +if (!result || result[0] != 'a' || result[1] != '') { 7.91 + throw new Error(); 7.92 +} 7.93 + 7.94 +if (/a$/.test("a\n")) { 7.95 + throw new Error(); 7.96 +} 7.97 + 7.98 +if (/a$/.test("a\r")) { 7.99 + throw new Error(); 7.100 +} 7.101 + 7.102 +if (/a$/.test("a\r\n")) { 7.103 + throw new Error(); 7.104 +} 7.105 + 7.106 +if (/a$/.test("a\u2028")) { 7.107 + throw new Error(); 7.108 +} 7.109 + 7.110 +if (/a$/.test("a\u2029")) { 7.111 + throw new Error(); 7.112 +}