8023650: Regexp m flag does not recognize CRNL or CR

Mon, 26 Aug 2013 15:59:41 +0200

author
hannesw
date
Mon, 26 Aug 2013 15:59:41 +0200
changeset 528
c19c66e661a9
parent 527
12820c8d0a5d
child 529
99e48c76d11f

8023650: Regexp m flag does not recognize CRNL or CR
Reviewed-by: jlaskey, lagergren

src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/joni/Config.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/tools/Shell.java file | annotate | diff | comparison | revisions
test/script/basic/JDK-8023650.js file | annotate | diff | comparison | revisions
     1.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java	Fri Aug 23 12:20:19 2013 -0300
     1.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java	Mon Aug 26 15:59:41 2013 +0200
     1.3 @@ -26,7 +26,6 @@
     1.4  import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol;
     1.5  import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol;
     1.6  import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion;
     1.7 -import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isCrnl;
     1.8  import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine;
     1.9  
    1.10  import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
    1.11 @@ -500,7 +499,7 @@
    1.12  
    1.13      private void opAnyChar() {
    1.14          if (s >= range) {opFail(); return;}
    1.15 -        if (chars[s] == EncodingHelper.NEW_LINE) {opFail(); return;}
    1.16 +        if (isNewLine(chars[s])) {opFail(); return;}
    1.17          s++;
    1.18          sprev = sbegin; // break;
    1.19      }
    1.20 @@ -538,7 +537,7 @@
    1.21          while (s < range) {
    1.22              char b = chars[s];
    1.23              if (c == b) pushAlt(ip + 1, s, sprev);
    1.24 -            if (b == EncodingHelper.NEW_LINE) {opFail(); return;}
    1.25 +            if (isNewLine(b)) {opFail(); return;}
    1.26              sprev = s;
    1.27              s++;
    1.28          }
    1.29 @@ -617,7 +616,7 @@
    1.30          if (s == str) {
    1.31              if (isNotBol(msaOptions)) opFail();
    1.32              return;
    1.33 -        } else if (EncodingHelper.isNewLine(chars, sprev, end) && s != end) {
    1.34 +        } else if (isNewLine(chars, sprev, end) && s != end) {
    1.35              return;
    1.36          }
    1.37          opFail();
    1.38 @@ -626,7 +625,7 @@
    1.39      private void opEndLine()  {
    1.40          if (s == end) {
    1.41              if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
    1.42 -                if (str == end || !EncodingHelper.isNewLine(chars, sprev, end)) {
    1.43 +                if (str == end || !isNewLine(chars, sprev, end)) {
    1.44                      if (isNotEol(msaOptions)) opFail();
    1.45                  }
    1.46                  return;
    1.47 @@ -634,7 +633,7 @@
    1.48                  if (isNotEol(msaOptions)) opFail();
    1.49                  return;
    1.50              }
    1.51 -        } else if (isNewLine(chars, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end))) {
    1.52 +        } else if (isNewLine(chars, s, end)) {
    1.53              return;
    1.54          }
    1.55          opFail();
    1.56 @@ -653,9 +652,6 @@
    1.57              }
    1.58          } else if (isNewLine(chars, s, end) && s + 1 == end) {
    1.59              return;
    1.60 -        } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && isCrnl(chars, s, end)) {
    1.61 -            int ss = s + 2;
    1.62 -            if (ss == end) return;
    1.63          }
    1.64          opFail();
    1.65      }
     2.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java	Fri Aug 23 12:20:19 2013 -0300
     2.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Config.java	Mon Aug 26 15:59:41 2013 +0200
     2.3 @@ -29,7 +29,6 @@
     2.4      final int INTERNAL_ENC_CASE_FOLD_MULTI_CHAR = (1<<30);
     2.5      final int ENC_CASE_FOLD_MIN = INTERNAL_ENC_CASE_FOLD_MULTI_CHAR;
     2.6      final int ENC_CASE_FOLD_DEFAULT = ENC_CASE_FOLD_MIN;
     2.7 -    final boolean USE_CRNL_AS_LINE_TERMINATOR = false;
     2.8  
     2.9      final boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */
    2.10      final boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true;     /* /\n$/ =~ "\n" */
     3.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java	Fri Aug 23 12:20:19 2013 -0300
     3.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/EncodingHelper.java	Mon Aug 26 15:59:41 2013 +0200
     3.3 @@ -24,10 +24,12 @@
     3.4  
     3.5  import java.util.Arrays;
     3.6  
     3.7 -public class EncodingHelper {
     3.8 +public final class EncodingHelper {
     3.9  
    3.10 -    public final static char NEW_LINE = 0xa;
    3.11 -    public final static char RETURN   = 0xd;
    3.12 +    final static int NEW_LINE            = 0x000a;
    3.13 +    final static int RETURN              = 0x000d;
    3.14 +    final static int LINE_SEPARATOR      = 0x2028;
    3.15 +    final static int PARAGRAPH_SEPARATOR = 0x2029;
    3.16  
    3.17      final static char[] EMPTYCHARS = new char[0];
    3.18      final static int[][] codeRanges = new int[15][];
    3.19 @@ -64,15 +66,11 @@
    3.20      }
    3.21  
    3.22      public static boolean isNewLine(int code) {
    3.23 -        return code == NEW_LINE;
    3.24 +        return code == NEW_LINE || code == RETURN || code == LINE_SEPARATOR || code == PARAGRAPH_SEPARATOR;
    3.25      }
    3.26  
    3.27      public static boolean isNewLine(char[] chars, int p, int end) {
    3.28 -        return p < end && chars[p] == NEW_LINE;
    3.29 -    }
    3.30 -
    3.31 -    public static boolean isCrnl(char[] chars, int p, int end) {
    3.32 -        return p + 1 < end && chars[p] == RETURN && chars[p + 1] == NEW_LINE;
    3.33 +        return p < end && isNewLine(chars[p]);
    3.34      }
    3.35  
    3.36      // Encoding.prevCharHead
    3.37 @@ -194,7 +192,7 @@
    3.38          int type;
    3.39          switch (ctype) {
    3.40              case CharacterType.NEWLINE:
    3.41 -                return code == EncodingHelper.NEW_LINE;
    3.42 +                return isNewLine(code);
    3.43              case CharacterType.ALPHA:
    3.44                  return (1 << Character.getType(code) & CharacterType.ALPHA_MASK) != 0;
    3.45              case CharacterType.BLANK:
     4.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java	Fri Aug 23 12:20:19 2013 -0300
     4.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Lexer.java	Mon Aug 26 15:59:41 2013 +0200
     4.3 @@ -732,7 +732,7 @@
     4.4                          if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
     4.5                          break;
     4.6                      case '$':
     4.7 -                        if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE);
     4.8 +                        if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE);
     4.9                          break;
    4.10                      case '[':
    4.11                          if (syntax.opBracketCC()) token.type = TokenType.CC_CC_OPEN;
     5.1 --- a/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java	Fri Aug 23 12:20:19 2013 -0300
     5.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/joni/Matcher.java	Mon Aug 26 15:59:41 2013 +0200
     5.3 @@ -141,7 +141,7 @@
     5.4                                      continue retry;
     5.5                                  }
     5.6                              }
     5.7 -                        } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
     5.8 +                        } else if (!EncodingHelper.isNewLine(chars, p, end)) {
     5.9                              //if () break;
    5.10                              // goto retry_gate;
    5.11                              pprev = p;
    5.12 @@ -226,7 +226,7 @@
    5.13                                      continue retry;
    5.14                                  }
    5.15                              }
    5.16 -                        } else if (!EncodingHelper.isNewLine(chars, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !EncodingHelper.isCrnl(chars, p, end))) {
    5.17 +                        } else if (!EncodingHelper.isNewLine(chars, p, end)) {
    5.18                              p = EncodingHelper.prevCharHead(adjrange, p);
    5.19                              if (p == -1) return false;
    5.20                              continue retry;
    5.21 @@ -330,12 +330,6 @@
    5.22                  maxSemiEnd = end;
    5.23                  if (EncodingHelper.isNewLine(chars, preEnd, end)) {
    5.24                      minSemiEnd = preEnd;
    5.25 -                    if (Config.USE_CRNL_AS_LINE_TERMINATOR) {
    5.26 -                        preEnd = EncodingHelper.stepBack(str, preEnd, 1);
    5.27 -                        if (preEnd != -1 && EncodingHelper.isCrnl(chars, preEnd, end)) {
    5.28 -                            minSemiEnd = preEnd;
    5.29 -                        }
    5.30 -                    }
    5.31                      if (minSemiEnd > str && start <= minSemiEnd) {
    5.32                          // !goto end_buf;!
    5.33                          if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
     6.1 --- a/src/jdk/nashorn/tools/Shell.java	Fri Aug 23 12:20:19 2013 -0300
     6.2 +++ b/src/jdk/nashorn/tools/Shell.java	Mon Aug 26 15:59:41 2013 +0200
     6.3 @@ -445,7 +445,7 @@
     6.4                      continue;
     6.5                  }
     6.6  
     6.7 -                if (res != null && res != ScriptRuntime.UNDEFINED) {
     6.8 +                if (res != ScriptRuntime.UNDEFINED) {
     6.9                      err.println(JSType.toString(res));
    6.10                  }
    6.11              }
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/test/script/basic/JDK-8023650.js	Mon Aug 26 15:59:41 2013 +0200
     7.3 @@ -0,0 +1,109 @@
     7.4 +/*
     7.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     7.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     7.7 + * 
     7.8 + * This code is free software; you can redistribute it and/or modify it
     7.9 + * under the terms of the GNU General Public License version 2 only, as
    7.10 + * published by the Free Software Foundation.
    7.11 + * 
    7.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    7.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    7.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    7.15 + * version 2 for more details (a copy is included in the LICENSE file that
    7.16 + * accompanied this code).
    7.17 + * 
    7.18 + * You should have received a copy of the GNU General Public License version
    7.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    7.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    7.21 + * 
    7.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    7.23 + * or visit www.oracle.com if you need additional information or have any
    7.24 + * questions.
    7.25 + */
    7.26 +
    7.27 +/**
    7.28 + * JDK-8023650: Regexp m flag does not recognize CRNL or CR
    7.29 + *
    7.30 + * @test
    7.31 + * @run
    7.32 + */
    7.33 +
    7.34 +if (!/^Connection: close$/m.test('\r\n\r\nConnection: close\r\n\r\n')) {
    7.35 +    throw new Error();
    7.36 +}
    7.37 +
    7.38 +if (!/^Connection: close$/m.test('\n\nConnection: close\n\n')) {
    7.39 +    throw new Error();
    7.40 +}
    7.41 +
    7.42 +if (!/^Connection: close$/m.test('\r\rConnection: close\r\r')) {
    7.43 +    throw new Error();
    7.44 +}
    7.45 +
    7.46 +if (!/^Connection: close$/m.test('\u2028\u2028Connection: close\u2028\u2028')) {
    7.47 +    throw new Error();
    7.48 +}
    7.49 +
    7.50 +if (!/^Connection: close$/m.test('\u2029\u2029Connection: close\u2029\u2029')) {
    7.51 +    throw new Error();
    7.52 +}
    7.53 +
    7.54 +var result = /a(.*)/.exec("a\r");
    7.55 +if (!result || result[0] != 'a' || result[1] != '') {
    7.56 +    throw new Error();
    7.57 +}
    7.58 +
    7.59 +result = /a(.*)/m.exec("a\r");
    7.60 +if (!result || result[0] != 'a' || result[1] != '') {
    7.61 +    throw new Error();
    7.62 +}
    7.63 +
    7.64 +result = /a(.*)/.exec("a\n");
    7.65 +if (!result || result[0] != 'a' || result[1] != '') {
    7.66 +    throw new Error();
    7.67 +}
    7.68 +
    7.69 +result = /a(.*)/m.exec("a\n");
    7.70 +if (!result || result[0] != 'a' || result[1] != '') {
    7.71 +    throw new Error();
    7.72 +}
    7.73 +
    7.74 +result = /a(.*)/.exec("a\r\n");
    7.75 +if (!result || result[0] != 'a' || result[1] != '') {
    7.76 +    throw new Error();
    7.77 +}
    7.78 +
    7.79 +result = /a(.*)/m.exec("a\r\n");
    7.80 +if (!result || result[0] != 'a' || result[1] != '') {
    7.81 +    throw new Error();
    7.82 +}
    7.83 +
    7.84 +result = /a(.*)/.exec("a\u2028");
    7.85 +if (!result || result[0] != 'a' || result[1] != '') {
    7.86 +    throw new Error();
    7.87 +}
    7.88 +
    7.89 +result = /a(.*)/m.exec("a\u2029");
    7.90 +if (!result || result[0] != 'a' || result[1] != '') {
    7.91 +    throw new Error();
    7.92 +}
    7.93 +
    7.94 +if (/a$/.test("a\n")) {
    7.95 +    throw new Error();
    7.96 +}
    7.97 +
    7.98 +if (/a$/.test("a\r")) {
    7.99 +    throw new Error();
   7.100 +}
   7.101 +
   7.102 +if (/a$/.test("a\r\n")) {
   7.103 +    throw new Error();
   7.104 +}
   7.105 +
   7.106 +if (/a$/.test("a\u2028")) {
   7.107 +    throw new Error();
   7.108 +}
   7.109 +
   7.110 +if (/a$/.test("a\u2029")) {
   7.111 +    throw new Error();
   7.112 +}

mercurial