8011749: Bugs with empty character class handling

Wed, 10 Apr 2013 14:05:11 +0200

author
hannesw
date
Wed, 10 Apr 2013 14:05:11 +0200
changeset 188
b4ea8678bf15
parent 187
635a93b61d34
child 189
8ae9ed1ac1e2

8011749: Bugs with empty character class handling
Reviewed-by: lagergren, attila

src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java file | annotate | diff | comparison | revisions
test/script/basic/JDK-8011749.js file | annotate | diff | comparison | revisions
test/script/basic/JDK-8011749.js.EXPECTED file | annotate | diff | comparison | revisions
     1.1 --- a/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Wed Apr 10 14:00:11 2013 +0200
     1.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Wed Apr 10 14:05:11 2013 +0200
     1.3 @@ -47,9 +47,6 @@
     1.4       */
     1.5      private final StringBuilder sb;
     1.6  
     1.7 -    /** Is this the special case of a regexp that never matches anything */
     1.8 -    private boolean neverMatches;
     1.9 -
    1.10      /** Expected token table */
    1.11      private final Map<Character, Integer> expected = new HashMap<>();
    1.12  
    1.13 @@ -99,9 +96,6 @@
    1.14      }
    1.15  
    1.16      private void processForwardReferences() {
    1.17 -        if (neverMatches()) {
    1.18 -            return;
    1.19 -        }
    1.20  
    1.21          Iterator<Integer> iterator = forwardReferences.descendingIterator();
    1.22          while (iterator.hasNext()) {
    1.23 @@ -136,9 +130,6 @@
    1.24          }
    1.25  
    1.26          scanner.processForwardReferences();
    1.27 -        if (scanner.neverMatches()) {
    1.28 -            return null; // never matches
    1.29 -        }
    1.30  
    1.31          // Throw syntax error unless we parsed the entire JavaScript regexp without syntax errors
    1.32          if (scanner.position != string.length()) {
    1.33 @@ -147,16 +138,6 @@
    1.34          }
    1.35  
    1.36          return scanner;
    1.37 -     }
    1.38 -
    1.39 -    /**
    1.40 -     * Does this regexp ever match anything? Use of e.g. [], which is legal in JavaScript,
    1.41 -     * is an example where we never match
    1.42 -     *
    1.43 -     * @return boolean
    1.44 -     */
    1.45 -    private boolean neverMatches() {
    1.46 -        return neverMatches;
    1.47      }
    1.48  
    1.49      final StringBuilder getStringBuilder() {
    1.50 @@ -278,23 +259,16 @@
    1.51          }
    1.52  
    1.53          if (atom()) {
    1.54 -            boolean emptyCharacterClass = false;
    1.55 +            // Check for character classes that never or always match
    1.56              if (sb.toString().endsWith("[]")) {
    1.57 -                emptyCharacterClass = true;
    1.58 +                sb.setLength(sb.length() - 1);
    1.59 +                sb.append("^\\s\\S]");
    1.60              } else if (sb.toString().endsWith("[^]")) {
    1.61                  sb.setLength(sb.length() - 2);
    1.62                  sb.append("\\s\\S]");
    1.63              }
    1.64  
    1.65 -            boolean quantifier = quantifier();
    1.66 -
    1.67 -            if (emptyCharacterClass) {
    1.68 -                if (!quantifier) {
    1.69 -                    neverMatches = true; //never matches ever.
    1.70 -                }
    1.71 -                // Note: we could check if quantifier has min zero to mark empty character class as dead.
    1.72 -            }
    1.73 -
    1.74 +            quantifier();
    1.75              return true;
    1.76          }
    1.77  
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/test/script/basic/JDK-8011749.js	Wed Apr 10 14:05:11 2013 +0200
     2.3 @@ -0,0 +1,38 @@
     2.4 +/*
     2.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     2.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.7 + *
     2.8 + * This code is free software; you can redistribute it and/or modify it
     2.9 + * under the terms of the GNU General Public License version 2 only, as
    2.10 + * published by the Free Software Foundation.
    2.11 + *
    2.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    2.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    2.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    2.15 + * version 2 for more details (a copy is included in the LICENSE file that
    2.16 + * accompanied this code).
    2.17 + *
    2.18 + * You should have received a copy of the GNU General Public License version
    2.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    2.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    2.21 + *
    2.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    2.23 + * or visit www.oracle.com if you need additional information or have any
    2.24 + * questions.
    2.25 + */
    2.26 +
    2.27 +/**
    2.28 + * JDK-8011749: Bugs with empty character class handling
    2.29 + *
    2.30 + * @test
    2.31 + * @run
    2.32 + */
    2.33 +
    2.34 +// empty class in alternative
    2.35 +print(/[]|[^]/.exec("a"));
    2.36 +print(/[]|[]/.test("a"));
    2.37 +print(/[]|[]|[a]/.exec("a"));
    2.38 +
    2.39 +// empty class in negative lookahead
    2.40 +print(/(?![])/.test(""));
    2.41 +print(/(?![])./.exec("a"));
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/test/script/basic/JDK-8011749.js.EXPECTED	Wed Apr 10 14:05:11 2013 +0200
     3.3 @@ -0,0 +1,5 @@
     3.4 +a
     3.5 +false
     3.6 +a
     3.7 +true
     3.8 +a

mercurial