jdk8-mips64-public/nashorn: src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java@b4ea8678bf15

8011749: Bugs with empty character class handling
Reviewed-by: lagergren, attila

     1 /*

     2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package jdk.nashorn.internal.runtime.regexp;

    28 import java.util.HashMap;

    29 import java.util.Iterator;

    30 import java.util.LinkedList;

    31 import java.util.List;

    32 import java.util.Map;

    33 import java.util.regex.PatternSyntaxException;

    35 import jdk.nashorn.internal.parser.Lexer;

    36 import jdk.nashorn.internal.parser.Scanner;

    37 import jdk.nashorn.internal.runtime.BitVector;

    39 /**

    40  * Scan a JavaScript regexp, converting to Java regex if necessary.

    41  *

    42  */

    43 final class RegExpScanner extends Scanner {

    45     /**

    46      * String builder used to rewrite the pattern for the currently used regexp factory.

    47      */

    48     private final StringBuilder sb;

    50     /** Expected token table */

    51     private final Map<Character, Integer> expected = new HashMap<>();

    53     /** Capturing parenthesis that have been found so far. */

    54     private final List<Capture> caps = new LinkedList<>();

    56     /** Forward references to capturing parenthesis to be resolved later.*/

    57     private final LinkedList<Integer> forwardReferences = new LinkedList<>();

    59     /** Current level of zero-width negative lookahead assertions. */

    60     private int negativeLookaheadLevel;

    62     /** Are we currently inside a character class? */

    63     private boolean inCharClass = false;

    65     /** Are we currently inside a negated character class? */

    66     private boolean inNegativeClass = false;

    68     private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?";

    70     private static class Capture {

    71         /**

    72          * Zero-width negative lookaheads enclosing the capture.

    73          */

    74         private final int negativeLookaheadLevel;

    76         Capture(final int negativeLookaheadLevel) {

    77             this.negativeLookaheadLevel = negativeLookaheadLevel;

    78         }

    80         public int getNegativeLookaheadLevel() {

    81             return negativeLookaheadLevel;

    82         }

    84     }

    86     /**

    87      * Constructor

    88      * @param string the JavaScript regexp to parse

    89      */

    90     private RegExpScanner(final String string) {

    91         super(string);

    92         sb = new StringBuilder(limit);

    93         reset(0);

    94         expected.put(']', 0);

    95         expected.put('}', 0);

    96     }

    98     private void processForwardReferences() {

   100         Iterator<Integer> iterator = forwardReferences.descendingIterator();

   101         while (iterator.hasNext()) {

   102             final int pos = iterator.next();

   103             final int num = iterator.next();

   104             if (num > caps.size()) {

   105                 // Non-existing backreference. If the number begins with a valid octal convert it to

   106                 // Unicode escape and append the rest to a literal character sequence.

   107                 final StringBuilder buffer = new StringBuilder();

   108                 octalOrLiteral(Integer.toString(num), buffer);

   109                 sb.insert(pos, buffer);

   110             }

   111         }

   113         forwardReferences.clear();

   114     }

   116     /**

   117      * Scan a JavaScript regexp string returning a Java safe regex string.

   118      *

   119      * @param string

   120      *            JavaScript regexp string.

   121      * @return Java safe regex string.

   122      */

   123     public static RegExpScanner scan(final String string) {

   124         final RegExpScanner scanner = new RegExpScanner(string);

   126         try {

   127             scanner.disjunction();

   128         } catch (final Exception e) {

   129             throw new PatternSyntaxException(e.getMessage(), string, scanner.position);

   130         }

   132         scanner.processForwardReferences();

   134         // Throw syntax error unless we parsed the entire JavaScript regexp without syntax errors

   135         if (scanner.position != string.length()) {

   136             final String p = scanner.getStringBuilder().toString();

   137             throw new PatternSyntaxException(string, p, p.length() + 1);

   138         }

   140         return scanner;

   141     }

   143     final StringBuilder getStringBuilder() {

   144         return sb;

   145     }

   147     String getJavaPattern() {

   148         return sb.toString();

   149     }

   151     BitVector getGroupsInNegativeLookahead() {

   152         BitVector vec = null;

   153         for (int i = 0; i < caps.size(); i++) {

   154             final Capture cap = caps.get(i);

   155             if (cap.getNegativeLookaheadLevel() > 0) {

   156                 if (vec == null) {

   157                     vec = new BitVector(caps.size() + 1);

   158                 }

   159                 vec.set(i + 1);

   160             }

   161         }

   162         return vec;

   163     }

   165     /**

   166      * Commit n characters to the builder and to a given token

   167      * @param n     Number of characters.

   168      * @return Committed token

   169      */

   170     private boolean commit(final int n) {

   171         switch (n) {

   172         case 1:

   173             sb.append(ch0);

   174             skip(1);

   175             break;

   176         case 2:

   177             sb.append(ch0);

   178             sb.append(ch1);

   179             skip(2);

   180             break;

   181         case 3:

   182             sb.append(ch0);

   183             sb.append(ch1);

   184             sb.append(ch2);

   185             skip(3);

   186             break;

   187         default:

   188             assert false : "Should not reach here";

   189         }

   191         return true;

   192     }

   194     /**

   195      * Restart the buffers back at an earlier position.

   196      *

   197      * @param startIn

   198      *            Position in the input stream.

   199      * @param startOut

   200      *            Position in the output stream.

   201      */

   202     private void restart(final int startIn, final int startOut) {

   203         reset(startIn);

   204         sb.setLength(startOut);

   205     }

   207     private void push(final char ch) {

   208         expected.put(ch, expected.get(ch) + 1);

   209     }

   211     private void pop(final char ch) {

   212         expected.put(ch, Math.min(0, expected.get(ch) - 1));

   213     }

   215     /*

   216      * Recursive descent tokenizer starts below.

   217      */

   219     /*

   220      * Disjunction ::

   221      *      Alternative

   222      *      Alternative | Disjunction

   223      */

   224     private void disjunction() {

   225         while (true) {

   226             alternative();

   228             if (ch0 == '|') {

   229                 commit(1);

   230             } else {

   231                 break;

   232             }

   233         }

   234     }

   236     /*

   237      * Alternative ::

   238      *      [empty]

   239      *      Alternative Term

   240      */

   241     private void alternative() {

   242         while (term()) {

   243             // do nothing

   244         }

   245     }

   247     /*

   248      * Term ::

   249      *      Assertion

   250      *      Atom

   251      *      Atom Quantifier

   252      */

   253     private boolean term() {

   254         final int startIn  = position;

   255         final int startOut = sb.length();

   257         if (assertion()) {

   258             return true;

   259         }

   261         if (atom()) {

   262             // Check for character classes that never or always match

   263             if (sb.toString().endsWith("[]")) {

   264                 sb.setLength(sb.length() - 1);

   265                 sb.append("^\\s\\S]");

   266             } else if (sb.toString().endsWith("[^]")) {

   267                 sb.setLength(sb.length() - 2);

   268                 sb.append("\\s\\S]");

   269             }

   271             quantifier();

   272             return true;

   273         }

   275         restart(startIn, startOut);

   276         return false;

   277     }

   279     /*

   280      * Assertion ::

   281      *      ^

   282      *      $

   283      *      \b

   284      *      \B

   285      *      ( ? = Disjunction )

   286      *      ( ? ! Disjunction )

   287      */

   288     private boolean assertion() {

   289         final int startIn  = position;

   290         final int startOut = sb.length();

   292         switch (ch0) {

   293         case '^':

   294         case '$':

   295             return commit(1);

   297         case '\\':

   298             if (ch1 == 'b' || ch1 == 'B') {

   299                 return commit(2);

   300             }

   301             break;

   303         case '(':

   304             if (ch1 != '?') {

   305                 break;

   306             }

   307             if (ch2 != '=' && ch2 != '!') {

   308                 break;

   309             }

   310             final boolean isNegativeLookahead = (ch2 == '!');

   311             commit(3);

   313             if (isNegativeLookahead) {

   314                 negativeLookaheadLevel++;

   315             }

   316             disjunction();

   317             if (isNegativeLookahead) {

   318                 negativeLookaheadLevel--;

   319             }

   321             if (ch0 == ')') {

   322                 return commit(1);

   323             }

   324             break;

   326         default:

   327             break;

   328         }

   330         restart(startIn, startOut);

   331         return false;

   332     }

   334     /*

   335      * Quantifier ::

   336      *      QuantifierPrefix

   337      *      QuantifierPrefix ?

   338      */

   339     private boolean quantifier() {

   340         if (quantifierPrefix()) {

   341             if (ch0 == '?') {

   342                 commit(1);

   343             }

   344             return true;

   345         }

   346         return false;

   347     }

   349     /*

   350      * QuantifierPrefix ::

   351      *      *

   352      *      +

   353      *      ?

   354      *      { DecimalDigits }

   355      *      { DecimalDigits , }

   356      *      { DecimalDigits , DecimalDigits }

   357      */

   358     private boolean quantifierPrefix() {

   359         final int startIn  = position;

   360         final int startOut = sb.length();

   362         switch (ch0) {

   363         case '*':

   364         case '+':

   365         case '?':

   366             return commit(1);

   368         case '{':

   369             commit(1);

   371             if (!decimalDigits()) {

   372                 break; // not a quantifier - back out

   373             }

   374             push('}');

   376             if (ch0 == ',') {

   377                 commit(1);

   378                 decimalDigits();

   379             }

   381             if (ch0 == '}') {

   382                 pop('}');

   383                 commit(1);

   384             } else {

   385                 // Bad quantifier should be rejected but is accepted by all major engines

   386                 restart(startIn, startOut);

   387                 return false;

   388             }

   390             return true;

   392         default:

   393             break;

   394         }

   396         restart(startIn, startOut);

   397         return false;

   398     }

   400     /*

   401      * Atom ::

   402      *      PatternCharacter

   403      *      .

   404      *      \ AtomEscape

   405      *      CharacterClass

   406      *      ( Disjunction )

   407      *      ( ? : Disjunction )

   408      *

   409      */

   410     private boolean atom() {

   411         final int startIn  = position;

   412         final int startOut = sb.length();

   414         if (patternCharacter()) {

   415             return true;

   416         }

   418         if (ch0 == '.') {

   419             return commit(1);

   420         }

   422         if (ch0 == '\\') {

   423             commit(1);

   425             if (atomEscape()) {

   426                 return true;

   427             }

   428         }

   430         if (characterClass()) {

   431             return true;

   432         }

   434         if (ch0 == '(') {

   435             boolean capturingParens = true;

   436             commit(1);

   437             if (ch0 == '?' && ch1 == ':') {

   438                 capturingParens = false;

   439                 commit(2);

   440             }

   442             disjunction();

   444             if (ch0 == ')') {

   445                 commit(1);

   446                 if (capturingParens) {

   447                     caps.add(new Capture(negativeLookaheadLevel));

   448                 }

   449                 return true;

   450             }

   451         }

   453         restart(startIn, startOut);

   454         return false;

   455     }

   457     /*

   458      * PatternCharacter ::

   459      *      SourceCharacter but not any of: ^$\.*+?()[]{}|

   460      */

   461     @SuppressWarnings("fallthrough")

   462     private boolean patternCharacter() {

   463         if (atEOF()) {

   464             return false;

   465         }

   467         switch (ch0) {

   468         case '^':

   469         case '$':

   470         case '\\':

   471         case '.':

   472         case '*':

   473         case '+':

   474         case '?':

   475         case '(':

   476         case ')':

   477         case '[':

   478         case '|':

   479             return false;

   481         case '}':

   482         case ']':

   483             final int n = expected.get(ch0);

   484             if (n != 0) {

   485                 return false;

   486             }

   488        case '{':

   489            // if not a valid quantifier escape curly brace to match itself

   490            // this ensures compatibility with other JS implementations

   491            if (!quantifierPrefix()) {

   492                sb.append('\\');

   493                return commit(1);

   494            }

   495            return false;

   497         default:

   498             return commit(1); // SOURCECHARACTER

   499         }

   500     }

   502     /*

   503      * AtomEscape ::

   504      *      DecimalEscape

   505      *      CharacterEscape

   506      *      CharacterClassEscape

   507      */

   508     private boolean atomEscape() {

   509         // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all

   510         return decimalEscape() || characterClassEscape() || characterEscape() || identityEscape();

   511     }

   513     /*

   514      * CharacterEscape ::

   515      *      ControlEscape

   516      *      c ControlLetter

   517      *      HexEscapeSequence

   518      *      UnicodeEscapeSequence

   519      *      IdentityEscape

   520      */

   521     private boolean characterEscape() {

   522         final int startIn  = position;

   523         final int startOut = sb.length();

   525         if (controlEscape()) {

   526             return true;

   527         }

   529         if (ch0 == 'c') {

   530             commit(1);

   531             if (controlLetter()) {

   532                 return true;

   533             }

   534             restart(startIn, startOut);

   535         }

   537         if (hexEscapeSequence() || unicodeEscapeSequence()) {

   538             return true;

   539         }

   541         restart(startIn, startOut);

   542         return false;

   543     }

   545     private boolean scanEscapeSequence(final char leader, final int length) {

   546         final int startIn  = position;

   547         final int startOut = sb.length();

   549         if (ch0 != leader) {

   550             return false;

   551         }

   553         commit(1);

   554         for (int i = 0; i < length; i++) {

   555             final char ch0l = Character.toLowerCase(ch0);

   556             if ((ch0l >= 'a' && ch0l <= 'f') || isDecimalDigit(ch0)) {

   557                 commit(1);

   558             } else {

   559                 restart(startIn, startOut);

   560                 return false;

   561             }

   562         }

   564         return true;

   565     }

   567     private boolean hexEscapeSequence() {

   568         return scanEscapeSequence('x', 2);

   569     }

   571     private boolean unicodeEscapeSequence() {

   572         return scanEscapeSequence('u', 4);

   573     }

   575     /*

   576      * ControlEscape ::

   577      *      one of fnrtv

   578      */

   579     private boolean controlEscape() {

   580         switch (ch0) {

   581         case 'f':

   582         case 'n':

   583         case 'r':

   584         case 't':

   585         case 'v':

   586             return commit(1);

   588         default:

   589             return false;

   590         }

   591     }

   593     /*

   594      * ControlLetter ::

   595      *      one of abcdefghijklmnopqrstuvwxyz

   596      *      ABCDEFGHIJKLMNOPQRSTUVWXYZ

   597      */

   598     private boolean controlLetter() {

   599         final char c = Character.toUpperCase(ch0);

   600         if (c >= 'A' && c <= 'Z') {

   601             // for some reason java regexps don't like control characters on the

   602             // form "\\ca".match([string with ascii 1 at char0]). Translating

   603             // them to unicode does it though.

   604             sb.setLength(sb.length() - 1);

   605             unicode(c - 'A' + 1, sb);

   606             skip(1);

   607             return true;

   608         }

   609         return false;

   610     }

   612     /*

   613      * IdentityEscape ::

   614      *      SourceCharacter but not IdentifierPart

   615      *      <ZWJ>  (200c)

   616      *      <ZWNJ> (200d)

   617      */

   618     private boolean identityEscape() {

   619         if (atEOF()) {

   620             throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException

   621         }

   622         // ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here.

   623         if (ch0 == 'c') {

   624             // Ignore invalid control letter escape if within a character class

   625             if (inCharClass && ch1 != ']') {

   626                 sb.setLength(sb.length() - 1);

   627                 skip(2);

   628                 return true;

   629             } else {

   630                 sb.append('\\'); // Treat invalid \c control sequence as \\c

   631             }

   632         } else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {

   633             sb.setLength(sb.length() - 1);

   634         }

   635         return commit(1);

   636     }

   638     /*

   639      * DecimalEscape ::

   640      *      DecimalIntegerLiteral [lookahead DecimalDigit]

   641      */

   642     private boolean decimalEscape() {

   643         final int startIn  = position;

   644         final int startOut = sb.length();

   646         if (ch0 == '0' && !isOctalDigit(ch1)) {

   647             skip(1);

   648             //  DecimalEscape :: 0. If i is zero, return the EscapeValue consisting of a <NUL> character (Unicodevalue0000);

   649             sb.append("\u0000");

   650             return true;

   651         }

   653         if (isDecimalDigit(ch0)) {

   655             if (ch0 == '0') {

   656                 // We know this is an octal escape.

   657                 if (inCharClass) {

   658                     // Convert octal escape to unicode escape if inside character class.

   659                     int octalValue = 0;

   660                     while (isOctalDigit(ch0)) {

   661                         octalValue = octalValue * 8 + ch0 - '0';

   662                         skip(1);

   663                     }

   665                     unicode(octalValue, sb);

   667                 } else {

   668                     // Copy decimal escape as-is

   669                     decimalDigits();

   670                 }

   671             } else {

   672                 // This should be a backreference, but could also be an octal escape or even a literal string.

   673                 int decimalValue = 0;

   674                 while (isDecimalDigit(ch0)) {

   675                     decimalValue = decimalValue * 10 + ch0 - '0';

   676                     skip(1);

   677                 }

   679                 if (inCharClass) {

   680                     // No backreferences in character classes. Encode as unicode escape or literal char sequence

   681                     sb.setLength(sb.length() - 1);

   682                     octalOrLiteral(Integer.toString(decimalValue), sb);

   684                 } else if (decimalValue <= caps.size() && caps.get(decimalValue - 1).getNegativeLookaheadLevel() > 0) {

   685                     //  Captures that live inside a negative lookahead are dead after the

   686                     //  lookahead and will be undefined if referenced from outside.

   687                     if (caps.get(decimalValue - 1).getNegativeLookaheadLevel() > negativeLookaheadLevel) {

   688                         sb.setLength(sb.length() - 1);

   689                     } else {

   690                         sb.append(decimalValue);

   691                     }

   692                 } else if (decimalValue > caps.size()) {

   693                     // Forward reference to a capture group. Forward references are always undefined so we can omit

   694                     // it from the output buffer. However, if the target capture does not exist, we need to rewrite

   695                     // the reference as hex escape or literal string, so register the reference for later processing.

   696                     sb.setLength(sb.length() - 1);

   697                     forwardReferences.add(decimalValue);

   698                     forwardReferences.add(sb.length());

   699                 } else {

   700                     // Append as backreference

   701                     sb.append(decimalValue);

   702                 }

   704             }

   705             return true;

   706         }

   708         restart(startIn, startOut);

   709         return false;

   710     }

   712     /*

   713      * CharacterClassEscape ::

   714      *  one of dDsSwW

   715      */

   716     private boolean characterClassEscape() {

   717         switch (ch0) {

   718         // java.util.regex requires translation of \s and \S to explicit character list

   719         case 's':

   720             if (RegExpFactory.usesJavaUtilRegex()) {

   721                 sb.setLength(sb.length() - 1);

   722                 // No nested class required if we already are inside a character class

   723                 if (inCharClass) {

   724                     sb.append(Lexer.getWhitespaceRegExp());

   725                 } else {

   726                     sb.append('[').append(Lexer.getWhitespaceRegExp()).append(']');

   727                 }

   728                 skip(1);

   729                 return true;

   730             }

   731             return commit(1);

   732         case 'S':

   733             if (RegExpFactory.usesJavaUtilRegex()) {

   734                 sb.setLength(sb.length() - 1);

   735                 // In negative class we must use intersection to get double negation ("not anything else than space")

   736                 sb.append(inNegativeClass ? "&&[" : "[^").append(Lexer.getWhitespaceRegExp()).append(']');

   737                 skip(1);

   738                 return true;

   739             }

   740             return commit(1);

   741         case 'd':

   742         case 'D':

   743         case 'w':

   744         case 'W':

   745             return commit(1);

   747         default:

   748             return false;

   749         }

   750     }

   752     /*

   753      * CharacterClass ::

   754      *      [ [lookahead {^}] ClassRanges ]

   755      *      [ ^ ClassRanges ]

   756      */

   757     private boolean characterClass() {

   758         final int startIn  = position;

   759         final int startOut = sb.length();

   761         if (ch0 == '[') {

   762             try {

   763                 inCharClass = true;

   764                 push(']');

   765                 commit(1);

   767                 if (ch0 == '^') {

   768                     inNegativeClass = true;

   769                     commit(1);

   770                 }

   772                 if (classRanges() && ch0 == ']') {

   773                     pop(']');

   774                     return commit(1);

   775                 }

   776             } finally {

   777                 inCharClass = false;  // no nested character classes in JavaScript

   778                 inNegativeClass = false;

   779             }

   780         }

   782         restart(startIn, startOut);

   783         return false;

   784     }

   786     /*

   787      * ClassRanges ::

   788      *      [empty]

   789      *      NonemptyClassRanges

   790      */

   791     private boolean classRanges() {

   792         nonemptyClassRanges();

   793         return true;

   794     }

   796     /*

   797      * NonemptyClassRanges ::

   798      *      ClassAtom

   799      *      ClassAtom NonemptyClassRangesNoDash

   800      *      ClassAtom - ClassAtom ClassRanges

   801      */

   802     private boolean nonemptyClassRanges() {

   803         final int startIn  = position;

   804         final int startOut = sb.length();

   806         if (classAtom()) {

   808             if (ch0 == '-') {

   809                 commit(1);

   811                 if (classAtom() && classRanges()) {

   812                     return true;

   813                 }

   814             }

   816             nonemptyClassRangesNoDash();

   818             return true;

   819         }

   821         restart(startIn, startOut);

   822         return false;

   823     }

   825     /*

   826      * NonemptyClassRangesNoDash ::

   827      *      ClassAtom

   828      *      ClassAtomNoDash NonemptyClassRangesNoDash

   829      *      ClassAtomNoDash - ClassAtom ClassRanges

   830      */

   831     private boolean nonemptyClassRangesNoDash() {

   832         final int startIn  = position;

   833         final int startOut = sb.length();

   835         if (classAtomNoDash()) {

   837             // need to check dash first, as for e.g. [a-b|c-d] will otherwise parse - as an atom

   838             if (ch0 == '-') {

   839                commit(1);

   841                if (classAtom() && classRanges()) {

   842                    return true;

   843                }

   844                //fallthru

   845            }

   847             nonemptyClassRangesNoDash();

   848             return true; // still a class atom

   849         }

   851         if (classAtom()) {

   852             return true;

   853         }

   855         restart(startIn, startOut);

   856         return false;

   857     }

   859     /*

   860      * ClassAtom : - ClassAtomNoDash

   861      */

   862     private boolean classAtom() {

   864         if (ch0 == '-') {

   865             return commit(1);

   866         }

   868         return classAtomNoDash();

   869     }

   871     /*

   872      * ClassAtomNoDash ::

   873      *      SourceCharacter but not one of \ or ] or -

   874      *      \ ClassEscape

   875      */

   876     private boolean classAtomNoDash() {

   877         final int startIn  = position;

   878         final int startOut = sb.length();

   880         switch (ch0) {

   881         case ']':

   882         case '-':

   883         case '\0':

   884             return false;

   886         case '[':

   887             // unescaped left square bracket - add escape

   888             sb.append('\\');

   889             return commit(1);

   891         case '\\':

   892             commit(1);

   893             if (classEscape()) {

   894                 return true;

   895             }

   897             restart(startIn, startOut);

   898             return false;

   900         default:

   901             return commit(1);

   902         }

   903     }

   905     /*

   906      * ClassEscape ::

   907      *      DecimalEscape

   908      *      b

   909      *      CharacterEscape

   910      *      CharacterClassEscape

   911      */

   912     private boolean classEscape() {

   914         if (decimalEscape()) {

   915             return true;

   916         }

   918         if (ch0 == 'b') {

   919             sb.setLength(sb.length() - 1);

   920             sb.append('\b');

   921             skip(1);

   922             return true;

   923         }

   925         // Note that contrary to ES 5.1 spec we put identityEscape() last because it acts as a catch-all

   926         return characterEscape() || characterClassEscape() || identityEscape();

   927     }

   929     /*

   930      * DecimalDigits

   931      */

   932     private boolean decimalDigits() {

   933         if (!isDecimalDigit(ch0)) {

   934             return false;

   935         }

   937         while (isDecimalDigit(ch0)) {

   938             commit(1);

   939         }

   941         return true;

   942     }

   944     private void unicode(final int value, final StringBuilder buffer) {

   945         final String hex = Integer.toHexString(value);

   946         buffer.append('u');

   947         for (int i = 0; i < 4 - hex.length(); i++) {

   948             buffer.append('0');

   949         }

   950         buffer.append(hex);

   951     }

   953     // Convert what would have been a backreference into a unicode escape, or a number literal, or both.

   954     private void octalOrLiteral(final String numberLiteral, final StringBuilder buffer) {

   955         final int length = numberLiteral.length();

   956         int octalValue = 0;

   957         int pos = 0;

   958         // Maximum value for octal escape is 0377 (255) so we stop the loop at 32

   959         while (pos < length && octalValue < 0x20) {

   960             final char ch = numberLiteral.charAt(pos);

   961             if (isOctalDigit(ch)) {

   962                 octalValue = octalValue * 8 + ch - '0';

   963             } else {

   964                 break;

   965             }

   966             pos++;

   967         }

   968         if (octalValue > 0) {

   969             buffer.append('\\');

   970             unicode(octalValue, buffer);

   971             buffer.append(numberLiteral.substring(pos));

   972         } else {

   973             buffer.append(numberLiteral);

   974         }

   975     }

   977     private static boolean isOctalDigit(final char ch) {

   978         return ch >= '0' && ch <= '7';

   979     }

   981     private static boolean isDecimalDigit(final char ch) {

   982         return ch >= '0' && ch <= '9';

   983     }

   984 }

Mercurial > jdk8-mips64-public > nashorn / file revision

src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java@b4ea8678bf15

src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java