8008093: Make RegExp engine pluggable

Fri, 22 Feb 2013 16:31:10 +0100

author
hannesw
date
Fri, 22 Feb 2013 16:31:10 +0100
changeset 114
508da3c7fc3a
parent 113
3f0ff84aaf36
child 115
e42fd1640ff9

8008093: Make RegExp engine pluggable
Reviewed-by: lagergren, attila

src/jdk/nashorn/internal/objects/Global.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/objects/NativeRegExp.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/objects/NativeRegExpExecResult.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/objects/NativeString.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/parser/AbstractParser.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/RegExp.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/RegExpMatch.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/RegExpScanner.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/DefaultRegExp.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/RegExp.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/RegExpFactory.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/RegExpMatcher.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/RegExpResult.java file | annotate | diff | comparison | revisions
src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java file | annotate | diff | comparison | revisions
     1.1 --- a/src/jdk/nashorn/internal/objects/Global.java	Fri Feb 22 10:39:00 2013 -0400
     1.2 +++ b/src/jdk/nashorn/internal/objects/Global.java	Fri Feb 22 16:31:10 2013 +0100
     1.3 @@ -50,7 +50,7 @@
     1.4  import jdk.nashorn.internal.runtime.NativeJavaPackage;
     1.5  import jdk.nashorn.internal.runtime.OptionsObject;
     1.6  import jdk.nashorn.internal.runtime.PropertyDescriptor;
     1.7 -import jdk.nashorn.internal.runtime.RegExpMatch;
     1.8 +import jdk.nashorn.internal.runtime.regexp.RegExpResult;
     1.9  import jdk.nashorn.internal.runtime.Scope;
    1.10  import jdk.nashorn.internal.runtime.ScriptFunction;
    1.11  import jdk.nashorn.internal.runtime.ScriptObject;
    1.12 @@ -339,7 +339,7 @@
    1.13      private ClassCache classCache;
    1.14  
    1.15      // Used to store the last RegExp result to support deprecated RegExp constructor properties
    1.16 -    private RegExpMatch lastRegExpMatch;
    1.17 +    private RegExpResult lastRegExpResult;
    1.18  
    1.19      private static final MethodHandle EVAL    = findOwnMH("eval",    Object.class, Object.class, Object.class);
    1.20      private static final MethodHandle PRINT   = findOwnMH("print",   Object.class, Object.class, Object[].class);
    1.21 @@ -1709,12 +1709,12 @@
    1.22          return MH.findStatic(MethodHandles.publicLookup(), Global.class, name, MH.type(rtype, types));
    1.23      }
    1.24  
    1.25 -    RegExpMatch getLastRegExpMatch() {
    1.26 -        return lastRegExpMatch;
    1.27 +    RegExpResult getLastRegExpResult() {
    1.28 +        return lastRegExpResult;
    1.29      }
    1.30  
    1.31 -    void setLastRegExpMatch(RegExpMatch regExpMatch) {
    1.32 -        this.lastRegExpMatch = regExpMatch;
    1.33 +    void setLastRegExpResult(final RegExpResult regExpResult) {
    1.34 +        this.lastRegExpResult = regExpResult;
    1.35      }
    1.36  
    1.37  }
     2.1 --- a/src/jdk/nashorn/internal/objects/NativeRegExp.java	Fri Feb 22 10:39:00 2013 -0400
     2.2 +++ b/src/jdk/nashorn/internal/objects/NativeRegExp.java	Fri Feb 22 16:31:10 2013 +0100
     2.3 @@ -31,8 +31,7 @@
     2.4  import java.util.ArrayList;
     2.5  import java.util.Arrays;
     2.6  import java.util.List;
     2.7 -import java.util.regex.Matcher;
     2.8 -import java.util.regex.Pattern;
     2.9 +
    2.10  import jdk.nashorn.internal.objects.annotations.Attribute;
    2.11  import jdk.nashorn.internal.objects.annotations.Constructor;
    2.12  import jdk.nashorn.internal.objects.annotations.Function;
    2.13 @@ -44,8 +43,10 @@
    2.14  import jdk.nashorn.internal.runtime.BitVector;
    2.15  import jdk.nashorn.internal.runtime.JSType;
    2.16  import jdk.nashorn.internal.runtime.ParserException;
    2.17 -import jdk.nashorn.internal.runtime.RegExp;
    2.18 -import jdk.nashorn.internal.runtime.RegExpMatch;
    2.19 +import jdk.nashorn.internal.runtime.regexp.RegExp;
    2.20 +import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
    2.21 +import jdk.nashorn.internal.runtime.regexp.RegExpResult;
    2.22 +import jdk.nashorn.internal.runtime.regexp.RegExpMatcher;
    2.23  import jdk.nashorn.internal.runtime.ScriptFunction;
    2.24  import jdk.nashorn.internal.runtime.ScriptObject;
    2.25  import jdk.nashorn.internal.runtime.ScriptRuntime;
    2.26 @@ -59,35 +60,15 @@
    2.27      @Property(attributes = Attribute.NOT_ENUMERABLE | Attribute.NOT_CONFIGURABLE)
    2.28      public Object lastIndex;
    2.29  
    2.30 -    /** Pattern string. */
    2.31 -    private String input;
    2.32 -
    2.33 -    /** Global search flag for this regexp. */
    2.34 -    private boolean global;
    2.35 -
    2.36 -    /** Case insensitive flag for this regexp */
    2.37 -    private boolean ignoreCase;
    2.38 -
    2.39 -    /** Multi-line flag for this regexp */
    2.40 -    private boolean multiline;
    2.41 -
    2.42 -    /** Java regex pattern to use for match. We compile to one of these */
    2.43 -    private Pattern pattern;
    2.44 -
    2.45 -    private BitVector groupsInNegativeLookahead;
    2.46 +    /** Compiled regexp */
    2.47 +    private RegExp regexp;
    2.48  
    2.49      // Reference to global object needed to support static RegExp properties
    2.50      private Global globalObject;
    2.51  
    2.52 -    /*
    2.53 -    public NativeRegExp() {
    2.54 -        init();
    2.55 -    }*/
    2.56 -
    2.57      NativeRegExp(final String input, final String flagString) {
    2.58 -        RegExp regExp = null;
    2.59          try {
    2.60 -            regExp = new RegExp(input, flagString);
    2.61 +            this.regexp = RegExpFactory.create(input, flagString);
    2.62          } catch (final ParserException e) {
    2.63              // translate it as SyntaxError object and throw it
    2.64              e.throwAsEcmaException();
    2.65 @@ -95,13 +76,6 @@
    2.66          }
    2.67  
    2.68          this.setLastIndex(0);
    2.69 -        this.input = regExp.getInput();
    2.70 -        this.global = regExp.isGlobal();
    2.71 -        this.ignoreCase = regExp.isIgnoreCase();
    2.72 -        this.multiline = regExp.isMultiline();
    2.73 -        this.pattern = regExp.getPattern();
    2.74 -        this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
    2.75 -
    2.76          init();
    2.77      }
    2.78  
    2.79 @@ -110,24 +84,8 @@
    2.80      }
    2.81  
    2.82      NativeRegExp(final NativeRegExp regExp) {
    2.83 -        this.input      = regExp.getInput();
    2.84 -        this.global     = regExp.getGlobal();
    2.85 -        this.multiline  = regExp.getMultiline();
    2.86 -        this.ignoreCase = regExp.getIgnoreCase();
    2.87          this.lastIndex  = regExp.getLastIndexObject();
    2.88 -        this.pattern    = regExp.getPattern();
    2.89 -        this.groupsInNegativeLookahead = regExp.getGroupsInNegativeLookahead();
    2.90 -
    2.91 -        init();
    2.92 -    }
    2.93 -
    2.94 -    NativeRegExp(final Pattern pattern) {
    2.95 -        this.input      = pattern.pattern();
    2.96 -        this.multiline  = (pattern.flags() & Pattern.MULTILINE) != 0;
    2.97 -        this.ignoreCase = (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
    2.98 -        this.lastIndex  = 0;
    2.99 -        this.pattern    = pattern;
   2.100 -
   2.101 +        this.regexp      = regExp.getRegExp();
   2.102          init();
   2.103      }
   2.104  
   2.105 @@ -232,16 +190,59 @@
   2.106          return new NativeRegExp(patternString, flagString);
   2.107      }
   2.108  
   2.109 +    /**
   2.110 +     * Build a regexp that matches {@code string} as-is. All meta-characters will be escaped.
   2.111 +     *
   2.112 +     * @param string pattern string
   2.113 +     * @return flat regexp
   2.114 +     */
   2.115 +    static NativeRegExp flatRegExp(String string) {
   2.116 +        // escape special characters
   2.117 +        StringBuilder sb = null;
   2.118 +        final int length = string.length();
   2.119 +
   2.120 +        for (int i = 0; i < length; i++) {
   2.121 +            final char c = string.charAt(i);
   2.122 +            switch (c) {
   2.123 +                case '^':
   2.124 +                case '$':
   2.125 +                case '\\':
   2.126 +                case '.':
   2.127 +                case '*':
   2.128 +                case '+':
   2.129 +                case '?':
   2.130 +                case '(':
   2.131 +                case ')':
   2.132 +                case '[':
   2.133 +                case '{':
   2.134 +                case '|':
   2.135 +                    if (sb == null) {
   2.136 +                        sb = new StringBuilder(length * 2);
   2.137 +                        sb.append(string, 0, i);
   2.138 +                    }
   2.139 +                    sb.append('\\');
   2.140 +                    sb.append(c);
   2.141 +                    break;
   2.142 +                default:
   2.143 +                    if (sb != null) {
   2.144 +                        sb.append(c);
   2.145 +                    }
   2.146 +                    break;
   2.147 +            }
   2.148 +        }
   2.149 +        return new NativeRegExp(sb == null ? string : sb.toString(), "");
   2.150 +    }
   2.151 +
   2.152      private String getFlagString() {
   2.153 -        final StringBuilder sb = new StringBuilder();
   2.154 +        final StringBuilder sb = new StringBuilder(3);
   2.155  
   2.156 -        if (global) {
   2.157 +        if (regexp.isGlobal()) {
   2.158              sb.append('g');
   2.159          }
   2.160 -        if (ignoreCase) {
   2.161 +        if (regexp.isIgnoreCase()) {
   2.162              sb.append('i');
   2.163          }
   2.164 -        if (multiline) {
   2.165 +        if (regexp.isMultiline()) {
   2.166              sb.append('m');
   2.167          }
   2.168  
   2.169 @@ -255,7 +256,7 @@
   2.170  
   2.171      @Override
   2.172      public String toString() {
   2.173 -        return "/" + input + "/" + getFlagString();
   2.174 +        return "/" + regexp.getSource() + "/" + getFlagString();
   2.175      }
   2.176  
   2.177      /**
   2.178 @@ -270,13 +271,8 @@
   2.179      public static Object compile(final Object self, final Object pattern, final Object flags) {
   2.180          final NativeRegExp regExp   = checkRegExp(self);
   2.181          final NativeRegExp compiled = newRegExp(pattern, flags);
   2.182 -        // copy over fields to 'self'
   2.183 -        regExp.setInput(compiled.getInput());
   2.184 -        regExp.setGlobal(compiled.getGlobal());
   2.185 -        regExp.setIgnoreCase(compiled.getIgnoreCase());
   2.186 -        regExp.setMultiline(compiled.getMultiline());
   2.187 -        regExp.setPattern(compiled.getPattern());
   2.188 -        regExp.setGroupsInNegativeLookahead(compiled.getGroupsInNegativeLookahead());
   2.189 +        // copy over regexp to 'self'
   2.190 +        regExp.setRegExp(compiled.getRegExp());
   2.191  
   2.192          // Some implementations return undefined. Some return 'self'. Since return
   2.193          // value is most likely be ignored, we can play safe and return 'self'.
   2.194 @@ -326,7 +322,7 @@
   2.195       */
   2.196      @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
   2.197      public static Object source(final Object self) {
   2.198 -        return checkRegExp(self).input;
   2.199 +        return checkRegExp(self).getRegExp().getSource();
   2.200      }
   2.201  
   2.202      /**
   2.203 @@ -337,7 +333,7 @@
   2.204       */
   2.205      @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
   2.206      public static Object global(final Object self) {
   2.207 -        return checkRegExp(self).global;
   2.208 +        return checkRegExp(self).getRegExp().isGlobal();
   2.209      }
   2.210  
   2.211      /**
   2.212 @@ -348,7 +344,7 @@
   2.213       */
   2.214      @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
   2.215      public static Object ignoreCase(final Object self) {
   2.216 -        return checkRegExp(self).ignoreCase;
   2.217 +        return checkRegExp(self).getRegExp().isIgnoreCase();
   2.218      }
   2.219  
   2.220      /**
   2.221 @@ -359,7 +355,7 @@
   2.222       */
   2.223      @Getter(attributes = Attribute.NON_ENUMERABLE_CONSTANT)
   2.224      public static Object multiline(final Object self) {
   2.225 -        return checkRegExp(self).multiline;
   2.226 +        return checkRegExp(self).getRegExp().isMultiline();
   2.227      }
   2.228  
   2.229      /**
   2.230 @@ -369,7 +365,7 @@
   2.231       */
   2.232      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "input")
   2.233      public static Object getLastInput(Object self) {
   2.234 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.235 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.236          return match == null ? "" : match.getInput();
   2.237      }
   2.238  
   2.239 @@ -390,7 +386,7 @@
   2.240       */
   2.241      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastMatch")
   2.242      public static Object getLastMatch(Object self) {
   2.243 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.244 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.245          return match == null ? "" : match.getGroup(0);
   2.246      }
   2.247  
   2.248 @@ -401,7 +397,7 @@
   2.249       */
   2.250      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "lastParen")
   2.251      public static Object getLastParen(Object self) {
   2.252 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.253 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.254          return match == null ? "" : match.getLastParen();
   2.255      }
   2.256  
   2.257 @@ -412,7 +408,7 @@
   2.258       */
   2.259      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "leftContext")
   2.260      public static Object getLeftContext(Object self) {
   2.261 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.262 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.263          return match == null ? "" : match.getInput().substring(0, match.getIndex());
   2.264      }
   2.265  
   2.266 @@ -423,7 +419,7 @@
   2.267       */
   2.268      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "rightContext")
   2.269      public static Object getRightContext(Object self) {
   2.270 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.271 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.272          return match == null ? "" : match.getInput().substring(match.getIndex() + match.length());
   2.273      }
   2.274  
   2.275 @@ -434,7 +430,7 @@
   2.276       */
   2.277      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$1")
   2.278      public static Object getGroup1(Object self) {
   2.279 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.280 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.281          return match == null ? "" : match.getGroup(1);
   2.282      }
   2.283  
   2.284 @@ -445,7 +441,7 @@
   2.285       */
   2.286      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$2")
   2.287      public static Object getGroup2(Object self) {
   2.288 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.289 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.290          return match == null ? "" : match.getGroup(2);
   2.291      }
   2.292  
   2.293 @@ -456,7 +452,7 @@
   2.294       */
   2.295      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$3")
   2.296      public static Object getGroup3(Object self) {
   2.297 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.298 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.299          return match == null ? "" : match.getGroup(3);
   2.300      }
   2.301  
   2.302 @@ -467,7 +463,7 @@
   2.303       */
   2.304      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$4")
   2.305      public static Object getGroup4(Object self) {
   2.306 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.307 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.308          return match == null ? "" : match.getGroup(4);
   2.309      }
   2.310  
   2.311 @@ -478,7 +474,7 @@
   2.312       */
   2.313      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$5")
   2.314      public static Object getGroup5(Object self) {
   2.315 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.316 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.317          return match == null ? "" : match.getGroup(5);
   2.318      }
   2.319  
   2.320 @@ -489,7 +485,7 @@
   2.321       */
   2.322      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$6")
   2.323      public static Object getGroup6(Object self) {
   2.324 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.325 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.326          return match == null ? "" : match.getGroup(6);
   2.327      }
   2.328  
   2.329 @@ -500,7 +496,7 @@
   2.330       */
   2.331      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$7")
   2.332      public static Object getGroup7(Object self) {
   2.333 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.334 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.335          return match == null ? "" : match.getGroup(7);
   2.336      }
   2.337  
   2.338 @@ -511,7 +507,7 @@
   2.339       */
   2.340      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$8")
   2.341      public static Object getGroup8(Object self) {
   2.342 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.343 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.344          return match == null ? "" : match.getGroup(8);
   2.345      }
   2.346  
   2.347 @@ -522,34 +518,30 @@
   2.348       */
   2.349      @Getter(where = Where.CONSTRUCTOR, attributes = Attribute.CONSTANT, name = "$9")
   2.350      public static Object getGroup9(Object self) {
   2.351 -        final RegExpMatch match = Global.instance().getLastRegExpMatch();
   2.352 +        final RegExpResult match = Global.instance().getLastRegExpResult();
   2.353          return match == null ? "" : match.getGroup(9);
   2.354      }
   2.355  
   2.356 -    private RegExpMatch execInner(final String string) {
   2.357 -        if (this.pattern == null) {
   2.358 -            return null; // never matches or similar, e.g. a[]
   2.359 -        }
   2.360 +    private RegExpResult execInner(final String string) {
   2.361  
   2.362 -        final Matcher matcher = pattern.matcher(string);
   2.363 -        final int start = this.global ? getLastIndex() : 0;
   2.364 -
   2.365 +        final int start = regexp.isGlobal() ? getLastIndex() : 0;
   2.366          if (start < 0 || start > string.length()) {
   2.367              setLastIndex(0);
   2.368              return null;
   2.369          }
   2.370  
   2.371 -        if (!matcher.find(start)) {
   2.372 +        final RegExpMatcher matcher = regexp.match(string);
   2.373 +        if (matcher == null || !matcher.search(start)) {
   2.374              setLastIndex(0);
   2.375              return null;
   2.376          }
   2.377  
   2.378 -        if (global) {
   2.379 +        if (regexp.isGlobal()) {
   2.380              setLastIndex(matcher.end());
   2.381          }
   2.382  
   2.383 -        final RegExpMatch match = new RegExpMatch(string, matcher.start(), groups(matcher));
   2.384 -        globalObject.setLastRegExpMatch(match);
   2.385 +        final RegExpResult match = new RegExpResult(string, matcher.start(), groups(matcher));
   2.386 +        globalObject.setLastRegExpResult(match);
   2.387          return match;
   2.388      }
   2.389  
   2.390 @@ -557,9 +549,11 @@
   2.391       * Convert java.util.regex.Matcher groups to JavaScript groups.
   2.392       * That is, replace null and groups that didn't match with undefined.
   2.393       */
   2.394 -    private Object[] groups(final Matcher matcher) {
   2.395 +    private Object[] groups(final RegExpMatcher matcher) {
   2.396          final int groupCount = matcher.groupCount();
   2.397          final Object[] groups = new Object[groupCount + 1];
   2.398 +        final BitVector groupsInNegativeLookahead  = regexp.getGroupsInNegativeLookahead();
   2.399 +
   2.400          for (int i = 0, lastGroupStart = matcher.start(); i <= groupCount; i++) {
   2.401              final int groupStart = matcher.start(i);
   2.402              if (lastGroupStart > groupStart
   2.403 @@ -586,7 +580,7 @@
   2.404       * @return NativeArray of matches, string or null.
   2.405       */
   2.406      public Object exec(final String string) {
   2.407 -        final RegExpMatch match = execInner(string);
   2.408 +        final RegExpResult match = execInner(string);
   2.409  
   2.410          if (match == null) {
   2.411              return null;
   2.412 @@ -617,7 +611,12 @@
   2.413       * @return String with substitutions.
   2.414       */
   2.415      Object replace(final String string, final String replacement, final ScriptFunction function) {
   2.416 -        final Matcher matcher = pattern.matcher(string);
   2.417 +        final RegExpMatcher matcher = regexp.match(string);
   2.418 +
   2.419 +        if (matcher == null) {
   2.420 +            return string;
   2.421 +        }
   2.422 +
   2.423          /*
   2.424           * $$ -> $
   2.425           * $& -> the matched substring
   2.426 @@ -628,8 +627,8 @@
   2.427           */
   2.428          String replace = replacement;
   2.429  
   2.430 -        if (!global) {
   2.431 -            if (!matcher.find()) {
   2.432 +        if (!regexp.isGlobal()) {
   2.433 +            if (!matcher.search(0)) {
   2.434                  return string;
   2.435              }
   2.436  
   2.437 @@ -642,45 +641,39 @@
   2.438              return sb.toString();
   2.439          }
   2.440  
   2.441 -        int end = 0; // a.k.a. lastAppendPosition
   2.442          setLastIndex(0);
   2.443  
   2.444 -        boolean found;
   2.445 -        try {
   2.446 -            found = matcher.find(end);
   2.447 -        } catch (final IndexOutOfBoundsException e) {
   2.448 -            found = false;
   2.449 -        }
   2.450 -
   2.451 -        if (!found) {
   2.452 +        if (!matcher.search(0)) {
   2.453              return string;
   2.454          }
   2.455  
   2.456 +        int thisIndex = 0;
   2.457          int previousLastIndex = 0;
   2.458          final StringBuilder sb = new StringBuilder();
   2.459 +
   2.460          do {
   2.461              if (function != null) {
   2.462                  replace = callReplaceValue(function, matcher, string);
   2.463              }
   2.464 -            appendReplacement(matcher, string, replace, sb, end);
   2.465 -            end = matcher.end();
   2.466 +
   2.467 +            appendReplacement(matcher, string, replace, sb, thisIndex);
   2.468  
   2.469              // ECMA 15.5.4.10 String.prototype.match(regexp)
   2.470 -            final int thisIndex = end;
   2.471 +            thisIndex = matcher.end();
   2.472              if (thisIndex == previousLastIndex) {
   2.473                  setLastIndex(thisIndex + 1);
   2.474                  previousLastIndex = thisIndex + 1;
   2.475              } else {
   2.476                  previousLastIndex = thisIndex;
   2.477              }
   2.478 -        } while (matcher.find());
   2.479 +        } while (previousLastIndex <= string.length() && matcher.search(previousLastIndex));
   2.480  
   2.481 -        sb.append(string, end, string.length());
   2.482 +        sb.append(string, thisIndex, string.length());
   2.483  
   2.484          return sb.toString();
   2.485      }
   2.486  
   2.487 -    private void appendReplacement(final Matcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
   2.488 +    private void appendReplacement(final RegExpMatcher matcher, final String text, final String replacement, final StringBuilder sb, final int lastAppendPosition) {
   2.489          // Process substitution string to replace group references with groups
   2.490          int cursor = 0;
   2.491          final StringBuilder result = new StringBuilder();
   2.492 @@ -748,7 +741,7 @@
   2.493          sb.append(result);
   2.494      }
   2.495  
   2.496 -    private String callReplaceValue(final ScriptFunction function, final Matcher matcher, final String string) {
   2.497 +    private String callReplaceValue(final ScriptFunction function, final RegExpMatcher matcher, final String string) {
   2.498          final Object[] groups = groups(matcher);
   2.499          final Object[] args   = Arrays.copyOf(groups, groups.length + 2);
   2.500  
   2.501 @@ -782,7 +775,7 @@
   2.502              return new NativeArray();
   2.503          }
   2.504  
   2.505 -        RegExpMatch match;
   2.506 +        RegExpResult match;
   2.507          final int inputLength = input.length();
   2.508          int lastLength = -1;
   2.509          int lastLastIndex = 0;
   2.510 @@ -834,7 +827,7 @@
   2.511       * @return Index of match.
   2.512       */
   2.513      Object search(final String string) {
   2.514 -        final RegExpMatch match = execInner(string);
   2.515 +        final RegExpResult match = execInner(string);
   2.516  
   2.517          if (match == null) {
   2.518              return -1;
   2.519 @@ -884,52 +877,20 @@
   2.520          }
   2.521      }
   2.522  
   2.523 -    private String getInput() {
   2.524 -        return input;
   2.525 -    }
   2.526 -
   2.527 -    private void setInput(final String input) {
   2.528 -        this.input = input;
   2.529 +    private void setGlobal(final boolean global) {
   2.530 +        regexp.setGlobal(global);
   2.531      }
   2.532  
   2.533      boolean getGlobal() {
   2.534 -        return global;
   2.535 +        return regexp.isGlobal();
   2.536      }
   2.537  
   2.538 -    private void setGlobal(final boolean global) {
   2.539 -        this.global = global;
   2.540 +    private RegExp getRegExp() {
   2.541 +        return regexp;
   2.542      }
   2.543  
   2.544 -    private boolean getIgnoreCase() {
   2.545 -        return ignoreCase;
   2.546 -    }
   2.547 -
   2.548 -    private void setIgnoreCase(final boolean ignoreCase) {
   2.549 -        this.ignoreCase = ignoreCase;
   2.550 -    }
   2.551 -
   2.552 -    private boolean getMultiline() {
   2.553 -        return multiline;
   2.554 -    }
   2.555 -
   2.556 -    private void setMultiline(final boolean multiline) {
   2.557 -        this.multiline = multiline;
   2.558 -    }
   2.559 -
   2.560 -    private Pattern getPattern() {
   2.561 -        return pattern;
   2.562 -    }
   2.563 -
   2.564 -    private void setPattern(final Pattern pattern) {
   2.565 -        this.pattern = pattern;
   2.566 -    }
   2.567 -
   2.568 -    private BitVector getGroupsInNegativeLookahead() {
   2.569 -        return groupsInNegativeLookahead;
   2.570 -    }
   2.571 -
   2.572 -    private void setGroupsInNegativeLookahead(final BitVector groupsInNegativeLookahead) {
   2.573 -        this.groupsInNegativeLookahead = groupsInNegativeLookahead;
   2.574 +    private void setRegExp(final RegExp regexp) {
   2.575 +        this.regexp = regexp;
   2.576      }
   2.577  
   2.578  }
     3.1 --- a/src/jdk/nashorn/internal/objects/NativeRegExpExecResult.java	Fri Feb 22 10:39:00 2013 -0400
     3.2 +++ b/src/jdk/nashorn/internal/objects/NativeRegExpExecResult.java	Fri Feb 22 16:31:10 2013 +0100
     3.3 @@ -31,7 +31,7 @@
     3.4  import jdk.nashorn.internal.objects.annotations.ScriptClass;
     3.5  import jdk.nashorn.internal.objects.annotations.Setter;
     3.6  import jdk.nashorn.internal.runtime.JSType;
     3.7 -import jdk.nashorn.internal.runtime.RegExpMatch;
     3.8 +import jdk.nashorn.internal.runtime.regexp.RegExpResult;
     3.9  import jdk.nashorn.internal.runtime.ScriptObject;
    3.10  import jdk.nashorn.internal.runtime.arrays.ArrayData;
    3.11  
    3.12 @@ -49,11 +49,11 @@
    3.13      @Property
    3.14      public Object input;
    3.15  
    3.16 -    NativeRegExpExecResult(final RegExpMatch match) {
    3.17 +    NativeRegExpExecResult(final RegExpResult result) {
    3.18          setProto(Global.instance().getArrayPrototype());
    3.19 -        this.setArray(ArrayData.allocate(match.getGroups().clone()));
    3.20 -        this.index = match.getIndex();
    3.21 -        this.input = match.getInput();
    3.22 +        this.setArray(ArrayData.allocate(result.getGroups().clone()));
    3.23 +        this.index = result.getIndex();
    3.24 +        this.input = result.getInput();
    3.25      }
    3.26  
    3.27      /**
     4.1 --- a/src/jdk/nashorn/internal/objects/NativeString.java	Fri Feb 22 10:39:00 2013 -0400
     4.2 +++ b/src/jdk/nashorn/internal/objects/NativeString.java	Fri Feb 22 16:31:10 2013 +0100
     4.3 @@ -38,7 +38,6 @@
     4.4  import java.util.Arrays;
     4.5  import java.util.LinkedList;
     4.6  import java.util.List;
     4.7 -import java.util.regex.Pattern;
     4.8  import jdk.internal.dynalink.CallSiteDescriptor;
     4.9  import jdk.internal.dynalink.linker.GuardedInvocation;
    4.10  import jdk.internal.dynalink.linker.LinkRequest;
    4.11 @@ -712,7 +711,7 @@
    4.12          if (string instanceof NativeRegExp) {
    4.13              nativeRegExp = (NativeRegExp) string;
    4.14          } else {
    4.15 -            nativeRegExp = new NativeRegExp(Pattern.compile(JSType.toString(string), Pattern.LITERAL));
    4.16 +            nativeRegExp = NativeRegExp.flatRegExp(JSType.toString(string));
    4.17          }
    4.18  
    4.19          if (replacement instanceof ScriptFunction) {
     5.1 --- a/src/jdk/nashorn/internal/parser/AbstractParser.java	Fri Feb 22 10:39:00 2013 -0400
     5.2 +++ b/src/jdk/nashorn/internal/parser/AbstractParser.java	Fri Feb 22 16:31:10 2013 +0100
     5.3 @@ -37,7 +37,7 @@
     5.4  import jdk.nashorn.internal.runtime.ErrorManager;
     5.5  import jdk.nashorn.internal.runtime.JSErrorType;
     5.6  import jdk.nashorn.internal.runtime.ParserException;
     5.7 -import jdk.nashorn.internal.runtime.RegExp;
     5.8 +import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
     5.9  import jdk.nashorn.internal.runtime.Source;
    5.10  
    5.11  /**
    5.12 @@ -427,7 +427,7 @@
    5.13              if (value instanceof RegexToken) {
    5.14                  final RegexToken regex = (RegexToken)value;
    5.15                  try {
    5.16 -                    RegExp.validate(regex.getExpression(), regex.getOptions());
    5.17 +                    RegExpFactory.validate(regex.getExpression(), regex.getOptions());
    5.18                  } catch (final ParserException e) {
    5.19                      error(e.getMessage());
    5.20                  }
     6.1 --- a/src/jdk/nashorn/internal/runtime/RegExp.java	Fri Feb 22 10:39:00 2013 -0400
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,177 +0,0 @@
     6.4 -/*
     6.5 - * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     6.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     6.7 - *
     6.8 - * This code is free software; you can redistribute it and/or modify it
     6.9 - * under the terms of the GNU General Public License version 2 only, as
    6.10 - * published by the Free Software Foundation.  Oracle designates this
    6.11 - * particular file as subject to the "Classpath" exception as provided
    6.12 - * by Oracle in the LICENSE file that accompanied this code.
    6.13 - *
    6.14 - * This code is distributed in the hope that it will be useful, but WITHOUT
    6.15 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    6.16 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    6.17 - * version 2 for more details (a copy is included in the LICENSE file that
    6.18 - * accompanied this code).
    6.19 - *
    6.20 - * You should have received a copy of the GNU General Public License version
    6.21 - * 2 along with this work; if not, write to the Free Software Foundation,
    6.22 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    6.23 - *
    6.24 - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    6.25 - * or visit www.oracle.com if you need additional information or have any
    6.26 - * questions.
    6.27 - */
    6.28 -
    6.29 -package jdk.nashorn.internal.runtime;
    6.30 -
    6.31 -import static java.util.regex.Pattern.CASE_INSENSITIVE;
    6.32 -import static java.util.regex.Pattern.MULTILINE;
    6.33 -import static java.util.regex.Pattern.UNICODE_CASE;
    6.34 -
    6.35 -import java.util.HashSet;
    6.36 -import java.util.regex.Pattern;
    6.37 -import java.util.regex.PatternSyntaxException;
    6.38 -
    6.39 -/**
    6.40 - * This class is used to represent a parsed regular expression. Accepts input
    6.41 - * pattern string and flagString. This is used by AbstractParser to validate
    6.42 - * RegExp literals as well as by NativeRegExp to parse RegExp constructor arguments.
    6.43 - */
    6.44 -public final class RegExp {
    6.45 -    /** Pattern string. */
    6.46 -    private final String input;
    6.47 -
    6.48 -    /** Global search flag for this regexp.*/
    6.49 -    private boolean global;
    6.50 -
    6.51 -    /** Case insensitive flag for this regexp */
    6.52 -    private boolean ignoreCase;
    6.53 -
    6.54 -    /** Multi-line flag for this regexp */
    6.55 -    private boolean multiline;
    6.56 -
    6.57 -    /** Java regexp pattern to use for match. We compile to one of these */
    6.58 -    private Pattern pattern;
    6.59 -
    6.60 -    /** BitVector that keeps track of groups in negative lookahead */
    6.61 -    private BitVector groupsInNegativeLookahead;
    6.62 -
    6.63 -    /**
    6.64 -     * Creates RegExpLiteral object from given input and flagString.
    6.65 -     *
    6.66 -     * @param input RegExp pattern string
    6.67 -     * @param flagString RegExp flags
    6.68 -     * @throws ParserException if flagString is invalid or input string has syntax error.
    6.69 -     */
    6.70 -    public RegExp(final String input, final String flagString) throws ParserException {
    6.71 -        this.input = input;
    6.72 -        final HashSet<Character> usedFlags = new HashSet<>();
    6.73 -        int flags = 0;
    6.74 -
    6.75 -        for (final char ch : flagString.toCharArray()) {
    6.76 -            if (usedFlags.contains(ch)) {
    6.77 -                throwParserException("repeated.flag", Character.toString(ch));
    6.78 -            }
    6.79 -
    6.80 -            switch (ch) {
    6.81 -            case 'g':
    6.82 -                this.global = true;
    6.83 -                usedFlags.add(ch);
    6.84 -                break;
    6.85 -            case 'i':
    6.86 -                this.ignoreCase = true;
    6.87 -                flags |= CASE_INSENSITIVE | UNICODE_CASE;
    6.88 -                usedFlags.add(ch);
    6.89 -                break;
    6.90 -            case 'm':
    6.91 -                this.multiline = true;
    6.92 -                flags |= MULTILINE;
    6.93 -                usedFlags.add(ch);
    6.94 -                break;
    6.95 -            default:
    6.96 -                throwParserException("unsupported.flag", Character.toString(ch));
    6.97 -            }
    6.98 -        }
    6.99 -
   6.100 -        try {
   6.101 -            RegExpScanner parsed;
   6.102 -
   6.103 -            try {
   6.104 -                parsed = RegExpScanner.scan(input);
   6.105 -            } catch (final PatternSyntaxException e) {
   6.106 -                // refine the exception with a better syntax error, if this
   6.107 -                // passes, just rethrow what we have
   6.108 -                Pattern.compile(input, flags);
   6.109 -                throw e;
   6.110 -            }
   6.111 -
   6.112 -            if (parsed != null) {
   6.113 -                this.pattern = Pattern.compile(parsed.getJavaPattern(), flags);
   6.114 -                this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
   6.115 -            }
   6.116 -        } catch (final PatternSyntaxException e2) {
   6.117 -            throwParserException("syntax", e2.getMessage());
   6.118 -        }
   6.119 -
   6.120 -    }
   6.121 -
   6.122 -    /**
   6.123 -     * @return the input
   6.124 -     */
   6.125 -    public String getInput() {
   6.126 -        return input;
   6.127 -    }
   6.128 -
   6.129 -    /**
   6.130 -     * @return the global
   6.131 -     */
   6.132 -    public boolean isGlobal() {
   6.133 -        return global;
   6.134 -    }
   6.135 -
   6.136 -    /**
   6.137 -     * @return the ignoreCase
   6.138 -     */
   6.139 -    public boolean isIgnoreCase() {
   6.140 -        return ignoreCase;
   6.141 -    }
   6.142 -
   6.143 -    /**
   6.144 -     * @return the multiline
   6.145 -     */
   6.146 -    public boolean isMultiline() {
   6.147 -        return multiline;
   6.148 -    }
   6.149 -
   6.150 -    /**
   6.151 -     * @return the pattern
   6.152 -     */
   6.153 -    public Pattern getPattern() {
   6.154 -        return pattern;
   6.155 -    }
   6.156 -
   6.157 -    /**
   6.158 -     * @return the groupsInNegativeLookahead
   6.159 -     */
   6.160 -    public BitVector getGroupsInNegativeLookahead() {
   6.161 -        return groupsInNegativeLookahead;
   6.162 -    }
   6.163 -
   6.164 -    /**
   6.165 -     * Validation method for RegExp input and flagString - we don't care about the RegExp object
   6.166 -     *
   6.167 -     * @param input        regexp input
   6.168 -     * @param flagString   flag string
   6.169 -     *
   6.170 -     * @throws ParserException if invalid regexp and flags
   6.171 -     */
   6.172 -    @SuppressWarnings({"unused", "ResultOfObjectAllocationIgnored"})
   6.173 -    public static void validate(final String input, final String flagString) throws ParserException {
   6.174 -        new RegExp(input, flagString);
   6.175 -    }
   6.176 -
   6.177 -    private static void throwParserException(final String key, final String str) throws ParserException {
   6.178 -        throw new ParserException(ECMAErrors.getMessage("parser.error.regex." + key, str));
   6.179 -    }
   6.180 -}
     7.1 --- a/src/jdk/nashorn/internal/runtime/RegExpMatch.java	Fri Feb 22 10:39:00 2013 -0400
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,98 +0,0 @@
     7.4 -/*
     7.5 - * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     7.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     7.7 - *
     7.8 - * This code is free software; you can redistribute it and/or modify it
     7.9 - * under the terms of the GNU General Public License version 2 only, as
    7.10 - * published by the Free Software Foundation.  Oracle designates this
    7.11 - * particular file as subject to the "Classpath" exception as provided
    7.12 - * by Oracle in the LICENSE file that accompanied this code.
    7.13 - *
    7.14 - * This code is distributed in the hope that it will be useful, but WITHOUT
    7.15 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    7.16 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    7.17 - * version 2 for more details (a copy is included in the LICENSE file that
    7.18 - * accompanied this code).
    7.19 - *
    7.20 - * You should have received a copy of the GNU General Public License version
    7.21 - * 2 along with this work; if not, write to the Free Software Foundation,
    7.22 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    7.23 - *
    7.24 - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    7.25 - * or visit www.oracle.com if you need additional information or have any
    7.26 - * questions.
    7.27 - */
    7.28 -
    7.29 -package jdk.nashorn.internal.runtime;
    7.30 -
    7.31 -/**
    7.32 - * Match tuple to keep track of ongoing regexp match.
    7.33 - */
    7.34 -public final class RegExpMatch {
    7.35 -    final Object[] groups;
    7.36 -    final int      index;
    7.37 -    final String   input;
    7.38 -
    7.39 -    /**
    7.40 -     * Constructor
    7.41 -     *
    7.42 -     * @param input  regexp input
    7.43 -     * @param index  index of match
    7.44 -     * @param groups groups vector
    7.45 -     */
    7.46 -    public RegExpMatch(final String input, final int index, final Object[] groups) {
    7.47 -        this.input  = input;
    7.48 -        this.index  = index;
    7.49 -        this.groups = groups;
    7.50 -    }
    7.51 -
    7.52 -    /**
    7.53 -     * Get the groups for the match
    7.54 -     * @return group vector
    7.55 -     */
    7.56 -    public Object[] getGroups() {
    7.57 -        return groups;
    7.58 -    }
    7.59 -
    7.60 -    /**
    7.61 -     * Get the input for the map
    7.62 -     * @return input
    7.63 -     */
    7.64 -    public String getInput() {
    7.65 -        return input;
    7.66 -    }
    7.67 -
    7.68 -    /**
    7.69 -     * Get the index for the match
    7.70 -     * @return index
    7.71 -     */
    7.72 -    public int getIndex() {
    7.73 -        return index;
    7.74 -    }
    7.75 -
    7.76 -    /**
    7.77 -     * Get the length of the match
    7.78 -     * @return length
    7.79 -     */
    7.80 -    public int length() {
    7.81 -        return ((String)groups[0]).length();
    7.82 -    }
    7.83 -
    7.84 -    /**
    7.85 -     * Get the group with the given index or the empty string if group index is not valid.
    7.86 -     * @param index the group index
    7.87 -     * @return the group or ""
    7.88 -     */
    7.89 -    public Object getGroup(int index) {
    7.90 -        return index >= 0 && index < groups.length ? groups[index] : "";
    7.91 -    }
    7.92 -
    7.93 -    /**
    7.94 -     * Get the last parenthesis group, or the empty string if none exists.
    7.95 -     * @return the last group or ""
    7.96 -     */
    7.97 -    public Object getLastParen() {
    7.98 -        return groups.length > 1 ? groups[groups.length - 1] : "";
    7.99 -    }
   7.100 -
   7.101 -}
     8.1 --- a/src/jdk/nashorn/internal/runtime/RegExpScanner.java	Fri Feb 22 10:39:00 2013 -0400
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,1411 +0,0 @@
     8.4 -/*
     8.5 - * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     8.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     8.7 - *
     8.8 - * This code is free software; you can redistribute it and/or modify it
     8.9 - * under the terms of the GNU General Public License version 2 only, as
    8.10 - * published by the Free Software Foundation.  Oracle designates this
    8.11 - * particular file as subject to the "Classpath" exception as provided
    8.12 - * by Oracle in the LICENSE file that accompanied this code.
    8.13 - *
    8.14 - * This code is distributed in the hope that it will be useful, but WITHOUT
    8.15 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    8.16 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    8.17 - * version 2 for more details (a copy is included in the LICENSE file that
    8.18 - * accompanied this code).
    8.19 - *
    8.20 - * You should have received a copy of the GNU General Public License version
    8.21 - * 2 along with this work; if not, write to the Free Software Foundation,
    8.22 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    8.23 - *
    8.24 - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    8.25 - * or visit www.oracle.com if you need additional information or have any
    8.26 - * questions.
    8.27 - */
    8.28 -
    8.29 -package jdk.nashorn.internal.runtime;
    8.30 -
    8.31 -import java.util.ArrayList;
    8.32 -import java.util.HashMap;
    8.33 -import java.util.Iterator;
    8.34 -import java.util.LinkedHashMap;
    8.35 -import java.util.LinkedList;
    8.36 -import java.util.List;
    8.37 -import java.util.Map;
    8.38 -import java.util.regex.PatternSyntaxException;
    8.39 -import jdk.nashorn.internal.parser.Lexer;
    8.40 -import jdk.nashorn.internal.parser.Scanner;
    8.41 -
    8.42 -/**
    8.43 - * Scan a JavaScript regexp, converting to Java regex if necessary.
    8.44 - *
    8.45 - */
    8.46 -final class RegExpScanner extends Scanner {
    8.47 -
    8.48 -    /**
    8.49 -     * String builder to accumulate the result - this contains verbatim parsed JavaScript.
    8.50 -     * to get the java equivalent we need to create a Pattern token and return its toString()
    8.51 -     */
    8.52 -    private final StringBuilder sb;
    8.53 -
    8.54 -    /** An optional error message if one occurred during parse. */
    8.55 -    private String errorMessage;
    8.56 -
    8.57 -    /** Is this the special case of a regexp that never matches anything */
    8.58 -    private boolean neverMatches;
    8.59 -
    8.60 -    /** The resulting java.util.regex pattern string. */
    8.61 -    private String javaPattern;
    8.62 -
    8.63 -    /** Expected token table */
    8.64 -    private final Map<Character, Integer> expected = new HashMap<>();
    8.65 -
    8.66 -    /** Capturing parenthesis that have been found so far. */
    8.67 -    private final List<Capture> caps = new LinkedList<>();
    8.68 -
    8.69 -    /** Forward references to capturing parenthesis to be resolved later.*/
    8.70 -    private final Map<Integer, Token> forwardReferences = new LinkedHashMap<>();
    8.71 -
    8.72 -    /** Current level of zero-width negative lookahead assertions. */
    8.73 -    private int negativeLookaheadLevel;
    8.74 -
    8.75 -    private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?";
    8.76 -
    8.77 -    private static class Capture {
    8.78 -        /**
    8.79 -         * Zero-width negative lookaheads enclosing the capture.
    8.80 -         */
    8.81 -        private final int negativeLookaheadLevel;
    8.82 -        /**
    8.83 -         * Captures that live inside a negative lookahead are dead after the
    8.84 -         * lookahead and will be undefined if referenced from outside.
    8.85 -         */
    8.86 -        private boolean isDead;
    8.87 -
    8.88 -        Capture(final int negativeLookaheadLevel) {
    8.89 -            this.negativeLookaheadLevel = negativeLookaheadLevel;
    8.90 -        }
    8.91 -
    8.92 -        public int getNegativeLookaheadLevel() {
    8.93 -            return negativeLookaheadLevel;
    8.94 -        }
    8.95 -
    8.96 -        public boolean isDead() {
    8.97 -            return isDead;
    8.98 -        }
    8.99 -
   8.100 -        public void setDead() {
   8.101 -            this.isDead = true;
   8.102 -        }
   8.103 -    }
   8.104 -
   8.105 -    /**
   8.106 -     * This is a token - the JavaScript regexp is scanned into a token tree
   8.107 -     * A token has other tokens as children as well as "atoms", i.e. Strings.
   8.108 -     *
   8.109 -     */
   8.110 -    private static class Token {
   8.111 -
   8.112 -        private enum Type {
   8.113 -            PATTERN,
   8.114 -            DISJUNCTION,
   8.115 -            ALTERNATIVE,
   8.116 -            TERM,
   8.117 -            ASSERTION,
   8.118 -            QUANTIFIER,
   8.119 -            QUANTIFIER_PREFIX,
   8.120 -            ATOM,
   8.121 -            PATTERN_CHARACTER,
   8.122 -            ATOM_ESCAPE,
   8.123 -            CHARACTER_ESCAPE,
   8.124 -            CONTROL_ESCAPE,
   8.125 -            CONTROL_LETTER,
   8.126 -            IDENTITY_ESCAPE,
   8.127 -            DECIMAL_ESCAPE,
   8.128 -            CHARACTERCLASS_ESCAPE,
   8.129 -            CHARACTERCLASS,
   8.130 -            CLASSRANGES,
   8.131 -            NON_EMPTY_CLASSRANGES,
   8.132 -            NON_EMPTY_CLASSRANGES_NODASH,
   8.133 -            CLASSATOM,
   8.134 -            CLASSATOM_NODASH,
   8.135 -            CLASS_ESCAPE,
   8.136 -            DECIMALDIGITS,
   8.137 -            HEX_ESCAPESEQUENCE,
   8.138 -            UNICODE_ESCAPESEQUENCE,
   8.139 -        }
   8.140 -
   8.141 -        /**
   8.142 -         * Token tyoe
   8.143 -         */
   8.144 -        private final Token.Type type;
   8.145 -
   8.146 -        /**
   8.147 -         * Child nodes
   8.148 -         */
   8.149 -        private final List<Object> children;
   8.150 -
   8.151 -        /**
   8.152 -         * Parent node
   8.153 -         */
   8.154 -        private Token parent;
   8.155 -
   8.156 -        /**
   8.157 -         * Dead code flag
   8.158 -         */
   8.159 -        private boolean isDead;
   8.160 -
   8.161 -        private static final Map<Type, ToString> toStringMap = new HashMap<>();
   8.162 -        private static final ToString DEFAULT_TOSTRING = new ToString();
   8.163 -
   8.164 -        private static String unicode(final int value) {
   8.165 -            final StringBuilder sb = new StringBuilder();
   8.166 -            final String hex = Integer.toHexString(value);
   8.167 -            sb.append('u');
   8.168 -            for (int i = 0; i < 4 - hex.length(); i++) {
   8.169 -                sb.append('0');
   8.170 -            }
   8.171 -            sb.append(hex);
   8.172 -
   8.173 -            return sb.toString();
   8.174 -        }
   8.175 -
   8.176 -        static {
   8.177 -            toStringMap.put(Type.CHARACTERCLASS, new ToString() {
   8.178 -                @Override
   8.179 -                public String toString(final Token token) {
   8.180 -                    return super.toString(token).replace("\\b", "\b");
   8.181 -                }
   8.182 -            });
   8.183 -
   8.184 -            // for some reason java regexps don't like control characters on the
   8.185 -            // form "\\ca".match([string with ascii 1 at char0]). Translating
   8.186 -            // them to unicode does it though.
   8.187 -            toStringMap.put(Type.CHARACTER_ESCAPE, new ToString() {
   8.188 -                @Override
   8.189 -                public String toString(final Token token) {
   8.190 -                    final String str = super.toString(token);
   8.191 -                    if (str.length() == 2) {
   8.192 -                        return Token.unicode(Character.toLowerCase(str.charAt(1)) - 'a' + 1);
   8.193 -                    }
   8.194 -                    return str;
   8.195 -                }
   8.196 -            });
   8.197 -
   8.198 -            toStringMap.put(Type.DECIMAL_ESCAPE, new ToString() {
   8.199 -                @Override
   8.200 -                public String toString(final Token token) {
   8.201 -                    final String str = super.toString(token);
   8.202 -
   8.203 -                    if ("\0".equals(str)) {
   8.204 -                        return str;
   8.205 -                    }
   8.206 -
   8.207 -                    int value;
   8.208 -
   8.209 -                    if (!token.hasParentOfType(Type.CLASSRANGES)) {
   8.210 -                        return str;
   8.211 -                    }
   8.212 -
   8.213 -                    value = Integer.parseInt(str, 8); //throws exception that leads to SyntaxError if not octal
   8.214 -                    if (value > 0xff) {
   8.215 -                        throw new NumberFormatException(str);
   8.216 -                    }
   8.217 -
   8.218 -                    return Token.unicode(value);
   8.219 -                }
   8.220 -            });
   8.221 -
   8.222 -        }
   8.223 -
   8.224 -        /**
   8.225 -         * JavaScript Token to Java regex substring framework.
   8.226 -         *
   8.227 -         */
   8.228 -        private static class ToString {
   8.229 -            String toString(final Token token) {
   8.230 -                final StringBuilder sb = new StringBuilder();
   8.231 -                for (final Object child : token.getChildren()) {
   8.232 -                    sb.append(child);
   8.233 -                }
   8.234 -
   8.235 -                //perform global substitutions that hold true for any evaluated form
   8.236 -                String str = sb.toString();
   8.237 -                switch (str) {
   8.238 -                case "\\s":
   8.239 -                    str = "[" + Lexer.getWhitespaceRegExp() + "]";
   8.240 -                    break;
   8.241 -                case "\\S":
   8.242 -                    str = "[^" + Lexer.getWhitespaceRegExp() + "]";
   8.243 -                    break;
   8.244 -                case "[^]":
   8.245 -                    str = "[\\s\\S]";
   8.246 -                    break;
   8.247 -                default:
   8.248 -                    break;
   8.249 -                }
   8.250 -                return str;
   8.251 -            }
   8.252 -        }
   8.253 -
   8.254 -        /**
   8.255 -         * Token iterator. Doesn't return "atom" children. i.e. string representations,
   8.256 -         * just tokens
   8.257 -         *
   8.258 -         */
   8.259 -        private static class TokenIterator implements Iterator<Token> {
   8.260 -            private final List<Token> preorder;
   8.261 -
   8.262 -            private void init(final Token root) {
   8.263 -                preorder.add(root);
   8.264 -                for (final Object child : root.getChildren()) {
   8.265 -                    if (child instanceof Token) {
   8.266 -                        init((Token)child);
   8.267 -                    }
   8.268 -                }
   8.269 -            }
   8.270 -
   8.271 -            TokenIterator(final Token root) {
   8.272 -                preorder = new ArrayList<>();
   8.273 -                init(root);
   8.274 -            }
   8.275 -
   8.276 -            @Override
   8.277 -            public boolean hasNext() {
   8.278 -                return !preorder.isEmpty();
   8.279 -            }
   8.280 -
   8.281 -            @Override
   8.282 -            public Token next() {
   8.283 -                return preorder.remove(0);
   8.284 -            }
   8.285 -
   8.286 -            @Override
   8.287 -            public void remove() {
   8.288 -                next();
   8.289 -            }
   8.290 -        }
   8.291 -
   8.292 -        /**
   8.293 -         * Constructor
   8.294 -         * @param type the token type
   8.295 -         */
   8.296 -        Token(final Token.Type type) {
   8.297 -            this.type = type;
   8.298 -            children = new ArrayList<>();
   8.299 -        }
   8.300 -
   8.301 -        /**
   8.302 -         * Add a an "atom" child to a token
   8.303 -         * @param child the child to add
   8.304 -         * @return the token (for chaining)
   8.305 -         */
   8.306 -        public Token add(final String child) {
   8.307 -            children.add(child);
   8.308 -            return this;
   8.309 -        }
   8.310 -
   8.311 -        /**
   8.312 -         * Add a child to a token
   8.313 -         * @param child the child
   8.314 -         * @return the token (for chaining)
   8.315 -         */
   8.316 -        public Token add(final Token child) {
   8.317 -            if (child != null) {
   8.318 -                children.add(child);
   8.319 -                child.setParent(this);
   8.320 -            }
   8.321 -            return this;
   8.322 -        }
   8.323 -
   8.324 -        /**
   8.325 -         * Remove a child from a token
   8.326 -         * @param child the child to remove
   8.327 -         * @return true if successful
   8.328 -         */
   8.329 -        public boolean remove(final Token child) {
   8.330 -            return children.remove(child);
   8.331 -        }
   8.332 -
   8.333 -        /**
   8.334 -         * Remove the last child from a token
   8.335 -         * @return the removed child
   8.336 -         */
   8.337 -        public Object removeLast() {
   8.338 -            return children.remove(children.size() - 1);
   8.339 -        }
   8.340 -
   8.341 -        /**
   8.342 -         * Flag this token as dead code
   8.343 -         * @param isDead is it dead or not
   8.344 -         */
   8.345 -        private void setIsDead(final boolean isDead) {
   8.346 -            this.isDead = isDead;
   8.347 -        }
   8.348 -
   8.349 -        /**
   8.350 -         * Is this token dead code
   8.351 -         * @return boolean
   8.352 -         */
   8.353 -        private boolean getIsDead() {
   8.354 -            return isDead;
   8.355 -        }
   8.356 -
   8.357 -        /**
   8.358 -         * Get the parent of this token
   8.359 -         * @return parent token
   8.360 -         */
   8.361 -        public Token getParent() {
   8.362 -            return parent;
   8.363 -        }
   8.364 -
   8.365 -        public boolean hasParentOfType(final Token.Type parentType) {
   8.366 -            for (Token p = getParent(); p != null; p = p.getParent()) {
   8.367 -                if (p.getType() == parentType) {
   8.368 -                    return true;
   8.369 -                }
   8.370 -            }
   8.371 -            return false;
   8.372 -        }
   8.373 -
   8.374 -        public boolean hasChildOfType(final Token.Type childType) {
   8.375 -            for (final Iterator<Token> iter = iterator() ; iter.hasNext() ; ) {
   8.376 -                if (iter.next().getType() == childType) {
   8.377 -                    return true;
   8.378 -                }
   8.379 -            }
   8.380 -            return false;
   8.381 -        }
   8.382 -
   8.383 -        /**
   8.384 -         * Set the parent of this token
   8.385 -         * @param parent
   8.386 -         */
   8.387 -        private void setParent(final Token parent) {
   8.388 -            this.parent = parent;
   8.389 -        }
   8.390 -
   8.391 -        /**
   8.392 -         * Get the children of this token
   8.393 -         * @return an array of children, never null
   8.394 -         */
   8.395 -        public Object[] getChildren() {
   8.396 -            return children.toArray();
   8.397 -        }
   8.398 -
   8.399 -        /**
   8.400 -         * Reset this token, remove all children
   8.401 -         */
   8.402 -        public void reset() {
   8.403 -            children.clear();
   8.404 -        }
   8.405 -
   8.406 -        /**
   8.407 -         * Get a preorder token iterator with this token as root
   8.408 -         * @return an iterator
   8.409 -         */
   8.410 -        public Iterator<Token> iterator() {
   8.411 -            return new TokenIterator(this);
   8.412 -        }
   8.413 -
   8.414 -        /**
   8.415 -         * Get the type of this token
   8.416 -         * @return type
   8.417 -         */
   8.418 -        public Type getType() {
   8.419 -            return type;
   8.420 -        }
   8.421 -
   8.422 -        /**
   8.423 -         * Turn this token into Java regexp compatible text
   8.424 -         * @return part of a java regexp
   8.425 -         */
   8.426 -        @Override
   8.427 -        public String toString() {
   8.428 -            ToString t = toStringMap.get(getType());
   8.429 -            if (t == null) {
   8.430 -                t = DEFAULT_TOSTRING;
   8.431 -            }
   8.432 -            return t.toString(this);
   8.433 -        }
   8.434 -    }
   8.435 -
   8.436 -    /**
   8.437 -     * Constructor
   8.438 -     * @param string the JavaScript regexp to parse
   8.439 -     */
   8.440 -    private RegExpScanner(final String string) {
   8.441 -        super(string);
   8.442 -        sb = new StringBuilder(limit);
   8.443 -        reset(0);
   8.444 -        expected.put(']', 0);
   8.445 -        expected.put('}', 0);
   8.446 -    }
   8.447 -
   8.448 -    private void processForwardReferences() {
   8.449 -        if (neverMatches()) {
   8.450 -            return;
   8.451 -        }
   8.452 -
   8.453 -        for (final Map.Entry<Integer, Token> fwdRef : forwardReferences.entrySet()) {
   8.454 -            if (fwdRef.getKey().intValue() > caps.size()) {
   8.455 -                neverMatches = true;
   8.456 -                break;
   8.457 -            }
   8.458 -
   8.459 -            fwdRef.getValue().setIsDead(true);
   8.460 -        }
   8.461 -
   8.462 -        forwardReferences.clear();
   8.463 -    }
   8.464 -
   8.465 -    /**
   8.466 -     * Scan a JavaScript regexp string returning a Java safe regex string.
   8.467 -     *
   8.468 -     * @param string
   8.469 -     *            JavaScript regexp string.
   8.470 -     * @return Java safe regex string.
   8.471 -     */
   8.472 -    public static RegExpScanner scan(final String string) {
   8.473 -        final RegExpScanner scanner = new RegExpScanner(string);
   8.474 -
   8.475 -        Token pattern;
   8.476 -
   8.477 -        try {
   8.478 -            pattern = scanner.pattern();
   8.479 -        } catch (final Exception e) {
   8.480 -            throw new PatternSyntaxException(e.getMessage(), string, scanner.sb.length());
   8.481 -        }
   8.482 -
   8.483 -        scanner.processForwardReferences();
   8.484 -        if (scanner.neverMatches()) {
   8.485 -            return null; // never matches
   8.486 -        }
   8.487 -
   8.488 -        // go over the code and remove dead code
   8.489 -        final Iterator<Token> iter = pattern.iterator();
   8.490 -        while (iter.hasNext()) {
   8.491 -            final Token next = iter.next();
   8.492 -            if (next.getIsDead()) {
   8.493 -                next.getParent().remove(next);
   8.494 -            }
   8.495 -        }
   8.496 -
   8.497 -        // turn the pattern into a string, p, the java equivalent string for our js regexp
   8.498 -        final String p = pattern.toString();
   8.499 -        // if builder contains all tokens that were sent in, we know
   8.500 -        // we correctly parsed the entire JavaScript regexp without syntax errors
   8.501 -        if (!string.equals(scanner.getStringBuilder().toString())) {
   8.502 -            throw new PatternSyntaxException(string, p, p.length() + 1);
   8.503 -        }
   8.504 -
   8.505 -        scanner.javaPattern = p;
   8.506 -        return scanner;
   8.507 -     }
   8.508 -
   8.509 -    /**
   8.510 -     * Does this regexp ever match anything? Use of e.g. [], which is legal in JavaScript,
   8.511 -     * is an example where we never match
   8.512 -     *
   8.513 -     * @return boolean
   8.514 -     */
   8.515 -    private boolean neverMatches() {
   8.516 -        return neverMatches;
   8.517 -    }
   8.518 -
   8.519 -    /**
   8.520 -     * This is used to set better error messages that can be reused
   8.521 -     * in NativeRegExp for augmenting e.g. SyntaxErrors.
   8.522 -     *
   8.523 -     * @return an error message or null if no extra info
   8.524 -     */
   8.525 -    public String getErrorMessage() {
   8.526 -        return errorMessage;
   8.527 -    }
   8.528 -
   8.529 -    final StringBuilder getStringBuilder() {
   8.530 -        return sb;
   8.531 -    }
   8.532 -
   8.533 -    String getJavaPattern() {
   8.534 -        return javaPattern;
   8.535 -    }
   8.536 -
   8.537 -    BitVector getGroupsInNegativeLookahead() {
   8.538 -        BitVector vec = null;
   8.539 -        for (int i = 0; i < caps.size(); i++) {
   8.540 -            final Capture cap = caps.get(i);
   8.541 -            if (cap.getNegativeLookaheadLevel() > 0) {
   8.542 -                if (vec == null) {
   8.543 -                    vec = new BitVector(caps.size() + 1);
   8.544 -                }
   8.545 -                vec.set(i + 1);
   8.546 -            }
   8.547 -        }
   8.548 -        return vec;
   8.549 -    }
   8.550 -
   8.551 -    /**
   8.552 -     * Commit n characters to the builder and to a given token
   8.553 -     * @param token Uncommitted token.
   8.554 -     * @param n     Number of characters.
   8.555 -     * @return Committed token
   8.556 -     */
   8.557 -    private Token commit(final Token token, final int n) {
   8.558 -        final int startIn = position;
   8.559 -
   8.560 -        switch (n) {
   8.561 -        case 1:
   8.562 -            sb.append(ch0);
   8.563 -            skip(1);
   8.564 -            break;
   8.565 -        case 2:
   8.566 -            sb.append(ch0);
   8.567 -            sb.append(ch1);
   8.568 -            skip(2);
   8.569 -            break;
   8.570 -        case 3:
   8.571 -            sb.append(ch0);
   8.572 -            sb.append(ch1);
   8.573 -            sb.append(ch2);
   8.574 -            skip(3);
   8.575 -            break;
   8.576 -        default:
   8.577 -            assert false : "Should not reach here";
   8.578 -        }
   8.579 -
   8.580 -        if (token == null) {
   8.581 -            return null;
   8.582 -        }
   8.583 -
   8.584 -        return token.add(sb.substring(startIn, sb.length()));
   8.585 -    }
   8.586 -
   8.587 -    /**
   8.588 -     * Restart the buffers back at an earlier position.
   8.589 -     *
   8.590 -     * @param startIn
   8.591 -     *            Position in the input stream.
   8.592 -     * @param startOut
   8.593 -     *            Position in the output stream.
   8.594 -     */
   8.595 -    private void restart(final int startIn, final int startOut) {
   8.596 -        reset(startIn);
   8.597 -        sb.setLength(startOut);
   8.598 -    }
   8.599 -
   8.600 -    private void push(final char ch) {
   8.601 -        expected.put(ch, expected.get(ch) + 1);
   8.602 -    }
   8.603 -
   8.604 -    private void pop(final char ch) {
   8.605 -        expected.put(ch, Math.min(0, expected.get(ch) - 1));
   8.606 -    }
   8.607 -
   8.608 -    /*
   8.609 -     * Recursive descent tokenizer starts below.
   8.610 -     */
   8.611 -
   8.612 -    /*
   8.613 -     * Pattern ::
   8.614 -     *      Disjunction
   8.615 -     */
   8.616 -    private Token pattern() {
   8.617 -        final Token token = new Token(Token.Type.PATTERN);
   8.618 -
   8.619 -        final Token child = disjunction();
   8.620 -        return token.add(child);
   8.621 -    }
   8.622 -
   8.623 -    /*
   8.624 -     * Disjunction ::
   8.625 -     *      Alternative
   8.626 -     *      Alternative | Disjunction
   8.627 -     */
   8.628 -    private Token disjunction() {
   8.629 -        final Token token = new Token(Token.Type.DISJUNCTION);
   8.630 -
   8.631 -        while (true) {
   8.632 -            token.add(alternative());
   8.633 -
   8.634 -            if (ch0 == '|') {
   8.635 -                commit(token, 1);
   8.636 -            } else {
   8.637 -                break;
   8.638 -            }
   8.639 -        }
   8.640 -
   8.641 -        return token;
   8.642 -    }
   8.643 -
   8.644 -    /*
   8.645 -     * Alternative ::
   8.646 -     *      [empty]
   8.647 -     *      Alternative Term
   8.648 -     */
   8.649 -    private Token alternative() {
   8.650 -        final Token token = new Token(Token.Type.ALTERNATIVE);
   8.651 -
   8.652 -        Token child;
   8.653 -        while ((child = term()) != null) {
   8.654 -            token.add(child);
   8.655 -        }
   8.656 -
   8.657 -        return token;
   8.658 -    }
   8.659 -
   8.660 -    /*
   8.661 -     * Term ::
   8.662 -     *      Assertion
   8.663 -     *      Atom
   8.664 -     *      Atom Quantifier
   8.665 -     */
   8.666 -    private Token term() {
   8.667 -        final int startIn  = position;
   8.668 -        final int startOut = sb.length();
   8.669 -        final Token token  = new Token(Token.Type.TERM);
   8.670 -        Token child;
   8.671 -
   8.672 -        child = assertion();
   8.673 -        if (child != null) {
   8.674 -            return token.add(child);
   8.675 -        }
   8.676 -
   8.677 -        child = atom();
   8.678 -        if (child != null) {
   8.679 -            boolean emptyCharacterClass = false;
   8.680 -            if ("[]".equals(child.toString())) {
   8.681 -                emptyCharacterClass = true;
   8.682 -            }
   8.683 -
   8.684 -            token.add(child);
   8.685 -
   8.686 -            final Token quantifier = quantifier();
   8.687 -            if (quantifier != null) {
   8.688 -                token.add(quantifier);
   8.689 -            }
   8.690 -
   8.691 -            if (emptyCharacterClass) {
   8.692 -                if (quantifier == null) {
   8.693 -                    neverMatches = true; //never matches ever.
   8.694 -                } else {
   8.695 -                    //if we can get away with max zero, remove this entire token
   8.696 -                    final String qs = quantifier.toString();
   8.697 -                    if ("+".equals(qs) || "*".equals(qs) || qs.startsWith("{0,")) {
   8.698 -                        token.setIsDead(true);
   8.699 -                    }
   8.700 -                }
   8.701 -            }
   8.702 -
   8.703 -            return token;
   8.704 -        }
   8.705 -
   8.706 -        restart(startIn, startOut);
   8.707 -        return null;
   8.708 -    }
   8.709 -
   8.710 -    /*
   8.711 -     * Assertion ::
   8.712 -     *      ^
   8.713 -     *      $
   8.714 -     *      \b
   8.715 -     *      \B
   8.716 -     *      ( ? = Disjunction )
   8.717 -     *      ( ? ! Disjunction )
   8.718 -     */
   8.719 -    private Token assertion() {
   8.720 -        final int startIn  = position;
   8.721 -        final int startOut = sb.length();
   8.722 -        final Token token  = new Token(Token.Type.ASSERTION);
   8.723 -
   8.724 -        switch (ch0) {
   8.725 -        case '^':
   8.726 -        case '$':
   8.727 -            return commit(token, 1);
   8.728 -
   8.729 -        case '\\':
   8.730 -            if (ch1 == 'b' || ch1 == 'B') {
   8.731 -                return commit(token, 2);
   8.732 -            }
   8.733 -            break;
   8.734 -
   8.735 -        case '(':
   8.736 -            if (ch1 != '?') {
   8.737 -                break;
   8.738 -            }
   8.739 -            if (ch2 != '=' && ch2 != '!') {
   8.740 -                break;
   8.741 -            }
   8.742 -            final boolean isNegativeLookahead = (ch2 == '!');
   8.743 -            commit(token, 3);
   8.744 -
   8.745 -            if (isNegativeLookahead) {
   8.746 -                negativeLookaheadLevel++;
   8.747 -            }
   8.748 -            final Token disjunction = disjunction();
   8.749 -            if (isNegativeLookahead) {
   8.750 -                for (final Capture cap : caps) {
   8.751 -                    if (cap.getNegativeLookaheadLevel() >= negativeLookaheadLevel) {
   8.752 -                        cap.setDead();
   8.753 -                    }
   8.754 -                }
   8.755 -                negativeLookaheadLevel--;
   8.756 -            }
   8.757 -
   8.758 -            if (disjunction != null && ch0 == ')') {
   8.759 -                token.add(disjunction);
   8.760 -                return commit(token, 1);
   8.761 -            }
   8.762 -            break;
   8.763 -
   8.764 -        default:
   8.765 -            break;
   8.766 -        }
   8.767 -
   8.768 -        restart(startIn, startOut);
   8.769 -
   8.770 -        return null;
   8.771 -    }
   8.772 -
   8.773 -    /*
   8.774 -     * Quantifier ::
   8.775 -     *      QuantifierPrefix
   8.776 -     *      QuantifierPrefix ?
   8.777 -     */
   8.778 -    private Token quantifier() {
   8.779 -        final Token token = new Token(Token.Type.QUANTIFIER);
   8.780 -        final Token child = quantifierPrefix();
   8.781 -        if (child != null) {
   8.782 -            token.add(child);
   8.783 -            if (ch0 == '?') {
   8.784 -                commit(token, 1);
   8.785 -            }
   8.786 -            return token;
   8.787 -        }
   8.788 -        return null;
   8.789 -    }
   8.790 -
   8.791 -    /*
   8.792 -     * QuantifierPrefix ::
   8.793 -     *      *
   8.794 -     *      +
   8.795 -     *      ?
   8.796 -     *      { DecimalDigits }
   8.797 -     *      { DecimalDigits , }
   8.798 -     *      { DecimalDigits , DecimalDigits }
   8.799 -     */
   8.800 -    private Token quantifierPrefix() {
   8.801 -        final int startIn  = position;
   8.802 -        final int startOut = sb.length();
   8.803 -        final Token token  = new Token(Token.Type.QUANTIFIER_PREFIX);
   8.804 -
   8.805 -        switch (ch0) {
   8.806 -        case '*':
   8.807 -        case '+':
   8.808 -        case '?':
   8.809 -            return commit(token, 1);
   8.810 -
   8.811 -        case '{':
   8.812 -            commit(token, 1);
   8.813 -
   8.814 -            final Token child = decimalDigits();
   8.815 -            if (child == null) {
   8.816 -                break; // not a quantifier - back out
   8.817 -            }
   8.818 -            push('}');
   8.819 -            token.add(child);
   8.820 -
   8.821 -            if (ch0 == ',') {
   8.822 -                commit(token, 1);
   8.823 -                token.add(decimalDigits());
   8.824 -            }
   8.825 -
   8.826 -            if (ch0 == '}') {
   8.827 -                pop('}');
   8.828 -                commit(token, 1);
   8.829 -            }
   8.830 -
   8.831 -            return token;
   8.832 -
   8.833 -        default:
   8.834 -            break;
   8.835 -        }
   8.836 -
   8.837 -        restart(startIn, startOut);
   8.838 -        return null;
   8.839 -    }
   8.840 -
   8.841 -    /*
   8.842 -     * Atom ::
   8.843 -     *      PatternCharacter
   8.844 -     *      .
   8.845 -     *      \ AtomEscape
   8.846 -     *      CharacterClass
   8.847 -     *      ( Disjunction )
   8.848 -     *      ( ? : Disjunction )
   8.849 -     *
   8.850 -     */
   8.851 -    private Token atom() {
   8.852 -        final int startIn  = position;
   8.853 -        final int startOut = sb.length();
   8.854 -        final Token token  = new Token(Token.Type.ATOM);
   8.855 -        Token child;
   8.856 -
   8.857 -        child = patternCharacter();
   8.858 -        if (child != null) {
   8.859 -            return token.add(child);
   8.860 -        }
   8.861 -
   8.862 -        if (ch0 == '.') {
   8.863 -            return commit(token, 1);
   8.864 -        }
   8.865 -
   8.866 -        if (ch0 == '\\') {
   8.867 -            commit(token, 1);
   8.868 -            child = atomEscape();
   8.869 -
   8.870 -            if (child != null) {
   8.871 -                if (child.hasChildOfType(Token.Type.IDENTITY_ESCAPE)) {
   8.872 -                    final char idEscape = child.toString().charAt(0);
   8.873 -                    if (NON_IDENT_ESCAPES.indexOf(idEscape) == -1) {
   8.874 -                        token.reset();
   8.875 -                    }
   8.876 -                }
   8.877 -
   8.878 -                token.add(child);
   8.879 -
   8.880 -                // forward backreferences always match empty. JavaScript != Java
   8.881 -                if (child.hasChildOfType(Token.Type.DECIMAL_ESCAPE) && !"\u0000".equals(child.toString())) {
   8.882 -                    final int refNum = Integer.parseInt(child.toString());
   8.883 -
   8.884 -                    if (refNum - 1 < caps.size() && caps.get(refNum - 1).isDead()) {
   8.885 -                        // reference to dead in-negative-lookahead capture
   8.886 -                        token.setIsDead(true);
   8.887 -                    } else if (caps.size() < refNum) {
   8.888 -                        // forward reference: always matches against empty string (dead token).
   8.889 -                        // invalid reference (non-existant capture): pattern never matches.
   8.890 -                        forwardReferences.put(refNum, token);
   8.891 -                    }
   8.892 -                }
   8.893 -
   8.894 -                return token;
   8.895 -            }
   8.896 -        }
   8.897 -
   8.898 -        child = characterClass();
   8.899 -        if (child != null) {
   8.900 -            return token.add(child);
   8.901 -        }
   8.902 -
   8.903 -        if (ch0 == '(') {
   8.904 -            boolean capturingParens = true;
   8.905 -            commit(token, 1);
   8.906 -            if (ch0 == '?' && ch1 == ':') {
   8.907 -                capturingParens = false;
   8.908 -                commit(token, 2);
   8.909 -            }
   8.910 -
   8.911 -            child = disjunction();
   8.912 -            if (child != null) {
   8.913 -                token.add(child);
   8.914 -                if (ch0 == ')') {
   8.915 -                    final Token atom = commit(token, 1);
   8.916 -                    if (capturingParens) {
   8.917 -                        caps.add(new Capture(negativeLookaheadLevel));
   8.918 -                    }
   8.919 -                    return atom;
   8.920 -                }
   8.921 -            }
   8.922 -        }
   8.923 -
   8.924 -        restart(startIn, startOut);
   8.925 -        return null;
   8.926 -    }
   8.927 -
   8.928 -    /*
   8.929 -     * PatternCharacter ::
   8.930 -     *      SourceCharacter but not any of: ^$\.*+?()[]{}|
   8.931 -     */
   8.932 -    @SuppressWarnings("fallthrough")
   8.933 -    private Token patternCharacter() {
   8.934 -        if (atEOF()) {
   8.935 -            return null;
   8.936 -        }
   8.937 -
   8.938 -        switch (ch0) {
   8.939 -        case '^':
   8.940 -        case '$':
   8.941 -        case '\\':
   8.942 -        case '.':
   8.943 -        case '*':
   8.944 -        case '+':
   8.945 -        case '?':
   8.946 -        case '(':
   8.947 -        case ')':
   8.948 -        case '[':
   8.949 -        case '|':
   8.950 -            return null;
   8.951 -
   8.952 -        case '}':
   8.953 -        case ']':
   8.954 -            final int n = expected.get(ch0);
   8.955 -            if (n != 0) {
   8.956 -                return null;
   8.957 -            }
   8.958 -
   8.959 -       case '{':
   8.960 -           // if not a valid quantifier escape curly brace to match itself
   8.961 -           // this ensures compatibility with other JS implementations
   8.962 -           final Token quant = quantifierPrefix();
   8.963 -           return (quant == null) ? commit(new Token(Token.Type.PATTERN_CHARACTER).add("\\"), 1) : null;
   8.964 -
   8.965 -        default:
   8.966 -            return commit(new Token(Token.Type.PATTERN_CHARACTER), 1); // SOURCECHARACTER
   8.967 -        }
   8.968 -    }
   8.969 -
   8.970 -    /*
   8.971 -     * AtomEscape ::
   8.972 -     *      DecimalEscape
   8.973 -     *      CharacterEscape
   8.974 -     *      CharacterClassEscape
   8.975 -     */
   8.976 -    private Token atomEscape() {
   8.977 -        final Token token = new Token(Token.Type.ATOM_ESCAPE);
   8.978 -        Token child;
   8.979 -
   8.980 -        child = decimalEscape();
   8.981 -        if (child != null) {
   8.982 -            return token.add(child);
   8.983 -        }
   8.984 -
   8.985 -        child = characterClassEscape();
   8.986 -        if (child != null) {
   8.987 -            return token.add(child);
   8.988 -        }
   8.989 -
   8.990 -        child = characterEscape();
   8.991 -        if (child != null) {
   8.992 -            return token.add(child);
   8.993 -        }
   8.994 -
   8.995 -
   8.996 -        return null;
   8.997 -    }
   8.998 -
   8.999 -    /*
  8.1000 -     * CharacterEscape ::
  8.1001 -     *      ControlEscape
  8.1002 -     *      c ControlLetter
  8.1003 -     *      HexEscapeSequence
  8.1004 -     *      UnicodeEscapeSequence
  8.1005 -     *      IdentityEscape
  8.1006 -     */
  8.1007 -    private Token characterEscape() {
  8.1008 -        final int startIn  = position;
  8.1009 -        final int startOut = sb.length();
  8.1010 -
  8.1011 -        final Token token = new Token(Token.Type.CHARACTER_ESCAPE);
  8.1012 -        Token child;
  8.1013 -
  8.1014 -        child = controlEscape();
  8.1015 -        if (child != null) {
  8.1016 -            return token.add(child);
  8.1017 -        }
  8.1018 -
  8.1019 -        if (ch0 == 'c') {
  8.1020 -            commit(token, 1);
  8.1021 -            child = controlLetter();
  8.1022 -            if (child != null) {
  8.1023 -                return token.add(child);
  8.1024 -            }
  8.1025 -            restart(startIn, startOut);
  8.1026 -        }
  8.1027 -
  8.1028 -        child = hexEscapeSequence();
  8.1029 -        if (child != null) {
  8.1030 -            return token.add(child);
  8.1031 -        }
  8.1032 -
  8.1033 -        child = unicodeEscapeSequence();
  8.1034 -        if (child != null) {
  8.1035 -            return token.add(child);
  8.1036 -        }
  8.1037 -
  8.1038 -        child = identityEscape();
  8.1039 -        if (child != null) {
  8.1040 -            return token.add(child);
  8.1041 -        }
  8.1042 -
  8.1043 -        restart(startIn, startOut);
  8.1044 -
  8.1045 -        return null;
  8.1046 -    }
  8.1047 -
  8.1048 -    private boolean scanEscapeSequence(final char leader, final int length, final Token token) {
  8.1049 -        final int startIn  = position;
  8.1050 -        final int startOut = sb.length();
  8.1051 -
  8.1052 -        if (ch0 != leader) {
  8.1053 -            return false;
  8.1054 -        }
  8.1055 -
  8.1056 -        commit(token, 1);
  8.1057 -        for (int i = 0; i < length; i++) {
  8.1058 -            final char ch0l = Character.toLowerCase(ch0);
  8.1059 -            if ((ch0l >= 'a' && ch0l <= 'f') || isDecimalDigit(ch0)) {
  8.1060 -                commit(token, 1);
  8.1061 -            } else {
  8.1062 -                restart(startIn, startOut);
  8.1063 -                return false;
  8.1064 -            }
  8.1065 -        }
  8.1066 -
  8.1067 -        return true;
  8.1068 -    }
  8.1069 -
  8.1070 -    private Token hexEscapeSequence() {
  8.1071 -        final Token token = new Token(Token.Type.HEX_ESCAPESEQUENCE);
  8.1072 -        if (scanEscapeSequence('x', 2, token)) {
  8.1073 -            return token;
  8.1074 -        }
  8.1075 -        return null;
  8.1076 -    }
  8.1077 -
  8.1078 -    private Token unicodeEscapeSequence() {
  8.1079 -        final Token token = new Token(Token.Type.UNICODE_ESCAPESEQUENCE);
  8.1080 -        if (scanEscapeSequence('u', 4, token)) {
  8.1081 -            return token;
  8.1082 -        }
  8.1083 -        return null;
  8.1084 -    }
  8.1085 -
  8.1086 -    /*
  8.1087 -     * ControlEscape ::
  8.1088 -     *      one of fnrtv
  8.1089 -     */
  8.1090 -    private Token controlEscape() {
  8.1091 -        switch (ch0) {
  8.1092 -        case 'f':
  8.1093 -        case 'n':
  8.1094 -        case 'r':
  8.1095 -        case 't':
  8.1096 -        case 'v':
  8.1097 -            return commit(new Token(Token.Type.CONTROL_ESCAPE), 1);
  8.1098 -
  8.1099 -        default:
  8.1100 -            return null;
  8.1101 -        }
  8.1102 -    }
  8.1103 -
  8.1104 -    /*
  8.1105 -     * ControlLetter ::
  8.1106 -     *      one of abcdefghijklmnopqrstuvwxyz
  8.1107 -     *      ABCDEFGHIJKLMNOPQRSTUVWXYZ
  8.1108 -     */
  8.1109 -    private Token controlLetter() {
  8.1110 -        final char c = Character.toUpperCase(ch0);
  8.1111 -        if (c >= 'A' && c <= 'Z') {
  8.1112 -            final Token token = new Token(Token.Type.CONTROL_LETTER);
  8.1113 -            commit(token, 1);
  8.1114 -            return token;
  8.1115 -        }
  8.1116 -        return null;
  8.1117 -        /*
  8.1118 -        Token token = new Token(Token.Type.CONTROL_LETTER);
  8.1119 -        commit(null, 1);//add original char to builder not to token
  8.1120 -        this.neverMatches = c < 'A' || c > 'Z';
  8.1121 -        return token.add(""+c);*/
  8.1122 -    }
  8.1123 -
  8.1124 -    /*
  8.1125 -     * IdentityEscape ::
  8.1126 -     *      SourceCharacter but not IdentifierPart
  8.1127 -     *      <ZWJ>  (200c)
  8.1128 -     *      <ZWNJ> (200d)
  8.1129 -     */
  8.1130 -    private Token identityEscape() {
  8.1131 -        final Token token = new Token(Token.Type.IDENTITY_ESCAPE);
  8.1132 -        commit(token, 1);
  8.1133 -        return token;
  8.1134 -    }
  8.1135 -
  8.1136 -    /*
  8.1137 -     * DecimalEscape ::
  8.1138 -     *      DecimalIntegerLiteral [lookahead DecimalDigit]
  8.1139 -     */
  8.1140 -    private Token decimalEscape() {
  8.1141 -        final Token token = new Token(Token.Type.DECIMAL_ESCAPE);
  8.1142 -        final int startIn  = position;
  8.1143 -        final int startOut = sb.length();
  8.1144 -
  8.1145 -        if (ch0 == '0' && !isDecimalDigit(ch1)) {
  8.1146 -            commit(token, 1);
  8.1147 -            token.removeLast();
  8.1148 -            //  DecimalEscape :: 0. If i is zero, return the EscapeValue consisting of a <NUL> character (Unicodevalue0000);
  8.1149 -            return token.add("\u0000");
  8.1150 -        }
  8.1151 -
  8.1152 -        if (isDecimalDigit(ch0)) {
  8.1153 -            while (isDecimalDigit(ch0)) {
  8.1154 -                commit(token, 1);
  8.1155 -            }
  8.1156 -            return token;
  8.1157 -        }
  8.1158 -
  8.1159 -        restart(startIn, startOut);
  8.1160 -
  8.1161 -        return null;
  8.1162 -    }
  8.1163 -
  8.1164 -    /*
  8.1165 -     * CharacterClassEscape ::
  8.1166 -     *  one of dDsSwW
  8.1167 -     */
  8.1168 -    private Token characterClassEscape() {
  8.1169 -        switch (ch0) {
  8.1170 -        case 's':
  8.1171 -        case 'S':
  8.1172 -        case 'd':
  8.1173 -        case 'D':
  8.1174 -        case 'w':
  8.1175 -        case 'W':
  8.1176 -            return commit(new Token(Token.Type.CHARACTERCLASS_ESCAPE), 1);
  8.1177 -
  8.1178 -        default:
  8.1179 -            return null;
  8.1180 -        }
  8.1181 -    }
  8.1182 -
  8.1183 -    /*
  8.1184 -     * CharacterClass ::
  8.1185 -     *      [ [lookahead {^}] ClassRanges ]
  8.1186 -     *      [ ^ ClassRanges ]
  8.1187 -     */
  8.1188 -    private Token characterClass() {
  8.1189 -        final int startIn  = position;
  8.1190 -        final int startOut = sb.length();
  8.1191 -        final Token token  = new Token(Token.Type.CHARACTERCLASS);
  8.1192 -
  8.1193 -        if (ch0 == '[') {
  8.1194 -            push(']');
  8.1195 -            commit(token, 1);
  8.1196 -
  8.1197 -            if (ch0 == '^') {
  8.1198 -                commit(token, 1);
  8.1199 -            }
  8.1200 -
  8.1201 -            final Token child = classRanges();
  8.1202 -            if (child != null && ch0 == ']') {
  8.1203 -                pop(']');
  8.1204 -                token.add(child);
  8.1205 -                return commit(token, 1);
  8.1206 -            }
  8.1207 -        }
  8.1208 -
  8.1209 -        restart(startIn, startOut);
  8.1210 -        return null;
  8.1211 -    }
  8.1212 -
  8.1213 -    /*
  8.1214 -     * ClassRanges ::
  8.1215 -     *      [empty]
  8.1216 -     *      NonemptyClassRanges
  8.1217 -     */
  8.1218 -    private Token classRanges() {
  8.1219 -        return new Token(Token.Type.CLASSRANGES).add(nonemptyClassRanges());
  8.1220 -    }
  8.1221 -
  8.1222 -    /*
  8.1223 -     * NonemptyClassRanges ::
  8.1224 -     *      ClassAtom
  8.1225 -     *      ClassAtom NonemptyClassRangesNoDash
  8.1226 -     *      ClassAtom - ClassAtom ClassRanges
  8.1227 -     */
  8.1228 -    private Token nonemptyClassRanges() {
  8.1229 -        final int startIn  = position;
  8.1230 -        final int startOut = sb.length();
  8.1231 -        final Token token  = new Token(Token.Type.NON_EMPTY_CLASSRANGES);
  8.1232 -        Token child;
  8.1233 -
  8.1234 -        child = classAtom();
  8.1235 -        if (child != null) {
  8.1236 -            token.add(child);
  8.1237 -
  8.1238 -            if (ch0 == '-') {
  8.1239 -                commit(token, 1);
  8.1240 -
  8.1241 -                final Token child1 = classAtom();
  8.1242 -                final Token child2 = classRanges();
  8.1243 -                if (child1 != null && child2 != null) {
  8.1244 -                    token.add(child1);
  8.1245 -                    token.add(child2);
  8.1246 -
  8.1247 -                    return token;
  8.1248 -                }
  8.1249 -            }
  8.1250 -
  8.1251 -            child = nonemptyClassRangesNoDash();
  8.1252 -            if (child != null) {
  8.1253 -                token.add(child);
  8.1254 -                return token;
  8.1255 -            }
  8.1256 -
  8.1257 -            return token;
  8.1258 -        }
  8.1259 -
  8.1260 -        restart(startIn, startOut);
  8.1261 -        return null;
  8.1262 -    }
  8.1263 -
  8.1264 -    /*
  8.1265 -     * NonemptyClassRangesNoDash ::
  8.1266 -     *      ClassAtom
  8.1267 -     *      ClassAtomNoDash NonemptyClassRangesNoDash
  8.1268 -     *      ClassAtomNoDash - ClassAtom ClassRanges
  8.1269 -     */
  8.1270 -    private Token nonemptyClassRangesNoDash() {
  8.1271 -        final int startIn  = position;
  8.1272 -        final int startOut = sb.length();
  8.1273 -        final Token token  = new Token(Token.Type.NON_EMPTY_CLASSRANGES_NODASH);
  8.1274 -        Token child;
  8.1275 -
  8.1276 -        child = classAtomNoDash();
  8.1277 -        if (child != null) {
  8.1278 -            token.add(child);
  8.1279 -
  8.1280 -            // need to check dash first, as for e.g. [a-b|c-d] will otherwise parse - as an atom
  8.1281 -            if (ch0 == '-') {
  8.1282 -               commit(token, 1);
  8.1283 -
  8.1284 -               final Token child1 = classAtom();
  8.1285 -               final Token child2 = classRanges();
  8.1286 -               if (child1 != null && child2 != null) {
  8.1287 -                   token.add(child1);
  8.1288 -                   return token.add(child2);
  8.1289 -               }
  8.1290 -               //fallthru
  8.1291 -           }
  8.1292 -
  8.1293 -            child = nonemptyClassRangesNoDash();
  8.1294 -            if (child != null) {
  8.1295 -                token.add(child);
  8.1296 -            }
  8.1297 -            return token; // still a class atom
  8.1298 -        }
  8.1299 -
  8.1300 -        child = classAtom();
  8.1301 -        if (child != null) {
  8.1302 -            return token.add(child);
  8.1303 -        }
  8.1304 -
  8.1305 -        restart(startIn, startOut);
  8.1306 -        return null;
  8.1307 -    }
  8.1308 -
  8.1309 -    /*
  8.1310 -     * ClassAtom : - ClassAtomNoDash
  8.1311 -     */
  8.1312 -    private Token classAtom() {
  8.1313 -        final Token token = new Token(Token.Type.CLASSATOM);
  8.1314 -
  8.1315 -        if (ch0 == '-') {
  8.1316 -            return commit(token, 1);
  8.1317 -        }
  8.1318 -
  8.1319 -        final Token child = classAtomNoDash();
  8.1320 -        if (child != null) {
  8.1321 -            return token.add(child);
  8.1322 -        }
  8.1323 -
  8.1324 -        return null;
  8.1325 -    }
  8.1326 -
  8.1327 -    /*
  8.1328 -     * ClassAtomNoDash ::
  8.1329 -     *      SourceCharacter but not one of \ or ] or -
  8.1330 -     *      \ ClassEscape
  8.1331 -     */
  8.1332 -    private Token classAtomNoDash() {
  8.1333 -        final int startIn  = position;
  8.1334 -        final int startOut = sb.length();
  8.1335 -        final Token token  = new Token(Token.Type.CLASSATOM_NODASH);
  8.1336 -
  8.1337 -        switch (ch0) {
  8.1338 -        case ']':
  8.1339 -        case '-':
  8.1340 -        case '\0':
  8.1341 -            return null;
  8.1342 -
  8.1343 -        case '[':
  8.1344 -            // unescaped left square bracket - add escape
  8.1345 -            return commit(token.add("\\"), 1);
  8.1346 -
  8.1347 -        case '\\':
  8.1348 -            commit(token, 1);
  8.1349 -            final Token child = classEscape();
  8.1350 -            if (child != null) {
  8.1351 -                return token.add(child);
  8.1352 -            }
  8.1353 -
  8.1354 -            restart(startIn, startOut);
  8.1355 -            return null;
  8.1356 -
  8.1357 -        default:
  8.1358 -            return commit(token, 1);
  8.1359 -        }
  8.1360 -    }
  8.1361 -
  8.1362 -    /*
  8.1363 -     * ClassEscape ::
  8.1364 -     *      DecimalEscape
  8.1365 -     *      b
  8.1366 -     *      CharacterEscape
  8.1367 -     *      CharacterClassEscape
  8.1368 -     */
  8.1369 -    private Token classEscape() {
  8.1370 -        final Token token = new Token(Token.Type.CLASS_ESCAPE);
  8.1371 -        Token child;
  8.1372 -
  8.1373 -        child = decimalEscape();
  8.1374 -        if (child != null) {
  8.1375 -            return token.add(child);
  8.1376 -        }
  8.1377 -
  8.1378 -        if (ch0 == 'b') {
  8.1379 -            return commit(token, 1);
  8.1380 -        }
  8.1381 -
  8.1382 -        child = characterEscape();
  8.1383 -        if (child != null) {
  8.1384 -            return token.add(child);
  8.1385 -        }
  8.1386 -
  8.1387 -        child = characterClassEscape();
  8.1388 -        if (child != null) {
  8.1389 -            return token.add(child);
  8.1390 -        }
  8.1391 -
  8.1392 -        return null;
  8.1393 -    }
  8.1394 -
  8.1395 -    /*
  8.1396 -     * DecimalDigits
  8.1397 -     */
  8.1398 -    private Token decimalDigits() {
  8.1399 -        if (!isDecimalDigit(ch0)) {
  8.1400 -            return null;
  8.1401 -        }
  8.1402 -
  8.1403 -        final Token token = new Token(Token.Type.DECIMALDIGITS);
  8.1404 -        while (isDecimalDigit(ch0)) {
  8.1405 -            commit(token, 1);
  8.1406 -        }
  8.1407 -
  8.1408 -        return token;
  8.1409 -    }
  8.1410 -
  8.1411 -    private static boolean isDecimalDigit(final char ch) {
  8.1412 -        return ch >= '0' && ch <= '9';
  8.1413 -    }
  8.1414 -}
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/DefaultRegExp.java	Fri Feb 22 16:31:10 2013 +0100
     9.3 @@ -0,0 +1,163 @@
     9.4 +/*
     9.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     9.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     9.7 + *
     9.8 + * This code is free software; you can redistribute it and/or modify it
     9.9 + * under the terms of the GNU General Public License version 2 only, as
    9.10 + * published by the Free Software Foundation.  Oracle designates this
    9.11 + * particular file as subject to the "Classpath" exception as provided
    9.12 + * by Oracle in the LICENSE file that accompanied this code.
    9.13 + *
    9.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    9.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    9.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    9.17 + * version 2 for more details (a copy is included in the LICENSE file that
    9.18 + * accompanied this code).
    9.19 + *
    9.20 + * You should have received a copy of the GNU General Public License version
    9.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    9.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    9.23 + *
    9.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    9.25 + * or visit www.oracle.com if you need additional information or have any
    9.26 + * questions.
    9.27 + */
    9.28 +
    9.29 +package jdk.nashorn.internal.runtime.regexp;
    9.30 +
    9.31 +import jdk.nashorn.internal.runtime.ParserException;
    9.32 +
    9.33 +import static java.util.regex.Pattern.CASE_INSENSITIVE;
    9.34 +import static java.util.regex.Pattern.MULTILINE;
    9.35 +import static java.util.regex.Pattern.UNICODE_CASE;
    9.36 +
    9.37 +import java.util.regex.Matcher;
    9.38 +import java.util.regex.Pattern;
    9.39 +import java.util.regex.PatternSyntaxException;
    9.40 +
    9.41 +/**
    9.42 + * Default regular expression implementation based on java.util.regex package.
    9.43 + *
    9.44 + * Note that this class is not thread-safe as it stores the current match result
    9.45 + * and the string being matched in instance fields.
    9.46 + */
    9.47 +public class DefaultRegExp extends RegExp {
    9.48 +
    9.49 +    /** Java regexp pattern to use for match. We compile to one of these */
    9.50 +    private Pattern pattern;
    9.51 +
    9.52 +    /** The matcher */
    9.53 +    private RegExpMatcher matcher;
    9.54 +
    9.55 +    /**
    9.56 +     * Construct a Regular expression from the given {@code source} and {@code flags} strings.
    9.57 +     *
    9.58 +     * @param source RegExp source string
    9.59 +     * @param flags RegExp flag string
    9.60 +     * @throws ParserException if flags is invalid or source string has syntax error.
    9.61 +     */
    9.62 +    public DefaultRegExp(final String source, final String flags) throws ParserException {
    9.63 +        super(source, flags);
    9.64 +
    9.65 +        int intFlags = 0;
    9.66 +
    9.67 +        if (isIgnoreCase()) {
    9.68 +            intFlags |= CASE_INSENSITIVE | UNICODE_CASE;
    9.69 +        }
    9.70 +        if (isMultiline()) {
    9.71 +            intFlags |= MULTILINE;
    9.72 +        }
    9.73 +
    9.74 +        try {
    9.75 +            RegExpScanner parsed;
    9.76 +
    9.77 +            try {
    9.78 +                parsed = RegExpScanner.scan(source);
    9.79 +            } catch (final PatternSyntaxException e) {
    9.80 +                // refine the exception with a better syntax error, if this
    9.81 +                // passes, just rethrow what we have
    9.82 +                Pattern.compile(source, intFlags);
    9.83 +                throw e;
    9.84 +            }
    9.85 +
    9.86 +            if (parsed != null) {
    9.87 +                this.pattern = Pattern.compile(parsed.getJavaPattern(), intFlags);
    9.88 +                this.groupsInNegativeLookahead = parsed.getGroupsInNegativeLookahead();
    9.89 +            }
    9.90 +        } catch (final PatternSyntaxException e2) {
    9.91 +            throwParserException("syntax", e2.getMessage());
    9.92 +        }
    9.93 +    }
    9.94 +
    9.95 +    @Override
    9.96 +    public RegExpMatcher match(final String str) {
    9.97 +        if (pattern == null) {
    9.98 +            return null; // never matches or similar, e.g. a[]
    9.99 +        }
   9.100 +
   9.101 +        RegExpMatcher matcher = this.matcher;
   9.102 +
   9.103 +        if (matcher == null || matcher.getInput() != str) {
   9.104 +            matcher = new DefaultMatcher(str);
   9.105 +            this.matcher = matcher;
   9.106 +        }
   9.107 +
   9.108 +        return matcher;
   9.109 +    }
   9.110 +
   9.111 +    class DefaultMatcher implements RegExpMatcher {
   9.112 +        final String input;
   9.113 +        final Matcher matcher;
   9.114 +
   9.115 +        DefaultMatcher(final String input) {
   9.116 +            this.input = input;
   9.117 +            this.matcher = pattern.matcher(input);
   9.118 +        }
   9.119 +
   9.120 +        @Override
   9.121 +        public boolean search(final int start) {
   9.122 +            return matcher.find(start);
   9.123 +        }
   9.124 +
   9.125 +        @Override
   9.126 +        public String getInput() {
   9.127 +            return input;
   9.128 +        }
   9.129 +
   9.130 +        @Override
   9.131 +        public int start() {
   9.132 +            return matcher.start();
   9.133 +        }
   9.134 +
   9.135 +        @Override
   9.136 +        public int start(final int group) {
   9.137 +            return matcher.start(group);
   9.138 +        }
   9.139 +
   9.140 +        @Override
   9.141 +        public int end() {
   9.142 +            return matcher.end();
   9.143 +        }
   9.144 +
   9.145 +        @Override
   9.146 +        public int end(final int group) {
   9.147 +            return matcher.end(group);
   9.148 +        }
   9.149 +
   9.150 +        @Override
   9.151 +        public String group() {
   9.152 +            return matcher.group();
   9.153 +        }
   9.154 +
   9.155 +        @Override
   9.156 +        public String group(final int group) {
   9.157 +            return matcher.group(group);
   9.158 +        }
   9.159 +
   9.160 +        @Override
   9.161 +        public int groupCount() {
   9.162 +            return matcher.groupCount();
   9.163 +        }
   9.164 +    }
   9.165 +
   9.166 +}
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExp.java	Fri Feb 22 16:31:10 2013 +0100
    10.3 @@ -0,0 +1,164 @@
    10.4 +/*
    10.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
    10.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    10.7 + *
    10.8 + * This code is free software; you can redistribute it and/or modify it
    10.9 + * under the terms of the GNU General Public License version 2 only, as
   10.10 + * published by the Free Software Foundation.  Oracle designates this
   10.11 + * particular file as subject to the "Classpath" exception as provided
   10.12 + * by Oracle in the LICENSE file that accompanied this code.
   10.13 + *
   10.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
   10.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   10.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   10.17 + * version 2 for more details (a copy is included in the LICENSE file that
   10.18 + * accompanied this code).
   10.19 + *
   10.20 + * You should have received a copy of the GNU General Public License version
   10.21 + * 2 along with this work; if not, write to the Free Software Foundation,
   10.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   10.23 + *
   10.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   10.25 + * or visit www.oracle.com if you need additional information or have any
   10.26 + * questions.
   10.27 + */
   10.28 +
   10.29 +package jdk.nashorn.internal.runtime.regexp;
   10.30 +
   10.31 +import jdk.nashorn.internal.runtime.BitVector;
   10.32 +import jdk.nashorn.internal.runtime.ECMAErrors;
   10.33 +import jdk.nashorn.internal.runtime.ParserException;
   10.34 +
   10.35 +import java.util.regex.MatchResult;
   10.36 +
   10.37 +/**
   10.38 + * This is the base class for representing a parsed regular expression.
   10.39 + *
   10.40 + * Instances of this class are created by a {@link RegExpFactory}.
   10.41 + */
   10.42 +public abstract class RegExp {
   10.43 +
   10.44 +    /** Pattern string. */
   10.45 +    private final String source;
   10.46 +
   10.47 +    /** Global search flag for this regexp.*/
   10.48 +    private boolean global;
   10.49 +
   10.50 +    /** Case insensitive flag for this regexp */
   10.51 +    private boolean ignoreCase;
   10.52 +
   10.53 +    /** Multi-line flag for this regexp */
   10.54 +    private boolean multiline;
   10.55 +
   10.56 +    /** BitVector that keeps track of groups in negative lookahead */
   10.57 +    protected BitVector groupsInNegativeLookahead;
   10.58 +
   10.59 +    /**
   10.60 +     * Constructor.
   10.61 +     *
   10.62 +     * @param source the source string
   10.63 +     * @param flags the flags string
   10.64 +     */
   10.65 +    protected RegExp(final String source, final String flags) {
   10.66 +        this.source = source;
   10.67 +        for (int i = 0; i < flags.length(); i++) {
   10.68 +            final char ch = flags.charAt(i);
   10.69 +            switch (ch) {
   10.70 +            case 'g':
   10.71 +                if (this.global) {
   10.72 +                    throwParserException("repeated.flag", "g");
   10.73 +                }
   10.74 +                this.global = true;
   10.75 +                break;
   10.76 +            case 'i':
   10.77 +                if (this.ignoreCase) {
   10.78 +                    throwParserException("repeated.flag", "i");
   10.79 +                }
   10.80 +                this.ignoreCase = true;
   10.81 +                break;
   10.82 +            case 'm':
   10.83 +                if (this.multiline) {
   10.84 +                    throwParserException("repeated.flag", "m");
   10.85 +                }
   10.86 +                this.multiline = true;
   10.87 +                break;
   10.88 +            default:
   10.89 +                throwParserException("unsupported.flag", Character.toString(ch));
   10.90 +            }
   10.91 +        }
   10.92 +    }
   10.93 +
   10.94 +    /**
   10.95 +     * Get the source pattern of this regular expression.
   10.96 +     *
   10.97 +     * @return the source string
   10.98 +     */
   10.99 +    public String getSource() {
  10.100 +        return source;
  10.101 +    }
  10.102 +
  10.103 +    /**
  10.104 +     * Set the global flag of this regular expression to {@code global}.
  10.105 +     *
  10.106 +     * @param global the new global flag
  10.107 +     */
  10.108 +    public void setGlobal(final boolean global) {
  10.109 +        this.global = global;
  10.110 +    }
  10.111 +
  10.112 +    /**
  10.113 +     * Get the global flag of this regular expression.
  10.114 +     *
  10.115 +     * @return the global flag
  10.116 +     */
  10.117 +    public boolean isGlobal() {
  10.118 +        return global;
  10.119 +    }
  10.120 +
  10.121 +    /**
  10.122 +     * Get the ignore-case flag of this regular expression.
  10.123 +     *
  10.124 +     * @return the ignore-case flag
  10.125 +     */
  10.126 +    public boolean isIgnoreCase() {
  10.127 +        return ignoreCase;
  10.128 +    }
  10.129 +
  10.130 +    /**
  10.131 +     * Get the multiline flag of this regular expression.
  10.132 +     *
  10.133 +     * @return the multiline flag
  10.134 +     */
  10.135 +    public boolean isMultiline() {
  10.136 +        return multiline;
  10.137 +    }
  10.138 +
  10.139 +    /**
  10.140 +     * Get a bitset indicating which of the groups in this regular expression are inside a negative lookahead.
  10.141 +     *
  10.142 +     * @return the groups-in-negative-lookahead bitset
  10.143 +     */
  10.144 +    public BitVector getGroupsInNegativeLookahead() {
  10.145 +        return groupsInNegativeLookahead;
  10.146 +    }
  10.147 +
  10.148 +    /**
  10.149 +     * Match this regular expression against {@code str} and return a {@link RegExpMatcher},
  10.150 +     * a {@link MatchResult} that can search {@code str} from a given start index.
  10.151 +     *
  10.152 +     * @param str the string
  10.153 +     * @return the matcher
  10.154 +     */
  10.155 +    public abstract RegExpMatcher match(String str);
  10.156 +
  10.157 +    /**
  10.158 +     * Throw a regexp parser exception.
  10.159 +     *
  10.160 +     * @param key the message key
  10.161 +     * @param str string argument
  10.162 +     * @throws jdk.nashorn.internal.runtime.ParserException
  10.163 +     */
  10.164 +    protected static void throwParserException(final String key, final String str) throws ParserException {
  10.165 +        throw new ParserException(ECMAErrors.getMessage("parser.error.regex." + key, str));
  10.166 +    }
  10.167 +}
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExpFactory.java	Fri Feb 22 16:31:10 2013 +0100
    11.3 @@ -0,0 +1,103 @@
    11.4 +/*
    11.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
    11.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    11.7 + *
    11.8 + * This code is free software; you can redistribute it and/or modify it
    11.9 + * under the terms of the GNU General Public License version 2 only, as
   11.10 + * published by the Free Software Foundation.  Oracle designates this
   11.11 + * particular file as subject to the "Classpath" exception as provided
   11.12 + * by Oracle in the LICENSE file that accompanied this code.
   11.13 + *
   11.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
   11.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   11.17 + * version 2 for more details (a copy is included in the LICENSE file that
   11.18 + * accompanied this code).
   11.19 + *
   11.20 + * You should have received a copy of the GNU General Public License version
   11.21 + * 2 along with this work; if not, write to the Free Software Foundation,
   11.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   11.23 + *
   11.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   11.25 + * or visit www.oracle.com if you need additional information or have any
   11.26 + * questions.
   11.27 + */
   11.28 +
   11.29 +package jdk.nashorn.internal.runtime.regexp;
   11.30 +
   11.31 +import jdk.nashorn.internal.parser.Lexer;
   11.32 +import jdk.nashorn.internal.runtime.ParserException;
   11.33 +
   11.34 +/**
   11.35 + * Factory class for regular expressions. This class creates instances of {@link DefaultRegExp}.
   11.36 + */
   11.37 +public class RegExpFactory {
   11.38 +
   11.39 +
   11.40 +    private final static RegExpFactory instance = new RegExpFactory();
   11.41 +
   11.42 +    /**
   11.43 +     * Creates a Regular expression from the given {@code pattern} and {@code flags} strings.
   11.44 +     *
   11.45 +     * @param pattern RegExp pattern string
   11.46 +     * @param flags RegExp flags string
   11.47 +     * @throws ParserException if flags is invalid or pattern string has syntax error.
   11.48 +     */
   11.49 +    protected RegExp compile(final String pattern, final String flags) throws ParserException {
   11.50 +        return new DefaultRegExp(pattern, flags);
   11.51 +    }
   11.52 +
   11.53 +    /**
   11.54 +     * Replace a regexp token as suitable for regexp instances created by this factory.
   11.55 +     *
   11.56 +     * @param str a regular expression token
   11.57 +     * @return the replacement token
   11.58 +     */
   11.59 +    protected String replaceToken(final String str) {
   11.60 +        switch (str) {
   11.61 +            case "\\s":
   11.62 +                return "[" + Lexer.getWhitespaceRegExp() + "]";
   11.63 +            case "\\S":
   11.64 +                return "[^" + Lexer.getWhitespaceRegExp() + "]";
   11.65 +            case "[^]":
   11.66 +                return "[\\s\\S]";
   11.67 +            default:
   11.68 +                return str;
   11.69 +        }
   11.70 +    }
   11.71 +
   11.72 +    /**
   11.73 +     * Compile a regexp with the given {@code pattern} and {@code flags}.
   11.74 +     *
   11.75 +     * @param pattern RegExp pattern string
   11.76 +     * @param flags  flag string
   11.77 +     *
   11.78 +     * @throws ParserException if invalid source or flags
   11.79 +     */
   11.80 +    public static RegExp create(final String pattern, final String flags) {
   11.81 +        return instance.compile(pattern,  flags);
   11.82 +    }
   11.83 +
   11.84 +    /**
   11.85 +     * Replace a regexp token as needed by the currently installed factory instance.
   11.86 +     *
   11.87 +     * @param token a regexp token
   11.88 +     * @return the replacement token
   11.89 +     */
   11.90 +    public static String replace(final String token) {
   11.91 +        return instance.replaceToken(token);
   11.92 +    }
   11.93 +
   11.94 +    /**
   11.95 +     * Validate a regexp with the given {@code pattern} and {@code flags}.
   11.96 +     *
   11.97 +     * @param pattern RegExp pattern string
   11.98 +     * @param flags  flag string
   11.99 +     *
  11.100 +     * @throws ParserException if invalid source or flags
  11.101 +     */
  11.102 +    // @SuppressWarnings({"unused"})
  11.103 +    public static void validate(final String pattern, final String flags) throws ParserException {
  11.104 +        instance.compile(pattern, flags);
  11.105 +    }
  11.106 +}
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExpMatcher.java	Fri Feb 22 16:31:10 2013 +0100
    12.3 @@ -0,0 +1,51 @@
    12.4 +/*
    12.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
    12.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    12.7 + *
    12.8 + * This code is free software; you can redistribute it and/or modify it
    12.9 + * under the terms of the GNU General Public License version 2 only, as
   12.10 + * published by the Free Software Foundation.  Oracle designates this
   12.11 + * particular file as subject to the "Classpath" exception as provided
   12.12 + * by Oracle in the LICENSE file that accompanied this code.
   12.13 + *
   12.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
   12.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   12.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12.17 + * version 2 for more details (a copy is included in the LICENSE file that
   12.18 + * accompanied this code).
   12.19 + *
   12.20 + * You should have received a copy of the GNU General Public License version
   12.21 + * 2 along with this work; if not, write to the Free Software Foundation,
   12.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   12.23 + *
   12.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   12.25 + * or visit www.oracle.com if you need additional information or have any
   12.26 + * questions.
   12.27 + */
   12.28 +
   12.29 +package jdk.nashorn.internal.runtime.regexp;
   12.30 +
   12.31 +import java.util.regex.MatchResult;
   12.32 +
   12.33 +/**
   12.34 + * Interface for matching a regular expression against a string and retrieving the
   12.35 + * match result. Extends {@link MatchResult}.
   12.36 + */
   12.37 +public interface RegExpMatcher extends MatchResult {
   12.38 +
   12.39 +    /**
   12.40 +     * Searches for pattern starting at {@code start}. Returns {@code true} if a match was found.
   12.41 +     *
   12.42 +     * @param start the start index in the input string
   12.43 +     * @return {@code true} if a match was found
   12.44 +     */
   12.45 +    boolean search(int start);
   12.46 +
   12.47 +    /**
   12.48 +     * Get the input string.
   12.49 +     *
   12.50 +     * @return the input string
   12.51 +     */
   12.52 +    String getInput();
   12.53 +
   12.54 +}
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExpResult.java	Fri Feb 22 16:31:10 2013 +0100
    13.3 @@ -0,0 +1,98 @@
    13.4 +/*
    13.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
    13.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    13.7 + *
    13.8 + * This code is free software; you can redistribute it and/or modify it
    13.9 + * under the terms of the GNU General Public License version 2 only, as
   13.10 + * published by the Free Software Foundation.  Oracle designates this
   13.11 + * particular file as subject to the "Classpath" exception as provided
   13.12 + * by Oracle in the LICENSE file that accompanied this code.
   13.13 + *
   13.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
   13.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   13.17 + * version 2 for more details (a copy is included in the LICENSE file that
   13.18 + * accompanied this code).
   13.19 + *
   13.20 + * You should have received a copy of the GNU General Public License version
   13.21 + * 2 along with this work; if not, write to the Free Software Foundation,
   13.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   13.23 + *
   13.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   13.25 + * or visit www.oracle.com if you need additional information or have any
   13.26 + * questions.
   13.27 + */
   13.28 +
   13.29 +package jdk.nashorn.internal.runtime.regexp;
   13.30 +
   13.31 +/**
   13.32 + * Match tuple to keep track of ongoing regexp match.
   13.33 + */
   13.34 +public final class RegExpResult {
   13.35 +    final Object[] groups;
   13.36 +    final int      index;
   13.37 +    final String   input;
   13.38 +
   13.39 +    /**
   13.40 +     * Constructor
   13.41 +     *
   13.42 +     * @param input  regexp input
   13.43 +     * @param index  index of match
   13.44 +     * @param groups groups vector
   13.45 +     */
   13.46 +    public RegExpResult(final String input, final int index, final Object[] groups) {
   13.47 +        this.input  = input;
   13.48 +        this.index  = index;
   13.49 +        this.groups = groups;
   13.50 +    }
   13.51 +
   13.52 +    /**
   13.53 +     * Get the groups for the match
   13.54 +     * @return group vector
   13.55 +     */
   13.56 +    public Object[] getGroups() {
   13.57 +        return groups;
   13.58 +    }
   13.59 +
   13.60 +    /**
   13.61 +     * Get the input for the match
   13.62 +     * @return input
   13.63 +     */
   13.64 +    public String getInput() {
   13.65 +        return input;
   13.66 +    }
   13.67 +
   13.68 +    /**
   13.69 +     * Get the index for the match
   13.70 +     * @return index
   13.71 +     */
   13.72 +    public int getIndex() {
   13.73 +        return index;
   13.74 +    }
   13.75 +
   13.76 +    /**
   13.77 +     * Get the length of the match
   13.78 +     * @return length
   13.79 +     */
   13.80 +    public int length() {
   13.81 +        return ((String)groups[0]).length();
   13.82 +    }
   13.83 +
   13.84 +    /**
   13.85 +     * Get the group with the given index or the empty string if group index is not valid.
   13.86 +     * @param index the group index
   13.87 +     * @return the group or ""
   13.88 +     */
   13.89 +    public Object getGroup(int index) {
   13.90 +        return index >= 0 && index < groups.length ? groups[index] : "";
   13.91 +    }
   13.92 +
   13.93 +    /**
   13.94 +     * Get the last parenthesis group, or the empty string if none exists.
   13.95 +     * @return the last group or ""
   13.96 +     */
   13.97 +    public Object getLastParen() {
   13.98 +        return groups.length > 1 ? groups[groups.length - 1] : "";
   13.99 +    }
  13.100 +
  13.101 +}
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/src/jdk/nashorn/internal/runtime/regexp/RegExpScanner.java	Fri Feb 22 16:31:10 2013 +0100
    14.3 @@ -0,0 +1,1391 @@
    14.4 +/*
    14.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
    14.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    14.7 + *
    14.8 + * This code is free software; you can redistribute it and/or modify it
    14.9 + * under the terms of the GNU General Public License version 2 only, as
   14.10 + * published by the Free Software Foundation.  Oracle designates this
   14.11 + * particular file as subject to the "Classpath" exception as provided
   14.12 + * by Oracle in the LICENSE file that accompanied this code.
   14.13 + *
   14.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
   14.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   14.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14.17 + * version 2 for more details (a copy is included in the LICENSE file that
   14.18 + * accompanied this code).
   14.19 + *
   14.20 + * You should have received a copy of the GNU General Public License version
   14.21 + * 2 along with this work; if not, write to the Free Software Foundation,
   14.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   14.23 + *
   14.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   14.25 + * or visit www.oracle.com if you need additional information or have any
   14.26 + * questions.
   14.27 + */
   14.28 +
   14.29 +package jdk.nashorn.internal.runtime.regexp;
   14.30 +
   14.31 +import java.util.ArrayList;
   14.32 +import java.util.HashMap;
   14.33 +import java.util.Iterator;
   14.34 +import java.util.LinkedHashMap;
   14.35 +import java.util.LinkedList;
   14.36 +import java.util.List;
   14.37 +import java.util.Map;
   14.38 +import java.util.regex.PatternSyntaxException;
   14.39 +
   14.40 +import jdk.nashorn.internal.parser.Scanner;
   14.41 +import jdk.nashorn.internal.runtime.BitVector;
   14.42 +
   14.43 +/**
   14.44 + * Scan a JavaScript regexp, converting to Java regex if necessary.
   14.45 + *
   14.46 + */
   14.47 +final class RegExpScanner extends Scanner {
   14.48 +
   14.49 +    /**
   14.50 +     * String builder to accumulate the result - this contains verbatim parsed JavaScript.
   14.51 +     * To get the Java equivalent we need to create a Pattern token and return its toString().
   14.52 +     */
   14.53 +    private final StringBuilder sb;
   14.54 +
   14.55 +    /** Is this the special case of a regexp that never matches anything */
   14.56 +    private boolean neverMatches;
   14.57 +
   14.58 +    /** The resulting java.util.regex pattern string. */
   14.59 +    private String javaPattern;
   14.60 +
   14.61 +    /** Expected token table */
   14.62 +    private final Map<Character, Integer> expected = new HashMap<>();
   14.63 +
   14.64 +    /** Capturing parentheses that have been found so far. */
   14.65 +    private final List<Capture> caps = new LinkedList<>();
   14.66 +
   14.67 +    /** Forward references to capturing parentheses to be resolved later. */
   14.68 +    private final Map<Integer, Token> forwardReferences = new LinkedHashMap<>();
   14.69 +
   14.70 +    /** Current level of zero-width negative lookahead assertions. */
   14.71 +    private int negativeLookaheadLevel;
   14.72 +
   14.73 +    private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?";
   14.74 +
   14.75 +    private static class Capture {
   14.76 +        /**
   14.77 +         * Zero-width negative lookaheads enclosing the capture.
   14.78 +         */
   14.79 +        private final int negativeLookaheadLevel;
   14.80 +        /**
   14.81 +         * Captures that live inside a negative lookahead are dead after the
   14.82 +         * lookahead and will be undefined if referenced from outside.
   14.83 +         */
   14.84 +        private boolean isDead;
   14.85 +
   14.86 +        Capture(final int negativeLookaheadLevel) {
   14.87 +            this.negativeLookaheadLevel = negativeLookaheadLevel;
   14.88 +        }
   14.89 +
   14.90 +        public int getNegativeLookaheadLevel() {
   14.91 +            return negativeLookaheadLevel;
   14.92 +        }
   14.93 +
   14.94 +        public boolean isDead() {
   14.95 +            return isDead;
   14.96 +        }
   14.97 +
   14.98 +        public void setDead() {
   14.99 +            this.isDead = true;
  14.100 +        }
  14.101 +    }
  14.102 +
  14.103 +    /**
  14.104 +     * This is a token - the JavaScript regexp is scanned into a token tree
  14.105 +     * A token has other tokens as children as well as "atoms", i.e. Strings.
  14.106 +     */
  14.107 +    private static class Token {
  14.108 +
  14.109 +        private enum Type {
  14.110 +            PATTERN,
  14.111 +            DISJUNCTION,
  14.112 +            ALTERNATIVE,
  14.113 +            TERM,
  14.114 +            ASSERTION,
  14.115 +            QUANTIFIER,
  14.116 +            QUANTIFIER_PREFIX,
  14.117 +            ATOM,
  14.118 +            PATTERN_CHARACTER,
  14.119 +            ATOM_ESCAPE,
  14.120 +            CHARACTER_ESCAPE,
  14.121 +            CONTROL_ESCAPE,
  14.122 +            CONTROL_LETTER,
  14.123 +            IDENTITY_ESCAPE,
  14.124 +            DECIMAL_ESCAPE,
  14.125 +            CHARACTERCLASS_ESCAPE,
  14.126 +            CHARACTERCLASS,
  14.127 +            CLASSRANGES,
  14.128 +            NON_EMPTY_CLASSRANGES,
  14.129 +            NON_EMPTY_CLASSRANGES_NODASH,
  14.130 +            CLASSATOM,
  14.131 +            CLASSATOM_NODASH,
  14.132 +            CLASS_ESCAPE,
  14.133 +            DECIMALDIGITS,
  14.134 +            HEX_ESCAPESEQUENCE,
  14.135 +            UNICODE_ESCAPESEQUENCE,
  14.136 +        }
  14.137 +
  14.138 +        /**
  14.139 +         * Token type
  14.140 +         */
  14.141 +        private final Token.Type type;
  14.142 +
  14.143 +        /**
  14.144 +         * Child nodes
  14.145 +         */
  14.146 +        private final List<Object> children;
  14.147 +
  14.148 +        /**
  14.149 +         * Parent node
  14.150 +         */
  14.151 +        private Token parent;
  14.152 +
  14.153 +        /**
  14.154 +         * Dead code flag
  14.155 +         */
  14.156 +        private boolean isDead;
  14.157 +
  14.158 +        private static final Map<Type, ToString> toStringMap = new HashMap<>();
  14.159 +        private static final ToString DEFAULT_TOSTRING = new ToString();
  14.160 +
  14.161 +        private static String unicode(final int value) {
  14.162 +            final StringBuilder sb = new StringBuilder();
  14.163 +            final String hex = Integer.toHexString(value);
  14.164 +            sb.append('u');
  14.165 +            for (int i = 0; i < 4 - hex.length(); i++) {
  14.166 +                sb.append('0');
  14.167 +            }
  14.168 +            sb.append(hex);
  14.169 +
  14.170 +            return sb.toString();
  14.171 +        }
  14.172 +
  14.173 +        static {
  14.174 +            toStringMap.put(Type.CHARACTERCLASS, new ToString() {
  14.175 +                @Override
  14.176 +                public String toString(final Token token) {
  14.177 +                    return super.toString(token).replace("\\b", "\b");
  14.178 +                }
  14.179 +            });
  14.180 +
  14.181 +            // for some reason java regexps don't like control characters of the
  14.182 +            // form "\\ca".match([string with ascii 1 at char0]). Translating
  14.183 +            // them to unicode does it though.
  14.184 +            toStringMap.put(Type.CHARACTER_ESCAPE, new ToString() {
  14.185 +                @Override
  14.186 +                public String toString(final Token token) {
  14.187 +                    final String str = super.toString(token);
  14.188 +                    if (str.length() == 2) {
  14.189 +                        return Token.unicode(Character.toLowerCase(str.charAt(1)) - 'a' + 1);
  14.190 +                    }
  14.191 +                    return str;
  14.192 +                }
  14.193 +            });
  14.194 +
  14.195 +            toStringMap.put(Type.DECIMAL_ESCAPE, new ToString() {
  14.196 +                @Override
  14.197 +                public String toString(final Token token) {
  14.198 +                    final String str = super.toString(token);
  14.199 +
  14.200 +                    if ("\0".equals(str)) {
  14.201 +                        return str;
  14.202 +                    }
  14.203 +
  14.204 +                    int value;
  14.205 +
  14.206 +                    if (!token.hasParentOfType(Type.CLASSRANGES)) {
  14.207 +                        return str;
  14.208 +                    }
  14.209 +
  14.210 +                    value = Integer.parseInt(str, 8); //throws exception that leads to SyntaxError if not octal
  14.211 +                    if (value > 0xff) {
  14.212 +                        throw new NumberFormatException(str);
  14.213 +                    }
  14.214 +
  14.215 +                    return Token.unicode(value);
  14.216 +                }
  14.217 +            });
  14.218 +
  14.219 +        }
  14.220 +
  14.221 +        /**
  14.222 +         * JavaScript Token to Java regex substring framework.
  14.223 +         */
  14.224 +        private static class ToString {
  14.225 +            String toString(final Token token) {
  14.226 +                final Object[] children = token.getChildren();
  14.227 +
  14.228 +                // Allow the installed regexp factory to perform global substitutions.
  14.229 +                switch (children.length) {
  14.230 +                    case 0:
  14.231 +                        return "";
  14.232 +                    case 1:
  14.233 +                        return RegExpFactory.replace(children[0].toString());
  14.234 +                    default:
  14.235 +                        final StringBuilder sb = new StringBuilder();
  14.236 +                        for (final Object child : children) {
  14.237 +                            sb.append(child);
  14.238 +                        }
  14.239 +                        return RegExpFactory.replace(sb.toString());
  14.240 +                }
  14.241 +            }
  14.242 +        }
  14.243 +
  14.244 +        /**
  14.245 +         * Token iterator. Doesn't return "atom" children, i.e. string representations,
  14.246 +         * just tokens.
  14.247 +         *
  14.248 +         */
  14.249 +        private static class TokenIterator implements Iterator<Token> {
  14.250 +            private final List<Token> preorder;
  14.251 +
  14.252 +            private void init(final Token root) {
  14.253 +                preorder.add(root);
  14.254 +                for (final Object child : root.getChildren()) {
  14.255 +                    if (child instanceof Token) {
  14.256 +                        init((Token)child);
  14.257 +                    }
  14.258 +                }
  14.259 +            }
  14.260 +
  14.261 +            TokenIterator(final Token root) {
  14.262 +                preorder = new ArrayList<>();
  14.263 +                init(root);
  14.264 +            }
  14.265 +
  14.266 +            @Override
  14.267 +            public boolean hasNext() {
  14.268 +                return !preorder.isEmpty();
  14.269 +            }
  14.270 +
  14.271 +            @Override
  14.272 +            public Token next() {
  14.273 +                return preorder.remove(0);
  14.274 +            }
  14.275 +
  14.276 +            @Override
  14.277 +            public void remove() {
  14.278 +                next();
  14.279 +            }
  14.280 +        }
  14.281 +
  14.282 +        /**
  14.283 +         * Constructor
  14.284 +         * @param type the token type
  14.285 +         */
  14.286 +        Token(final Token.Type type) {
  14.287 +            this.type = type;
  14.288 +            children = new ArrayList<>();
  14.289 +        }
  14.290 +
  14.291 +        /**
  14.292 +         * Add an "atom" child to a token
  14.293 +         * @param child the child to add
  14.294 +         * @return the token (for chaining)
  14.295 +         */
  14.296 +        public Token add(final String child) {
  14.297 +            children.add(child);
  14.298 +            return this;
  14.299 +        }
  14.300 +
  14.301 +        /**
  14.302 +         * Add a child to a token
  14.303 +         * @param child the child
  14.304 +         * @return the token (for chaining)
  14.305 +         */
  14.306 +        public Token add(final Token child) {
  14.307 +            if (child != null) {
  14.308 +                children.add(child);
  14.309 +                child.setParent(this);
  14.310 +            }
  14.311 +            return this;
  14.312 +        }
  14.313 +
  14.314 +        /**
  14.315 +         * Remove a child from a token
  14.316 +         * @param child the child to remove
  14.317 +         * @return true if successful
  14.318 +         */
  14.319 +        public boolean remove(final Token child) {
  14.320 +            return children.remove(child);
  14.321 +        }
  14.322 +
  14.323 +        /**
  14.324 +         * Remove the last child from a token
  14.325 +         * @return the removed child
  14.326 +         */
  14.327 +        public Object removeLast() {
  14.328 +            return children.remove(children.size() - 1);
  14.329 +        }
  14.330 +
  14.331 +        /**
  14.332 +         * Flag this token as dead code
  14.333 +         * @param isDead is it dead or not
  14.334 +         */
  14.335 +        private void setIsDead(final boolean isDead) {
  14.336 +            this.isDead = isDead;
  14.337 +        }
  14.338 +
  14.339 +        /**
  14.340 +         * Is this token dead code
  14.341 +         * @return boolean
  14.342 +         */
  14.343 +        private boolean getIsDead() {
  14.344 +            return isDead;
  14.345 +        }
  14.346 +
  14.347 +        /**
  14.348 +         * Get the parent of this token
  14.349 +         * @return parent token
  14.350 +         */
  14.351 +        public Token getParent() {
  14.352 +            return parent;
  14.353 +        }
  14.354 +
  14.355 +        public boolean hasParentOfType(final Token.Type parentType) {
  14.356 +            for (Token p = getParent(); p != null; p = p.getParent()) {
  14.357 +                if (p.getType() == parentType) {
  14.358 +                    return true;
  14.359 +                }
  14.360 +            }
  14.361 +            return false;
  14.362 +        }
  14.363 +
  14.364 +        public boolean hasChildOfType(final Token.Type childType) {
  14.365 +            for (final Iterator<Token> iter = iterator() ; iter.hasNext() ; ) {
  14.366 +                if (iter.next().getType() == childType) {
  14.367 +                    return true;
  14.368 +                }
  14.369 +            }
  14.370 +            return false;
  14.371 +        }
  14.372 +
  14.373 +        /**
  14.374 +         * Set the parent of this token
  14.375 +         * @param parent
  14.376 +         */
  14.377 +        private void setParent(final Token parent) {
  14.378 +            this.parent = parent;
  14.379 +        }
  14.380 +
  14.381 +        /**
  14.382 +         * Get the children of this token
  14.383 +         * @return an array of children, never null
  14.384 +         */
  14.385 +        public Object[] getChildren() {
  14.386 +            return children.toArray();
  14.387 +        }
  14.388 +
  14.389 +        /**
  14.390 +         * Reset this token, remove all children
  14.391 +         */
  14.392 +        public void reset() {
  14.393 +            children.clear();
  14.394 +        }
  14.395 +
  14.396 +        /**
  14.397 +         * Get a preorder token iterator with this token as root
  14.398 +         * @return an iterator
  14.399 +         */
  14.400 +        public Iterator<Token> iterator() {
  14.401 +            return new TokenIterator(this);
  14.402 +        }
  14.403 +
  14.404 +        /**
  14.405 +         * Get the type of this token
  14.406 +         * @return type
  14.407 +         */
  14.408 +        public Type getType() {
  14.409 +            return type;
  14.410 +        }
  14.411 +
  14.412 +        /**
  14.413 +         * Turn this token into Java regexp compatible text
  14.414 +         * @return part of a java regexp
  14.415 +         */
  14.416 +        @Override
  14.417 +        public String toString() {
  14.418 +            ToString t = toStringMap.get(getType());
  14.419 +            if (t == null) {
  14.420 +                t = DEFAULT_TOSTRING;
  14.421 +            }
  14.422 +            return t.toString(this);
  14.423 +        }
  14.424 +    }
  14.425 +
  14.426 +    /**
  14.427 +     * Constructor
  14.428 +     * @param string the JavaScript regexp to parse
  14.429 +     */
  14.430 +    private RegExpScanner(final String string) {
  14.431 +        super(string);
  14.432 +        sb = new StringBuilder(limit);
  14.433 +        reset(0);
  14.434 +        expected.put(']', 0);
  14.435 +        expected.put('}', 0);
  14.436 +    }
  14.437 +
  14.438 +    private void processForwardReferences() {
  14.439 +        if (neverMatches()) {
  14.440 +            return;
  14.441 +        }
  14.442 +
  14.443 +        for (final Map.Entry<Integer, Token> fwdRef : forwardReferences.entrySet()) {
  14.444 +            if (fwdRef.getKey().intValue() > caps.size()) {
  14.445 +                neverMatches = true;
  14.446 +                break;
  14.447 +            }
  14.448 +
  14.449 +            fwdRef.getValue().setIsDead(true);
  14.450 +        }
  14.451 +
  14.452 +        forwardReferences.clear();
  14.453 +    }
  14.454 +
  14.455 +    /**
  14.456 +     * Scan a JavaScript regexp string into a scanner holding the Java safe regex string.
  14.457 +     *
  14.458 +     * @param string
  14.459 +     *            JavaScript regexp string.
  14.460 +     * @return scanner holding the Java safe regex string, or null if the regexp never matches.
  14.461 +     */
  14.462 +    public static RegExpScanner scan(final String string) {
  14.463 +        final RegExpScanner scanner = new RegExpScanner(string);
  14.464 +
  14.465 +        Token pattern;
  14.466 +
  14.467 +        try {
  14.468 +            pattern = scanner.pattern();
  14.469 +        } catch (final Exception e) {
  14.470 +            throw new PatternSyntaxException(e.getMessage(), string, scanner.sb.length());
  14.471 +        }
  14.472 +
  14.473 +        scanner.processForwardReferences();
  14.474 +        if (scanner.neverMatches()) {
  14.475 +            return null; // never matches
  14.476 +        }
  14.477 +
  14.478 +        // go over the code and remove dead code
  14.479 +        final Iterator<Token> iter = pattern.iterator();
  14.480 +        while (iter.hasNext()) {
  14.481 +            final Token next = iter.next();
  14.482 +            if (next.getIsDead()) {
  14.483 +                next.getParent().remove(next);
  14.484 +            }
  14.485 +        }
  14.486 +
  14.487 +        // turn the pattern into a string, p, the java equivalent string for our js regexp
  14.488 +        final String p = pattern.toString();
  14.489 +        // if builder contains all tokens that were sent in, we know
  14.490 +        // we correctly parsed the entire JavaScript regexp without syntax errors
  14.491 +        if (!string.equals(scanner.getStringBuilder().toString())) {
  14.492 +            throw new PatternSyntaxException(string, p, p.length() + 1);
  14.493 +        }
  14.494 +
  14.495 +        scanner.javaPattern = p;
  14.496 +        return scanner;
  14.497 +     }
  14.498 +
  14.499 +    /**
  14.500 +     * Can this regexp never match anything? Use of e.g. [], which is legal in JavaScript,
  14.501 +     * is an example where we never match.
  14.502 +     *
  14.503 +     * @return true if the regexp never matches
  14.504 +     */
  14.505 +    private boolean neverMatches() {
  14.506 +        return neverMatches;
  14.507 +    }
  14.508 +
  14.509 +    final StringBuilder getStringBuilder() {
  14.510 +        return sb;
  14.511 +    }
  14.512 +
  14.513 +    String getJavaPattern() {
  14.514 +        return javaPattern;
  14.515 +    }
  14.516 +
  14.517 +    BitVector getGroupsInNegativeLookahead() {
  14.518 +        BitVector vec = null;
  14.519 +        for (int i = 0; i < caps.size(); i++) {
  14.520 +            final Capture cap = caps.get(i);
  14.521 +            if (cap.getNegativeLookaheadLevel() > 0) {
  14.522 +                if (vec == null) {
  14.523 +                    vec = new BitVector(caps.size() + 1);
  14.524 +                }
  14.525 +                vec.set(i + 1);
  14.526 +            }
  14.527 +        }
  14.528 +        return vec;
  14.529 +    }
  14.530 +
  14.531 +    /**
  14.532 +     * Commit n characters to the builder and to a given token
  14.533 +     * @param token Uncommitted token.
  14.534 +     * @param n     Number of characters.
  14.535 +     * @return Committed token
  14.536 +     */
  14.537 +    private Token commit(final Token token, final int n) {
  14.538 +        final int startIn = position;
  14.539 +
  14.540 +        switch (n) {
  14.541 +        case 1:
  14.542 +            sb.append(ch0);
  14.543 +            skip(1);
  14.544 +            break;
  14.545 +        case 2:
  14.546 +            sb.append(ch0);
  14.547 +            sb.append(ch1);
  14.548 +            skip(2);
  14.549 +            break;
  14.550 +        case 3:
  14.551 +            sb.append(ch0);
  14.552 +            sb.append(ch1);
  14.553 +            sb.append(ch2);
  14.554 +            skip(3);
  14.555 +            break;
  14.556 +        default:
  14.557 +            assert false : "Should not reach here";
  14.558 +        }
  14.559 +
  14.560 +        if (token == null) {
  14.561 +            return null;
  14.562 +        }
  14.563 +
  14.564 +        return token.add(sb.substring(startIn, sb.length()));
  14.565 +    }
  14.566 +
  14.567 +    /**
  14.568 +     * Restart the buffers back at an earlier position.
  14.569 +     *
  14.570 +     * @param startIn
  14.571 +     *            Position in the input stream.
  14.572 +     * @param startOut
  14.573 +     *            Position in the output stream.
  14.574 +     */
  14.575 +    private void restart(final int startIn, final int startOut) {
  14.576 +        reset(startIn);
  14.577 +        sb.setLength(startOut);
  14.578 +    }
  14.579 +
  14.580 +    private void push(final char ch) {
  14.581 +        expected.put(ch, expected.get(ch) + 1);
  14.582 +    }
  14.583 +
  14.584 +    private void pop(final char ch) {
  14.585 +        expected.put(ch, Math.min(0, expected.get(ch) - 1));
  14.586 +    }
  14.587 +
  14.588 +    /*
  14.589 +     * Recursive descent tokenizer starts below.
  14.590 +     */
  14.591 +
  14.592 +    /*
  14.593 +     * Pattern ::
  14.594 +     *      Disjunction
  14.595 +     */
  14.596 +    private Token pattern() {
  14.597 +        final Token token = new Token(Token.Type.PATTERN);
  14.598 +
  14.599 +        final Token child = disjunction();
  14.600 +        return token.add(child);
  14.601 +    }
  14.602 +
  14.603 +    /*
  14.604 +     * Disjunction ::
  14.605 +     *      Alternative
  14.606 +     *      Alternative | Disjunction
  14.607 +     */
  14.608 +    private Token disjunction() {
  14.609 +        final Token token = new Token(Token.Type.DISJUNCTION);
  14.610 +
  14.611 +        while (true) {
  14.612 +            token.add(alternative());
  14.613 +
  14.614 +            if (ch0 == '|') {
  14.615 +                commit(token, 1);
  14.616 +            } else {
  14.617 +                break;
  14.618 +            }
  14.619 +        }
  14.620 +
  14.621 +        return token;
  14.622 +    }
  14.623 +
  14.624 +    /*
  14.625 +     * Alternative ::
  14.626 +     *      [empty]
  14.627 +     *      Alternative Term
  14.628 +     */
  14.629 +    private Token alternative() {
  14.630 +        final Token token = new Token(Token.Type.ALTERNATIVE);
  14.631 +
  14.632 +        Token child;
  14.633 +        while ((child = term()) != null) {
  14.634 +            token.add(child);
  14.635 +        }
  14.636 +
  14.637 +        return token;
  14.638 +    }
  14.639 +
  14.640 +    /*
  14.641 +     * Term ::
  14.642 +     *      Assertion
  14.643 +     *      Atom
  14.644 +     *      Atom Quantifier
  14.645 +     */
  14.646 +    private Token term() {
  14.647 +        final int startIn  = position;
  14.648 +        final int startOut = sb.length();
  14.649 +        final Token token  = new Token(Token.Type.TERM);
  14.650 +        Token child;
  14.651 +
  14.652 +        child = assertion();
  14.653 +        if (child != null) {
  14.654 +            return token.add(child);
  14.655 +        }
  14.656 +
  14.657 +        child = atom();
  14.658 +        if (child != null) {
  14.659 +            boolean emptyCharacterClass = false;
  14.660 +            if ("[]".equals(child.toString())) {
  14.661 +                emptyCharacterClass = true;
  14.662 +            }
  14.663 +
  14.664 +            token.add(child);
  14.665 +
  14.666 +            final Token quantifier = quantifier();
  14.667 +            if (quantifier != null) {
  14.668 +                token.add(quantifier);
  14.669 +            }
  14.670 +
  14.671 +            if (emptyCharacterClass) {
  14.672 +                if (quantifier == null) {
  14.673 +                    neverMatches = true; //never matches ever.
  14.674 +                } else {
  14.675 +                    //if we can get away with max zero, remove this entire token
  14.676 +                    final String qs = quantifier.toString();
  14.677 +                    if ("+".equals(qs) || "*".equals(qs) || qs.startsWith("{0,")) {
  14.678 +                        token.setIsDead(true);
  14.679 +                    }
  14.680 +                }
  14.681 +            }
  14.682 +
  14.683 +            return token;
  14.684 +        }
  14.685 +
  14.686 +        restart(startIn, startOut);
  14.687 +        return null;
  14.688 +    }
  14.689 +
  14.690 +    /*
  14.691 +     * Assertion ::
  14.692 +     *      ^
  14.693 +     *      $
  14.694 +     *      \b
  14.695 +     *      \B
  14.696 +     *      ( ? = Disjunction )
  14.697 +     *      ( ? ! Disjunction )
  14.698 +     */
  14.699 +    private Token assertion() {
  14.700 +        final int startIn  = position;
  14.701 +        final int startOut = sb.length();
  14.702 +        final Token token  = new Token(Token.Type.ASSERTION);
  14.703 +
  14.704 +        switch (ch0) {
  14.705 +        case '^':
  14.706 +        case '$':
  14.707 +            return commit(token, 1);
  14.708 +
  14.709 +        case '\\':
  14.710 +            if (ch1 == 'b' || ch1 == 'B') {
  14.711 +                return commit(token, 2);
  14.712 +            }
  14.713 +            break;
  14.714 +
  14.715 +        case '(':
  14.716 +            if (ch1 != '?') {
  14.717 +                break;
  14.718 +            }
  14.719 +            if (ch2 != '=' && ch2 != '!') {
  14.720 +                break;
  14.721 +            }
  14.722 +            final boolean isNegativeLookahead = (ch2 == '!');
  14.723 +            commit(token, 3);
  14.724 +
  14.725 +            if (isNegativeLookahead) {
  14.726 +                negativeLookaheadLevel++;
  14.727 +            }
  14.728 +            final Token disjunction = disjunction();
  14.729 +            if (isNegativeLookahead) {
  14.730 +                for (final Capture cap : caps) {
  14.731 +                    if (cap.getNegativeLookaheadLevel() >= negativeLookaheadLevel) {
  14.732 +                        cap.setDead();
  14.733 +                    }
  14.734 +                }
  14.735 +                negativeLookaheadLevel--;
  14.736 +            }
  14.737 +
  14.738 +            if (disjunction != null && ch0 == ')') {
  14.739 +                token.add(disjunction);
  14.740 +                return commit(token, 1);
  14.741 +            }
  14.742 +            break;
  14.743 +
  14.744 +        default:
  14.745 +            break;
  14.746 +        }
  14.747 +
  14.748 +        restart(startIn, startOut);
  14.749 +
  14.750 +        return null;
  14.751 +    }
  14.752 +
  14.753 +    /*
  14.754 +     * Quantifier ::
  14.755 +     *      QuantifierPrefix
  14.756 +     *      QuantifierPrefix ?
  14.757 +     */
  14.758 +    private Token quantifier() {
  14.759 +        final Token token = new Token(Token.Type.QUANTIFIER);
  14.760 +        final Token child = quantifierPrefix();
  14.761 +        if (child != null) {
  14.762 +            token.add(child);
  14.763 +            if (ch0 == '?') {
  14.764 +                commit(token, 1);
  14.765 +            }
  14.766 +            return token;
  14.767 +        }
  14.768 +        return null;
  14.769 +    }
  14.770 +
  14.771 +    /*
  14.772 +     * QuantifierPrefix ::
  14.773 +     *      *
  14.774 +     *      +
  14.775 +     *      ?
  14.776 +     *      { DecimalDigits }
  14.777 +     *      { DecimalDigits , }
  14.778 +     *      { DecimalDigits , DecimalDigits }
  14.779 +     */
  14.780 +    private Token quantifierPrefix() {
  14.781 +        final int startIn  = position;
  14.782 +        final int startOut = sb.length();
  14.783 +        final Token token  = new Token(Token.Type.QUANTIFIER_PREFIX);
  14.784 +
  14.785 +        switch (ch0) {
  14.786 +        case '*':
  14.787 +        case '+':
  14.788 +        case '?':
  14.789 +            return commit(token, 1);
  14.790 +
  14.791 +        case '{':
  14.792 +            commit(token, 1);
  14.793 +
  14.794 +            final Token child = decimalDigits();
  14.795 +            if (child == null) {
  14.796 +                break; // not a quantifier - back out
  14.797 +            }
  14.798 +            push('}');
  14.799 +            token.add(child);
  14.800 +
  14.801 +            if (ch0 == ',') {
  14.802 +                commit(token, 1);
  14.803 +                token.add(decimalDigits());
  14.804 +            }
  14.805 +
  14.806 +            if (ch0 == '}') {
  14.807 +                pop('}');
  14.808 +                commit(token, 1);
  14.809 +            }
  14.810 +
  14.811 +            return token;
  14.812 +
  14.813 +        default:
  14.814 +            break;
  14.815 +        }
  14.816 +
  14.817 +        restart(startIn, startOut);
  14.818 +        return null;
  14.819 +    }
  14.820 +
  14.821 +    /*
  14.822 +     * Atom ::
  14.823 +     *      PatternCharacter
  14.824 +     *      .
  14.825 +     *      \ AtomEscape
  14.826 +     *      CharacterClass
  14.827 +     *      ( Disjunction )
  14.828 +     *      ( ? : Disjunction )
  14.829 +     *
  14.830 +     */
  14.831 +    private Token atom() {
  14.832 +        final int startIn  = position;
  14.833 +        final int startOut = sb.length();
  14.834 +        final Token token  = new Token(Token.Type.ATOM);
  14.835 +        Token child;
  14.836 +
  14.837 +        child = patternCharacter();
  14.838 +        if (child != null) {
  14.839 +            return token.add(child);
  14.840 +        }
  14.841 +
  14.842 +        if (ch0 == '.') {
  14.843 +            return commit(token, 1);
  14.844 +        }
  14.845 +
  14.846 +        if (ch0 == '\\') {
  14.847 +            commit(token, 1);
  14.848 +            child = atomEscape();
  14.849 +
  14.850 +            if (child != null) {
  14.851 +                if (child.hasChildOfType(Token.Type.IDENTITY_ESCAPE)) {
  14.852 +                    final char idEscape = child.toString().charAt(0);
  14.853 +                    if (NON_IDENT_ESCAPES.indexOf(idEscape) == -1) {
  14.854 +                        token.reset();
  14.855 +                    }
  14.856 +                }
  14.857 +
  14.858 +                token.add(child);
  14.859 +
  14.860 +                // forward backreferences always match empty. JavaScript != Java
  14.861 +                if (child.hasChildOfType(Token.Type.DECIMAL_ESCAPE) && !"\u0000".equals(child.toString())) {
  14.862 +                    final int refNum = Integer.parseInt(child.toString());
  14.863 +
  14.864 +                    if (refNum - 1 < caps.size() && caps.get(refNum - 1).isDead()) {
  14.865 +                        // reference to dead in-negative-lookahead capture
  14.866 +                        token.setIsDead(true);
  14.867 +                    } else if (caps.size() < refNum) {
  14.868 +                        // forward reference: always matches against empty string (dead token).
  14.869 +                        // invalid reference (non-existent capture): pattern never matches.
  14.870 +                        forwardReferences.put(refNum, token);
  14.871 +                    }
  14.872 +                }
  14.873 +
  14.874 +                return token;
  14.875 +            }
  14.876 +        }
  14.877 +
  14.878 +        child = characterClass();
  14.879 +        if (child != null) {
  14.880 +            return token.add(child);
  14.881 +        }
  14.882 +
  14.883 +        if (ch0 == '(') {
  14.884 +            boolean capturingParens = true;
  14.885 +            commit(token, 1);
  14.886 +            if (ch0 == '?' && ch1 == ':') {
  14.887 +                capturingParens = false;
  14.888 +                commit(token, 2);
  14.889 +            }
  14.890 +
  14.891 +            child = disjunction();
  14.892 +            if (child != null) {
  14.893 +                token.add(child);
  14.894 +                if (ch0 == ')') {
  14.895 +                    final Token atom = commit(token, 1);
  14.896 +                    if (capturingParens) {
  14.897 +                        caps.add(new Capture(negativeLookaheadLevel));
  14.898 +                    }
  14.899 +                    return atom;
  14.900 +                }
  14.901 +            }
  14.902 +        }
  14.903 +
  14.904 +        restart(startIn, startOut);
  14.905 +        return null;
  14.906 +    }
  14.907 +
  14.908 +    /*
  14.909 +     * PatternCharacter ::
  14.910 +     *      SourceCharacter but not any of: ^$\.*+?()[]{}|
  14.911 +     */
  14.912 +    @SuppressWarnings("fallthrough")
  14.913 +    private Token patternCharacter() {
  14.914 +        if (atEOF()) {
  14.915 +            return null;
  14.916 +        }
  14.917 +
  14.918 +        switch (ch0) {
  14.919 +        case '^':
  14.920 +        case '$':
  14.921 +        case '\\':
  14.922 +        case '.':
  14.923 +        case '*':
  14.924 +        case '+':
  14.925 +        case '?':
  14.926 +        case '(':
  14.927 +        case ')':
  14.928 +        case '[':
  14.929 +        case '|':
  14.930 +            return null;
  14.931 +
  14.932 +        case '}':
  14.933 +        case ']':
  14.934 +            final int n = expected.get(ch0);
  14.935 +            if (n != 0) {
  14.936 +                return null;
  14.937 +            }
  14.938 +
  14.939 +       case '{':
  14.940 +           // if not a valid quantifier, escape the curly brace to match itself;
  14.941 +           // this ensures compatibility with other JS implementations
  14.942 +           final Token quant = quantifierPrefix();
  14.943 +           return (quant == null) ? commit(new Token(Token.Type.PATTERN_CHARACTER).add("\\"), 1) : null;
  14.944 +
  14.945 +        default:
  14.946 +            return commit(new Token(Token.Type.PATTERN_CHARACTER), 1); // SOURCECHARACTER
  14.947 +        }
  14.948 +    }
  14.949 +
  14.950 +    /*
  14.951 +     * AtomEscape ::
  14.952 +     *      DecimalEscape
  14.953 +     *      CharacterEscape
  14.954 +     *      CharacterClassEscape
  14.955 +     */
  14.956 +    private Token atomEscape() {
  14.957 +        final Token token = new Token(Token.Type.ATOM_ESCAPE);
  14.958 +        Token child;
  14.959 +
  14.960 +        child = decimalEscape();
  14.961 +        if (child != null) {
  14.962 +            return token.add(child);
  14.963 +        }
  14.964 +
  14.965 +        child = characterClassEscape();
  14.966 +        if (child != null) {
  14.967 +            return token.add(child);
  14.968 +        }
  14.969 +
  14.970 +        child = characterEscape();
  14.971 +        if (child != null) {
  14.972 +            return token.add(child);
  14.973 +        }
  14.974 +
  14.975 +
  14.976 +        return null;
  14.977 +    }
  14.978 +
  14.979 +    /*
  14.980 +     * CharacterEscape ::
  14.981 +     *      ControlEscape
  14.982 +     *      c ControlLetter
  14.983 +     *      HexEscapeSequence
  14.984 +     *      UnicodeEscapeSequence
  14.985 +     *      IdentityEscape
  14.986 +     */
  14.987 +    private Token characterEscape() { // Returns a CHARACTER_ESCAPE token wrapping the first alternative that parses, tried in the order coded below.
  14.988 +        final int startIn  = position; // input position on entry, handed to restart()
  14.989 +        final int startOut = sb.length(); // length of sb on entry, handed to restart()
  14.990 +
  14.991 +        final Token token = new Token(Token.Type.CHARACTER_ESCAPE);
  14.992 +        Token child;
  14.993 +
  14.994 +        child = controlEscape(); // one of f, n, r, t, v
  14.995 +        if (child != null) {
  14.996 +            return token.add(child);
  14.997 +        }
  14.998 +
  14.999 +        if (ch0 == 'c') {
 14.1000 +            commit(token, 1); // commit the 'c'; a ControlLetter has to follow
 14.1001 +            child = controlLetter();
 14.1002 +            if (child != null) {
 14.1003 +                return token.add(child);
 14.1004 +            }
 14.1005 +            restart(startIn, startOut); // no letter after 'c'; restart() is defined elsewhere, presumably it rewinds to the entry state - confirm
 14.1006 +        } // NOTE(review): nothing visible here removes the committed 'c' from token before the later token.add(child) calls - confirm restart() covers it
 14.1007 +
 14.1008 +        child = hexEscapeSequence(); // 'x' plus two hex digits
 14.1009 +        if (child != null) {
 14.1010 +            return token.add(child);
 14.1011 +        }
 14.1012 +
 14.1013 +        child = unicodeEscapeSequence(); // 'u' plus four hex digits
 14.1014 +        if (child != null) {
 14.1015 +            return token.add(child);
 14.1016 +        }
 14.1017 +
 14.1018 +        child = identityEscape(); // as written in this file, always returns a token
 14.1019 +        if (child != null) {
 14.1020 +            return token.add(child);
 14.1021 +        }
 14.1022 +
 14.1023 +        restart(startIn, startOut); // only reached if identityEscape() returns null
 14.1024 +
 14.1025 +        return null;
 14.1026 +    }
 14.1027 +
 14.1028 +    private boolean scanEscapeSequence(final char leader, final int length, final Token token) { // Matches leader followed by exactly length hex digits, committing each character to token; returns whether it matched.
 14.1029 +        final int startIn  = position; // entry state, handed to restart() on failure
 14.1030 +        final int startOut = sb.length();
 14.1031 +
 14.1032 +        if (ch0 != leader) {
 14.1033 +            return false; // nothing has been committed yet, so restart() is not called
 14.1034 +        }
 14.1035 +
 14.1036 +        commit(token, 1); // the leader character
 14.1037 +        for (int i = 0; i < length; i++) {
 14.1038 +            final char ch0l = Character.toLowerCase(ch0); // lets one range test cover a-f and A-F
 14.1039 +            if ((ch0l >= 'a' && ch0l <= 'f') || isDecimalDigit(ch0)) {
 14.1040 +                commit(token, 1);
 14.1041 +            } else {
 14.1042 +                restart(startIn, startOut); // too few hex digits; token may still hold what was committed, and both callers in this file discard it
 14.1043 +                return false;
 14.1044 +            }
 14.1045 +        }
 14.1046 +
 14.1047 +        return true;
 14.1048 +    }
 14.1049 +
 14.1050 +    private Token hexEscapeSequence() { // HexEscapeSequence: 'x' followed by two hex digits; null when that does not match.
 14.1051 +        final Token hex = new Token(Token.Type.HEX_ESCAPESEQUENCE);
 14.1052 +        // The token is only handed out when the whole sequence scanned successfully.
 14.1053 +        return scanEscapeSequence('x', 2, hex) ? hex : null;
 14.1054 +    }
 14.1057 +
 14.1058 +    private Token unicodeEscapeSequence() { // UnicodeEscapeSequence: 'u' followed by four hex digits; null when that does not match.
 14.1059 +        final Token unicode = new Token(Token.Type.UNICODE_ESCAPESEQUENCE);
 14.1060 +        // The token is only handed out when the whole sequence scanned successfully.
 14.1061 +        return scanEscapeSequence('u', 4, unicode) ? unicode : null;
 14.1062 +    }
 14.1065 +
 14.1066 +    /*
 14.1067 +     * ControlEscape ::
 14.1068 +     *      one of fnrtv
 14.1069 +     */
 14.1070 +    private Token controlEscape() {
 14.1071 +        switch (ch0) {
 14.1072 +        case 'f':
 14.1073 +        case 'n':
 14.1074 +        case 'r':
 14.1075 +        case 't':
 14.1076 +        case 'v':
 14.1077 +            return commit(new Token(Token.Type.CONTROL_ESCAPE), 1);
 14.1078 +
 14.1079 +        default:
 14.1080 +            return null;
 14.1081 +        }
 14.1082 +    }
 14.1083 +
 14.1084 +    /*
 14.1085 +     * ControlLetter ::
 14.1086 +     *      one of abcdefghijklmnopqrstuvwxyz
 14.1087 +     *      ABCDEFGHIJKLMNOPQRSTUVWXYZ
 14.1088 +     */
 14.1089 +    private Token controlLetter() { // Returns a CONTROL_LETTER token when ch0 is an ASCII letter (a-z or A-Z); otherwise null with nothing committed.
 14.1090 +        // Test ch0 itself: Character.toUpperCase() also maps U+0131 and U+017F to 'I' and 'S', which are not ControlLetters.
 14.1091 +        if ((ch0 >= 'A' && ch0 <= 'Z') || (ch0 >= 'a' && ch0 <= 'z')) {
 14.1092 +            final Token token = new Token(Token.Type.CONTROL_LETTER);
 14.1093 +            commit(token, 1); // commit the letter as written
 14.1094 +            return token;
 14.1095 +        }
 14.1096 +        return null;
 14.1102 +    }
 14.1103 +
 14.1104 +    /*
 14.1105 +     * IdentityEscape ::
 14.1106 +     *      SourceCharacter but not IdentifierPart
 14.1107 +     *      <ZWJ>  (200d)
 14.1108 +     *      <ZWNJ> (200c)
 14.1109 +     */
 14.1110 +    private Token identityEscape() { // Wraps the character at ch0 in an IDENTITY_ESCAPE token; no check on the character is made here.
 14.1111 +        final Token escaped = new Token(Token.Type.IDENTITY_ESCAPE);
 14.1112 +        commit(escaped, 1); // unconditional, so this method never returns null
 14.1113 +        return escaped;
 14.1114 +    }
 14.1115 +
 14.1116 +    /*
 14.1117 +     * DecimalEscape ::
 14.1118 +     *      DecimalIntegerLiteral [lookahead not in DecimalDigit]
 14.1119 +     */
 14.1120 +    private Token decimalEscape() { // Parses the digit run at ch0 into a DECIMAL_ESCAPE token; a lone 0 yields a NUL character child; returns null if ch0 is not a digit.
 14.1121 +        final Token token = new Token(Token.Type.DECIMAL_ESCAPE);
 14.1122 +        final int startIn  = position; // entry state, handed to restart() below
 14.1123 +        final int startOut = sb.length();
 14.1124 +
 14.1125 +        if (ch0 == '0' && !isDecimalDigit(ch1)) { // a single 0 with no digit after it
 14.1126 +            commit(token, 1);
 14.1127 +            token.removeLast(); // presumably drops the child commit() just added so NUL can take its place - confirm
 14.1128 +            //  DecimalEscape :: 0. If i is zero, return the EscapeValue consisting of a <NUL> character (Unicode value 0000);
 14.1129 +            return token.add("\u0000");
 14.1130 +        }
 14.1131 +
 14.1132 +        if (isDecimalDigit(ch0)) {
 14.1133 +            while (isDecimalDigit(ch0)) { // commit the whole digit run, a leading 0 included
 14.1134 +                commit(token, 1);
 14.1135 +            }
 14.1136 +            return token;
 14.1137 +        }
 14.1138 +
 14.1139 +        restart(startIn, startOut); // nothing was committed on this path
 14.1140 +
 14.1141 +        return null;
 14.1142 +    }
 14.1143 +
 14.1144 +    /*
 14.1145 +     * CharacterClassEscape ::
 14.1146 +     *  one of dDsSwW
 14.1147 +     */
 14.1148 +    private Token characterClassEscape() {
 14.1149 +        switch (ch0) {
 14.1150 +        case 's':
 14.1151 +        case 'S':
 14.1152 +        case 'd':
 14.1153 +        case 'D':
 14.1154 +        case 'w':
 14.1155 +        case 'W':
 14.1156 +            return commit(new Token(Token.Type.CHARACTERCLASS_ESCAPE), 1);
 14.1157 +
 14.1158 +        default:
 14.1159 +            return null;
 14.1160 +        }
 14.1161 +    }
 14.1162 +
 14.1163 +    /*
 14.1164 +     * CharacterClass ::
 14.1165 +     *      [ [lookahead not in {^}] ClassRanges ]
 14.1166 +     *      [ ^ ClassRanges ]
 14.1167 +     */
 14.1168 +    private Token characterClass() { // Parses a bracketed class starting at '[' into a CHARACTERCLASS token; returns null if ch0 is not '[' or the class is not closed by ']'.
 14.1169 +        final int startIn  = position; // entry state, handed to restart() on failure
 14.1170 +        final int startOut = sb.length();
 14.1171 +        final Token token  = new Token(Token.Type.CHARACTERCLASS);
 14.1172 +
 14.1173 +        if (ch0 == '[') {
 14.1174 +            push(']'); // push()/pop() are defined elsewhere; presumably they track the bracket still to be closed - confirm
 14.1175 +            commit(token, 1);
 14.1176 +
 14.1177 +            if (ch0 == '^') { // optional negation marker
 14.1178 +                commit(token, 1);
 14.1179 +            }
 14.1180 +
 14.1181 +            final Token child = classRanges();
 14.1182 +            if (child != null && ch0 == ']') {
 14.1183 +                pop(']');
 14.1184 +                token.add(child);
 14.1185 +                return commit(token, 1); // commit the closing ']'
 14.1186 +            }
 14.1187 +        } // NOTE(review): when '[' was seen but the class did not close, this method does not call pop(']')
 14.1188 +
 14.1189 +        restart(startIn, startOut);
 14.1190 +        return null;
 14.1191 +    }
 14.1192 +
 14.1193 +    /*
 14.1194 +     * ClassRanges ::
 14.1195 +     *      [empty]
 14.1196 +     *      NonemptyClassRanges
 14.1197 +     */
 14.1198 +    private Token classRanges() { // Wraps the result of nonemptyClassRanges() in a CLASSRANGES token.
 14.1199 +        return new Token(Token.Type.CLASSRANGES).add(nonemptyClassRanges()); // the argument is null for an empty class; how Token.add() treats null is not visible here
 14.1200 +    }
 14.1201 +
 14.1202 +    /*
 14.1203 +     * NonemptyClassRanges ::
 14.1204 +     *      ClassAtom
 14.1205 +     *      ClassAtom NonemptyClassRangesNoDash
 14.1206 +     *      ClassAtom - ClassAtom ClassRanges
 14.1207 +     */
 14.1208 +    private Token nonemptyClassRanges() { // Parses a ClassAtom plus any range or further atoms after it into a NON_EMPTY_CLASSRANGES token; returns null when no ClassAtom starts at ch0.
 14.1209 +        final int startIn  = position; // entry state, handed to restart() on failure
 14.1210 +        final int startOut = sb.length();
 14.1211 +        final Token token  = new Token(Token.Type.NON_EMPTY_CLASSRANGES);
 14.1212 +        Token child;
 14.1213 +
 14.1214 +        child = classAtom();
 14.1215 +        if (child != null) {
 14.1216 +            token.add(child);
 14.1217 +
 14.1218 +            if (ch0 == '-') { // possible range: ClassAtom - ClassAtom ClassRanges
 14.1219 +                commit(token, 1);
 14.1220 +
 14.1221 +                final Token child1 = classAtom(); // range end
 14.1222 +                final Token child2 = classRanges(); // remainder of the class; parsed before child1 is checked
 14.1223 +                if (child1 != null && child2 != null) {
 14.1224 +                    token.add(child1);
 14.1225 +                    token.add(child2);
 14.1226 +
 14.1227 +                    return token;
 14.1228 +                }
 14.1229 +            } // if the range did not complete, parsing continues below with the '-' already committed (no restart here)
 14.1230 +
 14.1231 +            child = nonemptyClassRangesNoDash();
 14.1232 +            if (child != null) {
 14.1233 +                token.add(child);
 14.1234 +                return token;
 14.1235 +            }
 14.1236 +
 14.1237 +            return token; // a lone ClassAtom is a valid result as well
 14.1238 +        }
 14.1239 +
 14.1240 +        restart(startIn, startOut);
 14.1241 +        return null;
 14.1242 +    }
 14.1243 +
 14.1244 +    /*
 14.1245 +     * NonemptyClassRangesNoDash ::
 14.1246 +     *      ClassAtom
 14.1247 +     *      ClassAtomNoDash NonemptyClassRangesNoDash
 14.1248 +     *      ClassAtomNoDash - ClassAtom ClassRanges
 14.1249 +     */
 14.1250 +    private Token nonemptyClassRangesNoDash() { // Parses class contents that begin with a ClassAtomNoDash (or, failing that, a ClassAtom) into a NON_EMPTY_CLASSRANGES_NODASH token; returns null if neither is present.
 14.1251 +        final int startIn  = position; // entry state, handed to restart() on failure
 14.1252 +        final int startOut = sb.length();
 14.1253 +        final Token token  = new Token(Token.Type.NON_EMPTY_CLASSRANGES_NODASH);
 14.1254 +        Token child;
 14.1255 +
 14.1256 +        child = classAtomNoDash();
 14.1257 +        if (child != null) {
 14.1258 +            token.add(child);
 14.1259 +
 14.1260 +            // check for a dash first; otherwise, e.g. in [a-b|c-d], the - would be parsed as an atom
 14.1261 +            if (ch0 == '-') {
 14.1262 +               commit(token, 1);
 14.1263 +
 14.1264 +               final Token child1 = classAtom(); // range end
 14.1265 +               final Token child2 = classRanges(); // remainder of the class; parsed before child1 is checked
 14.1266 +               if (child1 != null && child2 != null) {
 14.1267 +                   token.add(child1);
 14.1268 +                   return token.add(child2);
 14.1269 +               }
 14.1270 +               //fallthru: the range did not complete; continue with the '-' already committed
 14.1271 +           }
 14.1272 +
 14.1273 +            child = nonemptyClassRangesNoDash();
 14.1274 +            if (child != null) {
 14.1275 +                token.add(child);
 14.1276 +            }
 14.1277 +            return token; // still a class atom
 14.1278 +        }
 14.1279 +
 14.1280 +        child = classAtom(); // classAtomNoDash() found nothing; classAtom() additionally accepts a lone '-'
 14.1281 +        if (child != null) {
 14.1282 +            return token.add(child);
 14.1283 +        }
 14.1284 +
 14.1285 +        restart(startIn, startOut);
 14.1286 +        return null;
 14.1287 +    }
 14.1288 +
 14.1289 +    /*
 14.1290 +     * ClassAtom :: - | ClassAtomNoDash
 14.1291 +     */
 14.1292 +    private Token classAtom() { // Parses '-' or a ClassAtomNoDash into a CLASSATOM token; returns null if neither is present.
 14.1293 +        final Token atom = new Token(Token.Type.CLASSATOM);
 14.1294 +
 14.1295 +        if (ch0 == '-') {
 14.1296 +            return commit(atom, 1); // a dash counts as an atom in its own right here
 14.1297 +        }
 14.1298 +
 14.1299 +        final Token inner = classAtomNoDash();
 14.1300 +        return inner == null ? null : atom.add(inner);
 14.1301 +    }
 14.1306 +
 14.1307 +    /*
 14.1308 +     * ClassAtomNoDash ::
 14.1309 +     *      SourceCharacter but not one of \ or ] or -
 14.1310 +     *      \ ClassEscape
 14.1311 +     */
 14.1312 +    private Token classAtomNoDash() { // Parses one class atom other than '-' into a CLASSATOM_NODASH token; returns null at ']', '-', '\0' or a backslash that classEscape() rejects.
 14.1313 +        final int startIn  = position; // entry state, handed to restart() in the escape case
 14.1314 +        final int startOut = sb.length();
 14.1315 +        final Token token  = new Token(Token.Type.CLASSATOM_NODASH);
 14.1316 +
 14.1317 +        switch (ch0) {
 14.1318 +        case ']':
 14.1319 +        case '-':
 14.1320 +        case '\0': // presumably the value ch0 takes at end of input - confirm
 14.1321 +            return null;
 14.1322 +
 14.1323 +        case '[':
 14.1324 +            // unescaped left square bracket - add escape
 14.1325 +            return commit(token.add("\\"), 1); // the backslash child is added ahead of the committed '['
 14.1326 +
 14.1327 +        case '\\':
 14.1328 +            commit(token, 1); // commit the backslash, then parse what it escapes
 14.1329 +            final Token child = classEscape();
 14.1330 +            if (child != null) {
 14.1331 +                return token.add(child);
 14.1332 +            }
 14.1333 +
 14.1334 +            restart(startIn, startOut); // the backslash was already committed, so hand the entry state back to restart()
 14.1335 +            return null;
 14.1336 +
 14.1337 +        default:
 14.1338 +            return commit(token, 1); // any other character is taken as is
 14.1339 +        }
 14.1340 +    }
 14.1341 +
 14.1342 +    /*
 14.1343 +     * ClassEscape ::
 14.1344 +     *      DecimalEscape
 14.1345 +     *      b
 14.1346 +     *      CharacterEscape
 14.1347 +     *      CharacterClassEscape
 14.1348 +     */
 14.1349 +    private Token classEscape() { // Returns a CLASS_ESCAPE token for the escape starting at ch0, or null if no alternative parses.
 14.1350 +        final Token escape = new Token(Token.Type.CLASS_ESCAPE);
 14.1351 +
 14.1352 +        Token inner = decimalEscape();
 14.1353 +        if (inner == null) {
 14.1354 +            if (ch0 == 'b') {
 14.1355 +                return commit(escape, 1); // b is committed directly, with no child token
 14.1356 +            }
 14.1357 +            inner = characterEscape();
 14.1358 +        }
 14.1359 +        if (inner == null) {
 14.1360 +            // NOTE(review): characterEscape() ends in identityEscape(), which as written always returns a token, so this fallback looks unreachable - confirm.
 14.1361 +            inner = characterClassEscape();
 14.1362 +        }
 14.1363 +
 14.1364 +        return inner == null ? null : escape.add(inner);
 14.1365 +    }
 14.1374 +
 14.1375 +    /*
 14.1376 +     * DecimalDigits
 14.1377 +     */
 14.1378 +    private Token decimalDigits() { // Commits the run of decimal digits at ch0 into a DECIMALDIGITS token; returns null when ch0 is not a digit.
 14.1379 +        Token digits = null;
 14.1380 +        if (isDecimalDigit(ch0)) {
 14.1381 +            digits = new Token(Token.Type.DECIMALDIGITS);
 14.1382 +            do {
 14.1383 +                commit(digits, 1);
 14.1384 +            } while (isDecimalDigit(ch0));
 14.1385 +        }
 14.1386 +        return digits;
 14.1387 +    }
 14.1390 +
 14.1391 +    private static boolean isDecimalDigit(final char ch) { // True only for the ASCII digits '0' through '9'.
 14.1392 +        return '0' <= ch && ch <= '9';
 14.1393 +    }
 14.1394 +}

mercurial