src/jdk/nashorn/internal/parser/Lexer.java

Fri, 17 Oct 2014 14:24:26 +0200

author
attila
date
Fri, 17 Oct 2014 14:24:26 +0200
changeset 1063
8c51767d534d
parent 998
b788246cf987
child 1205
4112748288bb
child 1228
3f7e205c2c44
permissions
-rw-r--r--

8059843: Make AST serializable
Reviewed-by: hannesw, lagergren

     1 /*
     2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package jdk.nashorn.internal.parser;
    28 import static jdk.nashorn.internal.parser.TokenType.ADD;
    29 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
    30 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
    31 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
    32 import static jdk.nashorn.internal.parser.TokenType.EOF;
    33 import static jdk.nashorn.internal.parser.TokenType.EOL;
    34 import static jdk.nashorn.internal.parser.TokenType.ERROR;
    35 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
    36 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
    37 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
    38 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
    39 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
    40 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
    41 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
    42 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
    43 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
    44 import static jdk.nashorn.internal.parser.TokenType.REGEX;
    45 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
    46 import static jdk.nashorn.internal.parser.TokenType.STRING;
    47 import static jdk.nashorn.internal.parser.TokenType.XML;
    49 import java.io.Serializable;
    50 import jdk.nashorn.internal.runtime.ECMAErrors;
    51 import jdk.nashorn.internal.runtime.ErrorManager;
    52 import jdk.nashorn.internal.runtime.JSErrorType;
    53 import jdk.nashorn.internal.runtime.JSType;
    54 import jdk.nashorn.internal.runtime.ParserException;
    55 import jdk.nashorn.internal.runtime.Source;
    56 import jdk.nashorn.internal.runtime.options.Options;
    58 /**
    59  * Responsible for converting source content into a stream of tokens.
    60  *
    61  */
    62 @SuppressWarnings("fallthrough")
    63 public class Lexer extends Scanner {
    /** {@link Integer#MIN_VALUE} held as a {@code long}. */
    private static final long MIN_INT_L = Integer.MIN_VALUE;
    /** {@link Integer#MAX_VALUE} held as a {@code long}. */
    private static final long MAX_INT_L = Integer.MAX_VALUE;

    /** Value of the {@code nashorn.lexer.xmlliterals} boolean property; consulted by {@link #canStartLiteral(TokenType)}. */
    private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    /** Content source. */
    private final Source source;

    /** Buffered stream for tokens. */
    private final TokenStream stream;

    /**
     * True if scripting mode is enabled. In this mode '`' is accepted as a string
     * delimiter and '&lt;' may start a literal (here strings).
     */
    private final boolean scripting;

    /** True if a nested scan. (scan to completion, no EOF.) */
    private final boolean nested;

    /**
     * Line number of the most recent EOL that has been seen but not yet written to the
     * token stream, or -1 when no EOL is pending.
     */
    int pendingLine;

    /** Position of last EOL + 1. */
    private int linePosition;

    /** Type of last token added. */
    private TokenType last;

    /** True if the lexer was asked (at construction) to pause when it encounters a function body. */
    private final boolean pauseOnFunctionBody;
    /**
     * Set once a {@code function} keyword has been scanned while {@link #pauseOnFunctionBody} is true.
     * NOTE(review): presumably consumed when the next left brace is lexed; that code is not in this part of the file.
     */
    private boolean pauseOnNextLeftBrace;
    /** Horizontal blanks shared by the JSON and JavaScript whitespace sets. */
    private static final String SPACETAB = " \t";  // ASCII space and tab
    /** Line terminators shared by the JSON and JavaScript end-of-line sets. */
    private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    /** Characters that end a line in JSON text. */
    private static final String JSON_WHITESPACE_EOL = LFCR;
    /** Characters treated as whitespace in JSON text. */
    private static final String JSON_WHITESPACE     = SPACETAB + LFCR;

    /** Characters that end a line in JavaScript source. */
    private static final String JAVASCRIPT_WHITESPACE_EOL =
        LFCR +
        "\u2028" + // line separator
        "\u2029"   // paragraph separator
        ;
    /** Characters treated as whitespace in JavaScript source (includes the end-of-line characters). */
    private static final String JAVASCRIPT_WHITESPACE =
        SPACETAB +
        JAVASCRIPT_WHITESPACE_EOL +
        "\u000b" + // line tabulation (vertical tab)
        "\u000c" + // ff (ctrl-l)
        "\u00a0" + // Latin-1 space
        "\u1680" + // Ogham space mark
        "\u180e" + // Mongolian vowel separator
        "\u2000" + // en quad
        "\u2001" + // em quad
        "\u2002" + // en space
        "\u2003" + // em space
        "\u2004" + // three-per-em space
        "\u2005" + // four-per-em space
        "\u2006" + // six-per-em space
        "\u2007" + // figure space
        "\u2008" + // punctuation space
        "\u2009" + // thin space
        "\u200a" + // hair space
        "\u202f" + // narrow no-break space
        "\u205f" + // medium mathematical space
        "\u3000" + // ideographic space
        "\ufeff"   // byte order mark
        ;

    /**
     * The same character set as {@link #JAVASCRIPT_WHITESPACE}, but spelled as textual
     * escape sequences (the backslash is part of the string) so that it can be embedded
     * in a regular expression. Returned by {@link #getWhitespaceRegExp()}.
     */
    private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
        "\\u000a" + // line feed
        "\\u000d" + // carriage return (ctrl-m)
        "\\u2028" + // line separator
        "\\u2029" + // paragraph separator
        "\\u0009" + // tab
        "\\u0020" + // ASCII space
        "\\u000b" + // line tabulation (vertical tab)
        "\\u000c" + // ff (ctrl-l)
        "\\u00a0" + // Latin-1 space
        "\\u1680" + // Ogham space mark
        "\\u180e" + // Mongolian vowel separator
        "\\u2000" + // en quad
        "\\u2001" + // em quad
        "\\u2002" + // en space
        "\\u2003" + // em space
        "\\u2004" + // three-per-em space
        "\\u2005" + // four-per-em space
        "\\u2006" + // six-per-em space
        "\\u2007" + // figure space
        "\\u2008" + // punctuation space
        "\\u2009" + // thin space
        "\\u200a" + // hair space
        "\\u202f" + // narrow no-break space
        "\\u205f" + // medium mathematical space
        "\\u3000" + // ideographic space
        "\\ufeff"   // byte order mark
        ;
   158     static String unicodeEscape(final char ch) {
   159         final StringBuilder sb = new StringBuilder();
   161         sb.append("\\u");
   163         final String hex = Integer.toHexString(ch);
   164         for (int i = hex.length(); i < 4; i++) {
   165             sb.append('0');
   166         }
   167         sb.append(hex);
   169         return sb.toString();
   170     }
    /**
     * Constructor. Delegates to the three argument constructor with scripting
     * mode switched off.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     */
    public Lexer(final Source source, final TokenStream stream) {
        this(source, stream, false);
    }
    /**
     * Constructor. Lexes the whole source (from position 0 for
     * {@code source.getLength()} characters) and does not pause on function bodies.
     *
     * @param source    the source
     * @param stream    the token stream that receives the lexed tokens
     * @param scripting are we in scripting mode
     */
    public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
        this(source, 0, source.getLength(), stream, scripting, false);
    }
   193     /**
   194      * Constructor
   195      *
   196      * @param source    the source
   197      * @param start     start position in source from which to start lexing
   198      * @param len       length of source segment to lex
   199      * @param stream    token stream to lex
   200      * @param scripting are we in scripting mode
   201      * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
   202      * function body. This is used with the feature where the parser is skipping nested function bodies to
   203      * avoid reading ahead unnecessarily when we skip the function bodies.
   204      */
   206     public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean pauseOnFunctionBody) {
   207         super(source.getContent(), 1, start, len);
   208         this.source      = source;
   209         this.stream      = stream;
   210         this.scripting   = scripting;
   211         this.nested      = false;
   212         this.pendingLine = 1;
   213         this.last        = EOL;
   215         this.pauseOnFunctionBody = pauseOnFunctionBody;
   216     }
   218     private Lexer(final Lexer lexer, final State state) {
   219         super(lexer, state);
   221         source = lexer.source;
   222         stream = lexer.stream;
   223         scripting = lexer.scripting;
   224         nested = true;
   226         pendingLine = state.pendingLine;
   227         linePosition = state.linePosition;
   228         last = EOL;
   229         pauseOnFunctionBody = false;
   230     }
   232     static class State extends Scanner.State {
   233         /** Pending new line number and position. */
   234         public final int pendingLine;
   236         /** Position of last EOL + 1. */
   237         public final int linePosition;
   239         /** Type of last token added. */
   240         public final TokenType last;
   242         /*
   243          * Constructor.
   244          */
   246         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
   247             super(position, limit, line);
   249             this.pendingLine = pendingLine;
   250             this.linePosition = linePosition;
   251             this.last = last;
   252         }
   253     }
   255     /**
   256      * Save the state of the scan.
   257      *
   258      * @return Captured state.
   259      */
   260     @Override
   261     State saveState() {
   262         return new State(position, limit, line, pendingLine, linePosition, last);
   263     }
   265     /**
   266      * Restore the state of the scan.
   267      *
   268      * @param state
   269      *            Captured state.
   270      */
   271     void restoreState(final State state) {
   272         super.restoreState(state);
   274         pendingLine = state.pendingLine;
   275         linePosition = state.linePosition;
   276         last = state.last;
   277     }
   279     /**
   280      * Add a new token to the stream.
   281      *
   282      * @param type
   283      *            Token type.
   284      * @param start
   285      *            Start position.
   286      * @param end
   287      *            End position.
   288      */
   289     protected void add(final TokenType type, final int start, final int end) {
   290         // Record last token.
   291         last = type;
   293         // Only emit the last EOL in a cluster.
   294         if (type == EOL) {
   295             pendingLine = end;
   296             linePosition = start;
   297         } else {
   298             // Write any pending EOL to stream.
   299             if (pendingLine != -1) {
   300                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
   301                 pendingLine = -1;
   302             }
   304             // Write token to stream.
   305             stream.put(Token.toDesc(type, start, end - start));
   306         }
   307     }
   309     /**
   310      * Add a new token to the stream.
   311      *
   312      * @param type
   313      *            Token type.
   314      * @param start
   315      *            Start position.
   316      */
   317     protected void add(final TokenType type, final int start) {
   318         add(type, start, position);
   319     }
    /**
     * Return the String of valid whitespace characters for regular
     * expressions in JavaScript. Each character is spelled as a textual
     * escape sequence rather than as the character itself.
     * @return regexp whitespace string
     */
    public static String getWhitespaceRegExp() {
        return JAVASCRIPT_WHITESPACE_IN_REGEXP;
    }
   330     /**
   331      * Skip end of line.
   332      *
   333      * @param addEOL true if EOL token should be recorded.
   334      */
   335     private void skipEOL(final boolean addEOL) {
   337         if (ch0 == '\r') { // detect \r\n pattern
   338             skip(1);
   339             if (ch0 == '\n') {
   340                 skip(1);
   341             }
   342         } else { // all other space, ch0 is guaranteed to be EOL or \0
   343             skip(1);
   344         }
   346         // bump up line count
   347         line++;
   349         if (addEOL) {
   350             // Add an EOL token.
   351             add(EOL, position, line);
   352         }
   353     }
   355     /**
   356      * Skip over rest of line including end of line.
   357      *
   358      * @param addEOL true if EOL token should be recorded.
   359      */
   360     private void skipLine(final boolean addEOL) {
   361         // Ignore characters.
   362         while (!isEOL(ch0) && !atEOF()) {
   363             skip(1);
   364         }
   365         // Skip over end of line.
   366         skipEOL(addEOL);
   367     }
   369     /**
   370      * Test whether a char is valid JavaScript whitespace
   371      * @param ch a char
   372      * @return true if valid JavaScript whitespace
   373      */
   374     public static boolean isJSWhitespace(final char ch) {
   375         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
   376     }
   378     /**
   379      * Test whether a char is valid JavaScript end of line
   380      * @param ch a char
   381      * @return true if valid JavaScript end of line
   382      */
   383     public static boolean isJSEOL(final char ch) {
   384         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
   385     }
   387     /**
   388      * Test whether a char is valid JSON whitespace
   389      * @param ch a char
   390      * @return true if valid JSON whitespace
   391      */
   392     public static boolean isJsonWhitespace(final char ch) {
   393         return JSON_WHITESPACE.indexOf(ch) != -1;
   394     }
   396     /**
   397      * Test whether a char is valid JSON end of line
   398      * @param ch a char
   399      * @return true if valid JSON end of line
   400      */
   401     public static boolean isJsonEOL(final char ch) {
   402         return JSON_WHITESPACE_EOL.indexOf(ch) != -1;
   403     }
   405     /**
   406      * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec
   407      * strings ('`') in scripting mode.
   408      * @param ch a char
   409      * @return true if string delimiter
   410      */
   411     protected boolean isStringDelimiter(final char ch) {
   412         return ch == '\'' || ch == '"' || (scripting && ch == '`');
   413     }
   415     /**
   416      * Test whether a char is valid JavaScript whitespace
   417      * @param ch a char
   418      * @return true if valid JavaScript whitespace
   419      */
   420     protected boolean isWhitespace(final char ch) {
   421         return Lexer.isJSWhitespace(ch);
   422     }
   424     /**
   425      * Test whether a char is valid JavaScript end of line
   426      * @param ch a char
   427      * @return true if valid JavaScript end of line
   428      */
   429     protected boolean isEOL(final char ch) {
   430         return Lexer.isJSEOL(ch);
   431     }
   433     /**
   434      * Skip over whitespace and detect end of line, adding EOL tokens if
   435      * encountered.
   436      *
   437      * @param addEOL true if EOL tokens should be recorded.
   438      */
   439     private void skipWhitespace(final boolean addEOL) {
   440         while (isWhitespace(ch0)) {
   441             if (isEOL(ch0)) {
   442                 skipEOL(addEOL);
   443             } else {
   444                 skip(1);
   445             }
   446         }
   447     }
   449     /**
   450      * Skip over comments.
   451      *
   452      * @return True if a comment.
   453      */
   454     protected boolean skipComments() {
   455         // Save the current position.
   456         final int start = position;
   458         if (ch0 == '/') {
   459             // Is it a // comment.
   460             if (ch1 == '/') {
   461                 // Skip over //.
   462                 skip(2);
   464                 boolean directiveComment = false;
   465                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
   466                     directiveComment = true;
   467                 }
   469                 // Scan for EOL.
   470                 while (!atEOF() && !isEOL(ch0)) {
   471                     skip(1);
   472                 }
   473                 // Did detect a comment.
   474                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
   475                 return true;
   476             } else if (ch1 == '*') {
   477                 // Skip over /*.
   478                 skip(2);
   479                 // Scan for */.
   480                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
   481                     // If end of line handle else skip character.
   482                     if (isEOL(ch0)) {
   483                         skipEOL(true);
   484                     } else {
   485                         skip(1);
   486                     }
   487                 }
   489                 if (atEOF()) {
   490                     // TODO - Report closing */ missing in parser.
   491                     add(ERROR, start);
   492                 } else {
   493                     // Skip */.
   494                     skip(2);
   495                 }
   497                 // Did detect a comment.
   498                 add(COMMENT, start);
   499                 return true;
   500             }
   501         } else if (ch0 == '#') {
   502             assert scripting;
   503             // shell style comment
   504             // Skip over #.
   505             skip(1);
   506             // Scan for EOL.
   507             while (!atEOF() && !isEOL(ch0)) {
   508                 skip(1);
   509             }
   510             // Did detect a comment.
   511             add(COMMENT, start);
   512             return true;
   513         }
   515         // Not a comment.
   516         return false;
   517     }
   519     /**
   520      * Convert a regex token to a token object.
   521      *
   522      * @param start  Position in source content.
   523      * @param length Length of regex token.
   524      * @return Regex token object.
   525      */
   526     public RegexToken valueOfPattern(final int start, final int length) {
   527         // Save the current position.
   528         final int savePosition = position;
   529         // Reset to beginning of content.
   530         reset(start);
   531         // Buffer for recording characters.
   532         final StringBuilder sb = new StringBuilder(length);
   534         // Skip /.
   535         skip(1);
   536         boolean inBrackets = false;
   537         // Scan for closing /, stopping at end of line.
   538         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
   539             // Skip over escaped character.
   540             if (ch0 == '\\') {
   541                 sb.append(ch0);
   542                 sb.append(ch1);
   543                 skip(2);
   544             } else {
   545                 if (ch0 == '[') {
   546                     inBrackets = true;
   547                 } else if (ch0 == ']') {
   548                     inBrackets = false;
   549                 }
   551                 // Skip literal character.
   552                 sb.append(ch0);
   553                 skip(1);
   554             }
   555         }
   557         // Get pattern as string.
   558         final String regex = sb.toString();
   560         // Skip /.
   561         skip(1);
   563         // Options as string.
   564         final String options = source.getString(position, scanIdentifier());
   566         reset(savePosition);
   568         // Compile the pattern.
   569         return new RegexToken(regex, options);
   570     }
   572     /**
   573      * Return true if the given token can be the beginning of a literal.
   574      *
   575      * @param token a token
   576      * @return true if token can start a literal.
   577      */
   578     public boolean canStartLiteral(final TokenType token) {
   579         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
   580     }
   582     /**
   583      * interface to receive line information for multi-line literals.
   584      */
   585     protected interface LineInfoReceiver {
   586         /**
   587          * Receives line information
   588          * @param line last line number
   589          * @param linePosition position of last line
   590          */
   591         public void lineInfo(int line, int linePosition);
   592     }
   594     /**
   595      * Check whether the given token represents the beginning of a literal. If so scan
   596      * the literal and return <tt>true</tt>, otherwise return false.
   597      *
   598      * @param token the token.
   599      * @param startTokenType the token type.
   600      * @param lir LineInfoReceiver that receives line info for multi-line string literals.
   601      * @return True if a literal beginning with startToken was found and scanned.
   602      */
   603     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
   604         // Check if it can be a literal.
   605         if (!canStartLiteral(startTokenType)) {
   606             return false;
   607         }
   608         // We break on ambiguous tokens so if we already moved on it can't be a literal.
   609         if (stream.get(stream.last()) != token) {
   610             return false;
   611         }
   612         // Rewind to token start position
   613         reset(Token.descPosition(token));
   615         if (ch0 == '/') {
   616             return scanRegEx();
   617         } else if (ch0 == '<') {
   618             if (ch1 == '<') {
   619                 return scanHereString(lir);
   620             } else if (Character.isJavaIdentifierStart(ch1)) {
   621                 return scanXMLLiteral();
   622             }
   623         }
   625         return false;
   626     }
   628     /**
   629      * Scan over regex literal.
   630      *
   631      * @return True if a regex literal.
   632      */
   633     private boolean scanRegEx() {
   634         assert ch0 == '/';
   635         // Make sure it's not a comment.
   636         if (ch1 != '/' && ch1 != '*') {
   637             // Record beginning of literal.
   638             final int start = position;
   639             // Skip /.
   640             skip(1);
   641             boolean inBrackets = false;
   643             // Scan for closing /, stopping at end of line.
   644             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
   645                 // Skip over escaped character.
   646                 if (ch0 == '\\') {
   647                     skip(1);
   648                     if (isEOL(ch0)) {
   649                         reset(start);
   650                         return false;
   651                     }
   652                     skip(1);
   653                 } else {
   654                     if (ch0 == '[') {
   655                         inBrackets = true;
   656                     } else if (ch0 == ']') {
   657                         inBrackets = false;
   658                     }
   660                     // Skip literal character.
   661                     skip(1);
   662                 }
   663             }
   665             // If regex literal.
   666             if (ch0 == '/') {
   667                 // Skip /.
   668                 skip(1);
   670                 // Skip over options.
   671                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
   672                     skip(1);
   673                 }
   675                 // Add regex token.
   676                 add(REGEX, start);
   677                 // Regex literal detected.
   678                 return true;
   679             }
   681             // False start try again.
   682             reset(start);
   683         }
   685         // Regex literal not detected.
   686         return false;
   687     }
   689     /**
   690      * Convert a digit to a integer.  Can't use Character.digit since we are
   691      * restricted to ASCII by the spec.
   692      *
   693      * @param ch   Character to convert.
   694      * @param base Numeric base.
   695      *
   696      * @return The converted digit or -1 if invalid.
   697      */
   698     protected static int convertDigit(final char ch, final int base) {
   699         int digit;
   701         if ('0' <= ch && ch <= '9') {
   702             digit = ch - '0';
   703         } else if ('A' <= ch && ch <= 'Z') {
   704             digit = ch - 'A' + 10;
   705         } else if ('a' <= ch && ch <= 'z') {
   706             digit = ch - 'a' + 10;
   707         } else {
   708             return -1;
   709         }
   711         return digit < base ? digit : -1;
   712     }
   715     /**
   716      * Get the value of a hexadecimal numeric sequence.
   717      *
   718      * @param length Number of digits.
   719      * @param type   Type of token to report against.
   720      * @return Value of sequence or < 0 if no digits.
   721      */
   722     private int hexSequence(final int length, final TokenType type) {
   723         int value = 0;
   725         for (int i = 0; i < length; i++) {
   726             final int digit = convertDigit(ch0, 16);
   728             if (digit == -1) {
   729                 error(Lexer.message("invalid.hex"), type, position, limit);
   730                 return i == 0 ? -1 : value;
   731             }
   733             value = digit | value << 4;
   734             skip(1);
   735         }
   737         return value;
   738     }
   740     /**
   741      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
   742      *
   743      * @return Value of sequence.
   744      */
   745     private int octalSequence() {
   746         int value = 0;
   748         for (int i = 0; i < 3; i++) {
   749             final int digit = convertDigit(ch0, 8);
   751             if (digit == -1) {
   752                 break;
   753             }
   754             value = digit | value << 3;
   755             skip(1);
   757             if (i == 1 && value >= 32) {
   758                 break;
   759             }
   760         }
   761         return value;
   762     }
   764     /**
   765      * Convert a string to a JavaScript identifier.
   766      *
   767      * @param start  Position in source content.
   768      * @param length Length of token.
   769      * @return Ident string or null if an error.
   770      */
   771     private String valueOfIdent(final int start, final int length) throws RuntimeException {
   772         // Save the current position.
   773         final int savePosition = position;
   774         // End of scan.
   775         final int end = start + length;
   776         // Reset to beginning of content.
   777         reset(start);
   778         // Buffer for recording characters.
   779         final StringBuilder sb = new StringBuilder(length);
   781         // Scan until end of line or end of file.
   782         while (!atEOF() && position < end && !isEOL(ch0)) {
   783             // If escape character.
   784             if (ch0 == '\\' && ch1 == 'u') {
   785                 skip(2);
   786                 final int ch = hexSequence(4, TokenType.IDENT);
   787                 if (isWhitespace((char)ch)) {
   788                     return null;
   789                 }
   790                 if (ch < 0) {
   791                     sb.append('\\');
   792                     sb.append('u');
   793                 } else {
   794                     sb.append((char)ch);
   795                 }
   796             } else {
   797                 // Add regular character.
   798                 sb.append(ch0);
   799                 skip(1);
   800             }
   801         }
   803         // Restore position.
   804         reset(savePosition);
   806         return sb.toString();
   807     }
   809     /**
   810      * Scan over and identifier or keyword. Handles identifiers containing
   811      * encoded Unicode chars.
   812      *
   813      * Example:
   814      *
   815      * var \u0042 = 44;
   816      */
   817     private void scanIdentifierOrKeyword() {
   818         // Record beginning of identifier.
   819         final int start = position;
   820         // Scan identifier.
   821         final int length = scanIdentifier();
   822         // Check to see if it is a keyword.
   823         final TokenType type = TokenLookup.lookupKeyword(content, start, length);
   824         if (type == FUNCTION && pauseOnFunctionBody) {
   825             pauseOnNextLeftBrace = true;
   826         }
   827         // Add keyword or identifier token.
   828         add(type, start);
   829     }
   831     /**
   832      * Convert a string to a JavaScript string object.
   833      *
   834      * @param start  Position in source content.
   835      * @param length Length of token.
   836      * @return JavaScript string object.
   837      */
   838     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
   839         // Save the current position.
   840         final int savePosition = position;
   841         // Calculate the end position.
   842         final int end = start + length;
   843         // Reset to beginning of string.
   844         reset(start);
   846         // Buffer for recording characters.
   847         final StringBuilder sb = new StringBuilder(length);
   849         // Scan until end of string.
   850         while (position < end) {
   851             // If escape character.
   852             if (ch0 == '\\') {
   853                 skip(1);
   855                 final char next = ch0;
   856                 final int afterSlash = position;
   858                 skip(1);
   860                 // Special characters.
   861                 switch (next) {
   862                 case '0':
   863                 case '1':
   864                 case '2':
   865                 case '3':
   866                 case '4':
   867                 case '5':
   868                 case '6':
   869                 case '7': {
   870                     if (strict) {
   871                         // "\0" itself is allowed in strict mode. Only other 'real'
   872                         // octal escape sequences are not allowed (eg. "\02", "\31").
   873                         // See section 7.8.4 String literals production EscapeSequence
   874                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
   875                             error(Lexer.message("strict.no.octal"), STRING, position, limit);
   876                         }
   877                     }
   878                     reset(afterSlash);
   879                     // Octal sequence.
   880                     final int ch = octalSequence();
   882                     if (ch < 0) {
   883                         sb.append('\\');
   884                         sb.append('x');
   885                     } else {
   886                         sb.append((char)ch);
   887                     }
   888                     break;
   889                 }
   890                 case 'n':
   891                     sb.append('\n');
   892                     break;
   893                 case 't':
   894                     sb.append('\t');
   895                     break;
   896                 case 'b':
   897                     sb.append('\b');
   898                     break;
   899                 case 'f':
   900                     sb.append('\f');
   901                     break;
   902                 case 'r':
   903                     sb.append('\r');
   904                     break;
   905                 case '\'':
   906                     sb.append('\'');
   907                     break;
   908                 case '\"':
   909                     sb.append('\"');
   910                     break;
   911                 case '\\':
   912                     sb.append('\\');
   913                     break;
   914                 case '\r': // CR | CRLF
   915                     if (ch0 == '\n') {
   916                         skip(1);
   917                     }
   918                     // fall through
   919                 case '\n': // LF
   920                 case '\u2028': // LS
   921                 case '\u2029': // PS
   922                     // continue on the next line, slash-return continues string
   923                     // literal
   924                     break;
   925                 case 'x': {
   926                     // Hex sequence.
   927                     final int ch = hexSequence(2, STRING);
   929                     if (ch < 0) {
   930                         sb.append('\\');
   931                         sb.append('x');
   932                     } else {
   933                         sb.append((char)ch);
   934                     }
   935                 }
   936                     break;
   937                 case 'u': {
   938                     // Unicode sequence.
   939                     final int ch = hexSequence(4, STRING);
   941                     if (ch < 0) {
   942                         sb.append('\\');
   943                         sb.append('u');
   944                     } else {
   945                         sb.append((char)ch);
   946                     }
   947                 }
   948                     break;
   949                 case 'v':
   950                     sb.append('\u000B');
   951                     break;
   952                 // All other characters.
   953                 default:
   954                     sb.append(next);
   955                     break;
   956                 }
   957             } else {
   958                 // Add regular character.
   959                 sb.append(ch0);
   960                 skip(1);
   961             }
   962         }
   964         // Restore position.
   965         reset(savePosition);
   967         return sb.toString();
   968     }
   970     /**
   971      * Scan over a string literal.
   972      * @param add true if we nare not just scanning but should actually modify the token stream
   973      */
   974     protected void scanString(final boolean add) {
   975         // Type of string.
   976         TokenType type = STRING;
   977         // Record starting quote.
   978         final char quote = ch0;
   979         // Skip over quote.
   980         skip(1);
   982         // Record beginning of string content.
   983         final State stringState = saveState();
   985         // Scan until close quote or end of line.
   986         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
   987             // Skip over escaped character.
   988             if (ch0 == '\\') {
   989                 type = ESCSTRING;
   990                 skip(1);
   991                 if (! isEscapeCharacter(ch0)) {
   992                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);
   993                 }
   994                 if (isEOL(ch0)) {
   995                     // Multiline string literal
   996                     skipEOL(false);
   997                     continue;
   998                 }
   999             }
  1000             // Skip literal character.
  1001             skip(1);
  1004         // If close quote.
  1005         if (ch0 == quote) {
  1006             // Skip close quote.
  1007             skip(1);
  1008         } else {
  1009             error(Lexer.message("missing.close.quote"), STRING, position, limit);
  1012         // If not just scanning.
  1013         if (add) {
  1014             // Record end of string.
  1015             stringState.setLimit(position - 1);
  1017             if (scripting && !stringState.isEmpty()) {
  1018                 switch (quote) {
  1019                 case '`':
  1020                     // Mark the beginning of an exec string.
  1021                     add(EXECSTRING, stringState.position, stringState.limit);
  1022                     // Frame edit string with left brace.
  1023                     add(LBRACE, stringState.position, stringState.position);
  1024                     // Process edit string.
  1025                     editString(type, stringState);
  1026                     // Frame edit string with right brace.
  1027                     add(RBRACE, stringState.limit, stringState.limit);
  1028                     break;
  1029                 case '"':
  1030                     // Only edit double quoted strings.
  1031                     editString(type, stringState);
  1032                     break;
  1033                 case '\'':
  1034                     // Add string token without editing.
  1035                     add(type, stringState.position, stringState.limit);
  1036                     break;
  1037                 default:
  1038                     break;
  1040             } else {
  1041                 /// Add string token without editing.
  1042                 add(type, stringState.position, stringState.limit);
  1047     /**
  1048      * Is the given character a valid escape char after "\" ?
  1050      * @param ch character to be checked
  1051      * @return if the given character is valid after "\"
  1052      */
  1053     protected boolean isEscapeCharacter(final char ch) {
  1054         return true;
  1057     /**
  1058      * Convert string to number.
  1060      * @param valueString  String to convert.
  1061      * @param radix        Numeric base.
  1062      * @return Converted number.
  1063      */
  1064     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
  1065         try {
  1066             final long value = Long.parseLong(valueString, radix);
  1067             if(value >= MIN_INT_L && value <= MAX_INT_L) {
  1068                 return Integer.valueOf((int)value);
  1070             return Long.valueOf(value);
  1071         } catch (final NumberFormatException e) {
  1072             if (radix == 10) {
  1073                 return Double.valueOf(valueString);
  1076             double value = 0.0;
  1078             for (int i = 0; i < valueString.length(); i++) {
  1079                 final char ch = valueString.charAt(i);
  1080                 // Preverified, should always be a valid digit.
  1081                 final int digit = convertDigit(ch, radix);
  1082                 value *= radix;
  1083                 value += digit;
  1086             return value;
  1090     /**
  1091      * Scan a number.
  1092      */
  1093     protected void scanNumber() {
  1094         // Record beginning of number.
  1095         final int start = position;
  1096         // Assume value is a decimal.
  1097         TokenType type = DECIMAL;
  1099         // First digit of number.
  1100         int digit = convertDigit(ch0, 10);
  1102         // If number begins with 0x.
  1103         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
  1104             // Skip over 0xN.
  1105             skip(3);
  1106             // Skip over remaining digits.
  1107             while (convertDigit(ch0, 16) != -1) {
  1108                 skip(1);
  1111             type = HEXADECIMAL;
  1112         } else {
  1113             // Check for possible octal constant.
  1114             boolean octal = digit == 0;
  1115             // Skip first digit if not leading '.'.
  1116             if (digit != -1) {
  1117                 skip(1);
  1120             // Skip remaining digits.
  1121             while ((digit = convertDigit(ch0, 10)) != -1) {
  1122                 // Check octal only digits.
  1123                 octal = octal && digit < 8;
  1124                 // Skip digit.
  1125                 skip(1);
  1128             if (octal && position - start > 1) {
  1129                 type = OCTAL;
  1130             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
  1131                 // Must be a double.
  1132                 if (ch0 == '.') {
  1133                     // Skip period.
  1134                     skip(1);
  1135                     // Skip mantissa.
  1136                     while (convertDigit(ch0, 10) != -1) {
  1137                         skip(1);
  1141                 // Detect exponent.
  1142                 if (ch0 == 'E' || ch0 == 'e') {
  1143                     // Skip E.
  1144                     skip(1);
  1145                     // Detect and skip exponent sign.
  1146                     if (ch0 == '+' || ch0 == '-') {
  1147                         skip(1);
  1149                     // Skip exponent.
  1150                     while (convertDigit(ch0, 10) != -1) {
  1151                         skip(1);
  1155                 type = FLOATING;
  1159         if (Character.isJavaIdentifierStart(ch0)) {
  1160             error(Lexer.message("missing.space.after.number"), type, position, 1);
  1163         // Add number token.
  1164         add(type, start);
  1167     /**
  1168      * Convert a regex token to a token object.
  1170      * @param start  Position in source content.
  1171      * @param length Length of regex token.
  1172      * @return Regex token object.
  1173      */
  1174     XMLToken valueOfXML(final int start, final int length) {
  1175         return new XMLToken(source.getString(start, length));
  1178     /**
  1179      * Scan over a XML token.
  1181      * @return TRUE if is an XML literal.
  1182      */
  1183     private boolean scanXMLLiteral() {
  1184         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
  1185         if (XML_LITERALS) {
  1186             // Record beginning of xml expression.
  1187             final int start = position;
  1189             int openCount = 0;
  1191             do {
  1192                 if (ch0 == '<') {
  1193                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
  1194                         skip(3);
  1195                         openCount--;
  1196                     } else if (Character.isJavaIdentifierStart(ch1)) {
  1197                         skip(2);
  1198                         openCount++;
  1199                     } else if (ch1 == '?') {
  1200                         skip(2);
  1201                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
  1202                         skip(4);
  1203                     } else {
  1204                         reset(start);
  1205                         return false;
  1208                     while (!atEOF() && ch0 != '>') {
  1209                         if (ch0 == '/' && ch1 == '>') {
  1210                             openCount--;
  1211                             skip(1);
  1212                             break;
  1213                         } else if (ch0 == '\"' || ch0 == '\'') {
  1214                             scanString(false);
  1215                         } else {
  1216                             skip(1);
  1220                     if (ch0 != '>') {
  1221                         reset(start);
  1222                         return false;
  1225                     skip(1);
  1226                 } else if (atEOF()) {
  1227                     reset(start);
  1228                     return false;
  1229                 } else {
  1230                     skip(1);
  1232             } while (openCount > 0);
  1234             add(XML, start);
  1235             return true;
  1238         return false;
  1241     /**
  1242      * Scan over identifier characters.
  1244      * @return Length of identifier or zero if none found.
  1245      */
  1246     private int scanIdentifier() {
  1247         final int start = position;
  1249         // Make sure first character is valid start character.
  1250         if (ch0 == '\\' && ch1 == 'u') {
  1251             skip(2);
  1252             final int ch = hexSequence(4, TokenType.IDENT);
  1254             if (!Character.isJavaIdentifierStart(ch)) {
  1255                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
  1257         } else if (!Character.isJavaIdentifierStart(ch0)) {
  1258             // Not an identifier.
  1259             return 0;
  1262         // Make sure remaining characters are valid part characters.
  1263         while (!atEOF()) {
  1264             if (ch0 == '\\' && ch1 == 'u') {
  1265                 skip(2);
  1266                 final int ch = hexSequence(4, TokenType.IDENT);
  1268                 if (!Character.isJavaIdentifierPart(ch)) {
  1269                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
  1271             } else if (Character.isJavaIdentifierPart(ch0)) {
  1272                 skip(1);
  1273             } else {
  1274                 break;
  1278         // Length of identifier sequence.
  1279         return position - start;
  1282     /**
  1283      * Compare two identifiers (in content) for equality.
  1285      * @param aStart  Start of first identifier.
  1286      * @param aLength Length of first identifier.
  1287      * @param bStart  Start of second identifier.
  1288      * @param bLength Length of second identifier.
  1289      * @return True if equal.
  1290      */
  1291     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
  1292         if (aLength == bLength) {
  1293             for (int i = 0; i < aLength; i++) {
  1294                 if (content[aStart + i] != content[bStart + i]) {
  1295                     return false;
  1299             return true;
  1302         return false;
  1305     /**
  1306      * Detect if a line starts with a marker identifier.
  1308      * @param identStart  Start of identifier.
  1309      * @param identLength Length of identifier.
  1310      * @return True if detected.
  1311      */
  1312     private boolean hasHereMarker(final int identStart, final int identLength) {
  1313         // Skip any whitespace.
  1314         skipWhitespace(false);
  1316         return identifierEqual(identStart, identLength, position, scanIdentifier());
  1319     /**
  1320      * Lexer to service edit strings.
  1321      */
  1322     private static class EditStringLexer extends Lexer {
  1323         /** Type of string literals to emit. */
  1324         final TokenType stringType;
  1326         /*
  1327          * Constructor.
  1328          */
  1330         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
  1331             super(lexer, stringState);
  1333             this.stringType = stringType;
  1336         /**
  1337          * Lexify the contents of the string.
  1338          */
  1339         @Override
  1340         public void lexify() {
  1341             // Record start of string position.
  1342             int stringStart = position;
  1343             // Indicate that the priming first string has not been emitted.
  1344             boolean primed = false;
  1346             while (true) {
  1347                 // Detect end of content.
  1348                 if (atEOF()) {
  1349                     break;
  1352                 // Honour escapes (should be well formed.)
  1353                 if (ch0 == '\\' && stringType == ESCSTRING) {
  1354                     skip(2);
  1356                     continue;
  1359                 // If start of expression.
  1360                 if (ch0 == '$' && ch1 == '{') {
  1361                     if (!primed || stringStart != position) {
  1362                         if (primed) {
  1363                             add(ADD, stringStart, stringStart + 1);
  1366                         add(stringType, stringStart, position);
  1367                         primed = true;
  1370                     // Skip ${
  1371                     skip(2);
  1373                     // Save expression state.
  1374                     final State expressionState = saveState();
  1376                     // Start with one open brace.
  1377                     int braceCount = 1;
  1379                     // Scan for the rest of the string.
  1380                     while (!atEOF()) {
  1381                         // If closing brace.
  1382                         if (ch0 == '}') {
  1383                             // Break only only if matching brace.
  1384                             if (--braceCount == 0) {
  1385                                 break;
  1387                         } else if (ch0 == '{') {
  1388                             // Bump up the brace count.
  1389                             braceCount++;
  1392                         // Skip to next character.
  1393                         skip(1);
  1396                     // If braces don't match then report an error.
  1397                     if (braceCount != 0) {
  1398                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
  1401                     // Mark end of expression.
  1402                     expressionState.setLimit(position);
  1403                     // Skip closing brace.
  1404                     skip(1);
  1406                     // Start next string.
  1407                     stringStart = position;
  1409                     // Concatenate expression.
  1410                     add(ADD, expressionState.position, expressionState.position + 1);
  1411                     add(LPAREN, expressionState.position, expressionState.position + 1);
  1413                     // Scan expression.
  1414                     final Lexer lexer = new Lexer(this, expressionState);
  1415                     lexer.lexify();
  1417                     // Close out expression parenthesis.
  1418                     add(RPAREN, position - 1, position);
  1420                     continue;
  1423                 // Next character in string.
  1424                 skip(1);
  1427             // If there is any unemitted string portion.
  1428             if (stringStart != limit) {
  1429                 // Concatenate remaining string.
  1430                 if (primed) {
  1431                     add(ADD, stringStart, 1);
  1434                 add(stringType, stringStart, limit);
  1440     /**
  1441      * Edit string for nested expressions.
  1443      * @param stringType  Type of string literals to emit.
  1444      * @param stringState State of lexer at start of string.
  1445      */
  1446     private void editString(final TokenType stringType, final State stringState) {
  1447         // Use special lexer to scan string.
  1448         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
  1449         lexer.lexify();
  1451         // Need to keep lexer informed.
  1452         last = stringType;
  1455     /**
  1456      * Scan over a here string.
  1458      * @return TRUE if is a here string.
  1459      */
  1460     private boolean scanHereString(final LineInfoReceiver lir) {
  1461         assert ch0 == '<' && ch1 == '<';
  1462         if (scripting) {
  1463             // Record beginning of here string.
  1464             final State saved = saveState();
  1466             // << or <<<
  1467             final boolean excludeLastEOL = ch2 != '<';
  1469             if (excludeLastEOL) {
  1470                 skip(2);
  1471             } else {
  1472                 skip(3);
  1475             // Scan identifier.
  1476             final int identStart = position;
  1477             final int identLength = scanIdentifier();
  1479             // Check for identifier.
  1480             if (identLength == 0) {
  1481                 // Treat as shift.
  1482                 restoreState(saved);
  1484                 return false;
  1487             // Record rest of line.
  1488             final State restState = saveState();
  1489             // keep line number updated
  1490             int lastLine = line;
  1492             skipLine(false);
  1493             lastLine++;
  1494             int lastLinePosition = position;
  1495             restState.setLimit(position);
  1497             // Record beginning of string.
  1498             final State stringState = saveState();
  1499             int stringEnd = position;
  1501             // Hunt down marker.
  1502             while (!atEOF()) {
  1503                 // Skip any whitespace.
  1504                 skipWhitespace(false);
  1506                 if (hasHereMarker(identStart, identLength)) {
  1507                     break;
  1510                 skipLine(false);
  1511                 lastLine++;
  1512                 lastLinePosition = position;
  1513                 stringEnd = position;
  1516             // notify last line information
  1517             lir.lineInfo(lastLine, lastLinePosition);
  1519             // Record end of string.
  1520             stringState.setLimit(stringEnd);
  1522             // If marker is missing.
  1523             if (stringState.isEmpty() || atEOF()) {
  1524                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
  1525                 restoreState(saved);
  1527                 return false;
  1530             // Remove last end of line if specified.
  1531             if (excludeLastEOL) {
  1532                 // Handles \n.
  1533                 if (content[stringEnd - 1] == '\n') {
  1534                     stringEnd--;
  1537                 // Handles \r and \r\n.
  1538                 if (content[stringEnd - 1] == '\r') {
  1539                     stringEnd--;
  1542                 // Update end of string.
  1543                 stringState.setLimit(stringEnd);
  1546             // Edit string if appropriate.
  1547             if (scripting && !stringState.isEmpty()) {
  1548                 editString(STRING, stringState);
  1549             } else {
  1550                 // Add here string.
  1551                 add(STRING, stringState.position, stringState.limit);
  1554             // Scan rest of original line.
  1555             final Lexer restLexer = new Lexer(this, restState);
  1557             restLexer.lexify();
  1559             return true;
  1562         return false;
  1565     /**
  1566      * Breaks source content down into lex units, adding tokens to the token
  1567      * stream. The routine scans until the stream buffer is full. Can be called
  1568      * repeatedly until EOF is detected.
  1569      */
  1570     public void lexify() {
  1571         while (!stream.isFull() || nested) {
  1572             // Skip over whitespace.
  1573             skipWhitespace(true);
  1575             // Detect end of file.
  1576             if (atEOF()) {
  1577                 if (!nested) {
  1578                     // Add an EOF token at the end.
  1579                     add(EOF, position);
  1582                 break;
  1585             // Check for comments. Note that we don't scan for regexp and other literals here as
  1586             // we may not have enough context to distinguish them from similar looking operators.
  1587             // Instead we break on ambiguous operators below and let the parser decide.
  1588             if (ch0 == '/' && skipComments()) {
  1589                 continue;
  1592             if (scripting && ch0 == '#' && skipComments()) {
  1593                 continue;
  1596             // TokenType for lookup of delimiter or operator.
  1597             TokenType type;
  1599             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
  1600                 // '.' followed by digit.
  1601                 // Scan and add a number.
  1602                 scanNumber();
  1603             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
  1604                 // Get the number of characters in the token.
  1605                 final int typeLength = type.getLength();
  1606                 // Skip that many characters.
  1607                 skip(typeLength);
  1608                 // Add operator token.
  1609                 add(type, position - typeLength);
  1610                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
  1611                 // We break to let the parser decide what it is.
  1612                 if (canStartLiteral(type)) {
  1613                     break;
  1614                 } else if (type == LBRACE && pauseOnNextLeftBrace) {
  1615                     pauseOnNextLeftBrace = false;
  1616                     break;
  1618             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
  1619                 // Scan and add identifier or keyword.
  1620                 scanIdentifierOrKeyword();
  1621             } else if (isStringDelimiter(ch0)) {
  1622                 // Scan and add a string.
  1623                 scanString(true);
  1624             } else if (Character.isDigit(ch0)) {
  1625                 // Scan and add a number.
  1626                 scanNumber();
  1627             } else {
  1628                 // Don't recognize this character.
  1629                 skip(1);
  1630                 add(ERROR, position - 1);
  1635     /**
  1636      * Return value of token given its token descriptor.
  1638      * @param token  Token descriptor.
  1639      * @return JavaScript value.
  1640      */
  1641     Object getValueOf(final long token, final boolean strict) {
  1642         final int start = Token.descPosition(token);
  1643         final int len   = Token.descLength(token);
  1645         switch (Token.descType(token)) {
  1646         case DECIMAL:
  1647             return Lexer.valueOf(source.getString(start, len), 10); // number
  1648         case OCTAL:
  1649             return Lexer.valueOf(source.getString(start, len), 8); // number
  1650         case HEXADECIMAL:
  1651             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
  1652         case FLOATING:
  1653             final String str   = source.getString(start, len);
  1654             final double value = Double.valueOf(str);
  1655             if (str.indexOf('.') != -1) {
  1656                 return value; //number
  1658             //anything without an explicit decimal point is still subject to a
  1659             //"representable as int or long" check. Then the programmer does not
  1660             //explicitly code something as a double. For example new Color(int, int, int)
  1661             //and new Color(float, float, float) will get ambiguous for cases like
  1662             //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
  1663             //yet we don't want e.g. 1e6 to be a double unnecessarily
  1664             if (JSType.isRepresentableAsInt(value) && !JSType.isNegativeZero(value)) {
  1665                 return (int)value;
  1666             } else if (JSType.isRepresentableAsLong(value) && !JSType.isNegativeZero(value)) {
  1667                 return (long)value;
  1669             return value;
  1670         case STRING:
  1671             return source.getString(start, len); // String
  1672         case ESCSTRING:
  1673             return valueOfString(start, len, strict); // String
  1674         case IDENT:
  1675             return valueOfIdent(start, len); // String
  1676         case REGEX:
  1677             return valueOfPattern(start, len); // RegexToken::LexerToken
  1678         case XML:
  1679             return valueOfXML(start, len); // XMLToken::LexerToken
  1680         case DIRECTIVE_COMMENT:
  1681             return source.getString(start, len);
  1682         default:
  1683             break;
  1686         return null;
  1689     /**
  1690      * Get the correctly localized error message for a given message id format arguments
  1691      * @param msgId message id
  1692      * @param args  format arguments
  1693      * @return message
  1694      */
  1695     protected static String message(final String msgId, final String... args) {
  1696         return ECMAErrors.getMessage("lexer.error." + msgId, args);
  1699     /**
  1700      * Generate a runtime exception
  1702      * @param message       error message
  1703      * @param type          token type
  1704      * @param start         start position of lexed error
  1705      * @param length        length of lexed error
  1706      * @throws ParserException  unconditionally
  1707      */
  1708     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
  1709         final long token     = Token.toDesc(type, start, length);
  1710         final int  pos       = Token.descPosition(token);
  1711         final int  lineNum   = source.getLine(pos);
  1712         final int  columnNum = source.getColumn(pos);
  1713         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
  1714         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
  1717     /**
  1718      * Helper class for Lexer tokens, e.g XML or RegExp tokens.
  1719      * This is the abstract superclass
  1720      */
  1721     public static abstract class LexerToken implements Serializable {
  1722         private static final long serialVersionUID = 1L;
  1724         private final String expression;
  1726         /**
  1727          * Constructor
  1728          * @param expression token expression
  1729          */
  1730         protected LexerToken(final String expression) {
  1731             this.expression = expression;
  1734         /**
  1735          * Get the expression
  1736          * @return expression
  1737          */
  1738         public String getExpression() {
  1739             return expression;
  1743     /**
  1744      * Temporary container for regular expressions.
  1745      */
  1746     public static class RegexToken extends LexerToken {
  1747         private static final long serialVersionUID = 1L;
  1749         /** Options. */
  1750         private final String options;
  1752         /**
  1753          * Constructor.
  1755          * @param expression  regexp expression
  1756          * @param options     regexp options
  1757          */
  1758         public RegexToken(final String expression, final String options) {
  1759             super(expression);
  1760             this.options = options;
  1763         /**
  1764          * Get regexp options
  1765          * @return options
  1766          */
  1767         public String getOptions() {
  1768             return options;
  1771         @Override
  1772         public String toString() {
  1773             return '/' + getExpression() + '/' + options;
  1777     /**
  1778      * Temporary container for XML expression.
  1779      */
  1780     public static class XMLToken extends LexerToken {
  1781         private static final long serialVersionUID = 1L;
  1783         /**
  1784          * Constructor.
  1786          * @param expression  XML expression
  1787          */
  1788         public XMLToken(final String expression) {
  1789             super(expression);

mercurial