jdk8-mips64-public/nashorn: src/jdk/nashorn/internal/parser/Lexer.java@8c51767d534d

     1 /*

     2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package jdk.nashorn.internal.parser;

    28 import static jdk.nashorn.internal.parser.TokenType.ADD;

    29 import static jdk.nashorn.internal.parser.TokenType.COMMENT;

    30 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;

    31 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;

    32 import static jdk.nashorn.internal.parser.TokenType.EOF;

    33 import static jdk.nashorn.internal.parser.TokenType.EOL;

    34 import static jdk.nashorn.internal.parser.TokenType.ERROR;

    35 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;

    36 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;

    37 import static jdk.nashorn.internal.parser.TokenType.FLOATING;

    38 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;

    39 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;

    40 import static jdk.nashorn.internal.parser.TokenType.LBRACE;

    41 import static jdk.nashorn.internal.parser.TokenType.LPAREN;

    42 import static jdk.nashorn.internal.parser.TokenType.OCTAL;

    43 import static jdk.nashorn.internal.parser.TokenType.RBRACE;

    44 import static jdk.nashorn.internal.parser.TokenType.REGEX;

    45 import static jdk.nashorn.internal.parser.TokenType.RPAREN;

    46 import static jdk.nashorn.internal.parser.TokenType.STRING;

    47 import static jdk.nashorn.internal.parser.TokenType.XML;

    49 import java.io.Serializable;

    50 import jdk.nashorn.internal.runtime.ECMAErrors;

    51 import jdk.nashorn.internal.runtime.ErrorManager;

    52 import jdk.nashorn.internal.runtime.JSErrorType;

    53 import jdk.nashorn.internal.runtime.JSType;

    54 import jdk.nashorn.internal.runtime.ParserException;

    55 import jdk.nashorn.internal.runtime.Source;

    56 import jdk.nashorn.internal.runtime.options.Options;

    58 /**

    59  * Responsible for converting source content into a stream of tokens.

    60  *

    61  */

    62 @SuppressWarnings("fallthrough")

    63 public class Lexer extends Scanner {

    64     private static final long MIN_INT_L = Integer.MIN_VALUE;

    65     private static final long MAX_INT_L = Integer.MAX_VALUE;

    67     private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");

    69     /** Content source. */

    70     private final Source source;

    72     /** Buffered stream for tokens. */

    73     private final TokenStream stream;

    75     /** True if here and edit strings are supported. */

    76     private final boolean scripting;

    78     /** True if a nested scan. (scan to completion, no EOF.) */

    79     private final boolean nested;

    81     /** Pending new line number and position. */

    82     int pendingLine;

    84     /** Position of last EOL + 1. */

    85     private int linePosition;

    87     /** Type of last token added. */

    88     private TokenType last;

    90     private final boolean pauseOnFunctionBody;

    91     private boolean pauseOnNextLeftBrace;

    93     private static final String SPACETAB = " \t";  // ASCII space and tab

    94     private static final String LFCR     = "\n\r"; // line feed and carriage return (ctrl-m)

    96     private static final String JSON_WHITESPACE_EOL = LFCR;

    97     private static final String JSON_WHITESPACE     = SPACETAB + LFCR;

    99     private static final String JAVASCRIPT_WHITESPACE_EOL =

   100         LFCR +

   101         "\u2028" + // line separator

   102         "\u2029"   // paragraph separator

   103         ;

   104     private static final String JAVASCRIPT_WHITESPACE =

   105         SPACETAB +

   106         JAVASCRIPT_WHITESPACE_EOL +

   107         "\u000b" + // tabulation line

   108         "\u000c" + // ff (ctrl-l)

   109         "\u00a0" + // Latin-1 space

   110         "\u1680" + // Ogham space mark

   111         "\u180e" + // separator, Mongolian vowel

   112         "\u2000" + // en quad

   113         "\u2001" + // em quad

   114         "\u2002" + // en space

   115         "\u2003" + // em space

   116         "\u2004" + // three-per-em space

   117         "\u2005" + // four-per-em space

   118         "\u2006" + // six-per-em space

   119         "\u2007" + // figure space

   120         "\u2008" + // punctuation space

   121         "\u2009" + // thin space

   122         "\u200a" + // hair space

   123         "\u202f" + // narrow no-break space

   124         "\u205f" + // medium mathematical space

   125         "\u3000" + // ideographic space

   126         "\ufeff"   // byte order mark

   127         ;

   129     private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =

   130         "\\u000a" + // line feed

   131         "\\u000d" + // carriage return (ctrl-m)

   132         "\\u2028" + // line separator

   133         "\\u2029" + // paragraph separator

   134         "\\u0009" + // tab

   135         "\\u0020" + // ASCII space

   136         "\\u000b" + // tabulation line

   137         "\\u000c" + // ff (ctrl-l)

   138         "\\u00a0" + // Latin-1 space

   139         "\\u1680" + // Ogham space mark

   140         "\\u180e" + // separator, Mongolian vowel

   141         "\\u2000" + // en quad

   142         "\\u2001" + // em quad

   143         "\\u2002" + // en space

   144         "\\u2003" + // em space

   145         "\\u2004" + // three-per-em space

   146         "\\u2005" + // four-per-em space

   147         "\\u2006" + // six-per-em space

   148         "\\u2007" + // figure space

   149         "\\u2008" + // punctuation space

   150         "\\u2009" + // thin space

   151         "\\u200a" + // hair space

   152         "\\u202f" + // narrow no-break space

   153         "\\u205f" + // medium mathematical space

   154         "\\u3000" + // ideographic space

   155         "\\ufeff"   // byte order mark

   156         ;

   158     static String unicodeEscape(final char ch) {

   159         final StringBuilder sb = new StringBuilder();

   161         sb.append("\\u");

   163         final String hex = Integer.toHexString(ch);

   164         for (int i = hex.length(); i < 4; i++) {

   165             sb.append('0');

   166         }

   167         sb.append(hex);

   169         return sb.toString();

   170     }

   /**
    * Create a lexer over the whole of {@code source}, with scripting mode off
    * and without pausing on function bodies.
    *
    * @param source    the source
    * @param stream    the token stream to lex
    */
   public Lexer(final Source source, final TokenStream stream) {
       this(source, 0, source.getLength(), stream, false, false);
   }

   /**
    * Create a lexer over the whole of {@code source}, from position 0 to
    * {@code source.getLength()}, without pausing on function bodies.
    *
    * @param source    the source
    * @param stream    the token stream to lex
    * @param scripting true to enable scripting mode
    */
   public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
       this(source, 0, source.getLength(), stream, scripting, false);
   }

   /**
    * Create a lexer over a segment of {@code source}.
    *
    * @param source    the source
    * @param start     start position in source from which to start lexing
    * @param len       length of source segment to lex
    * @param stream    token stream to lex
    * @param scripting are we in scripting mode
    * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
    * function body. This is used with the feature where the parser is skipping nested function bodies to
    * avoid reading ahead unnecessarily when we skip the function bodies.
    */
   public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean pauseOnFunctionBody) {
       super(source.getContent(), 1, start, len);

       this.source              = source;
       this.stream              = stream;
       this.scripting           = scripting;
       this.pauseOnFunctionBody = pauseOnFunctionBody;

       // A lexer built through this constructor is never a nested scan.
       this.nested = false;

       // Begin as though an EOL had just been recorded: pendingLine is not -1,
       // so the first non-EOL token added is preceded by an EOL in the stream.
       this.last        = EOL;
       this.pendingLine = 1;
   }

   /**
    * Create a nested lexer that shares the source, token stream and scripting
    * mode of {@code lexer} and takes its scan position from {@code state}.
    *
    * @param lexer lexer to copy configuration from
    * @param state saved state supplying position and pending-EOL bookkeeping
    */
   private Lexer(final Lexer lexer, final State state) {
       super(lexer, state);

       this.source    = lexer.source;
       this.stream    = lexer.stream;
       this.scripting = lexer.scripting;

       // Nested lexers never pause on function bodies.
       this.nested              = true;
       this.pauseOnFunctionBody = false;

       this.pendingLine  = state.pendingLine;
       this.linePosition = state.linePosition;

       // state.last is deliberately not carried over; the nested scan starts from EOL.
       this.last = EOL;
   }

   232     static class State extends Scanner.State {

   233         /** Pending new line number and position. */

   234         public final int pendingLine;

   236         /** Position of last EOL + 1. */

   237         public final int linePosition;

   239         /** Type of last token added. */

   240         public final TokenType last;

   242         /*

   243          * Constructor.

   244          */

   246         State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {

   247             super(position, limit, line);

   249             this.pendingLine = pendingLine;

   250             this.linePosition = linePosition;

   251             this.last = last;

   252         }

   253     }

   255     /**

   256      * Save the state of the scan.

   257      *

   258      * @return Captured state.

   259      */

   260     @Override

   261     State saveState() {

   262         return new State(position, limit, line, pendingLine, linePosition, last);

   263     }

   265     /**

   266      * Restore the state of the scan.

   267      *

   268      * @param state

   269      *            Captured state.

   270      */

   271     void restoreState(final State state) {

   272         super.restoreState(state);

   274         pendingLine = state.pendingLine;

   275         linePosition = state.linePosition;

   276         last = state.last;

   277     }

   279     /**

   280      * Add a new token to the stream.

   281      *

   282      * @param type

   283      *            Token type.

   284      * @param start

   285      *            Start position.

   286      * @param end

   287      *            End position.

   288      */

   289     protected void add(final TokenType type, final int start, final int end) {

   290         // Record last token.

   291         last = type;

   293         // Only emit the last EOL in a cluster.

   294         if (type == EOL) {

   295             pendingLine = end;

   296             linePosition = start;

   297         } else {

   298             // Write any pending EOL to stream.

   299             if (pendingLine != -1) {

   300                 stream.put(Token.toDesc(EOL, linePosition, pendingLine));

   301                 pendingLine = -1;

   302             }

   304             // Write token to stream.

   305             stream.put(Token.toDesc(type, start, end - start));

   306         }

   307     }

   309     /**

   310      * Add a new token to the stream.

   311      *

   312      * @param type

   313      *            Token type.

   314      * @param start

   315      *            Start position.

   316      */

   317     protected void add(final TokenType type, final int start) {

   318         add(type, start, position);

   319     }

   321     /**

   322      * Return the String of valid whitespace characters for regular

   323      * expressions in JavaScript

   324      * @return regexp whitespace string

   325      */

   326     public static String getWhitespaceRegExp() {

   327         return JAVASCRIPT_WHITESPACE_IN_REGEXP;

   328     }

   330     /**

   331      * Skip end of line.

   332      *

   333      * @param addEOL true if EOL token should be recorded.

   334      */

   335     private void skipEOL(final boolean addEOL) {

   337         if (ch0 == '\r') { // detect \r\n pattern

   338             skip(1);

   339             if (ch0 == '\n') {

   340                 skip(1);

   341             }

   342         } else { // all other space, ch0 is guaranteed to be EOL or \0

   343             skip(1);

   344         }

   346         // bump up line count

   347         line++;

   349         if (addEOL) {

   350             // Add an EOL token.

   351             add(EOL, position, line);

   352         }

   353     }

   355     /**

   356      * Skip over rest of line including end of line.

   357      *

   358      * @param addEOL true if EOL token should be recorded.

   359      */

   360     private void skipLine(final boolean addEOL) {

   361         // Ignore characters.

   362         while (!isEOL(ch0) && !atEOF()) {

   363             skip(1);

   364         }

   365         // Skip over end of line.

   366         skipEOL(addEOL);

   367     }

   369     /**

   370      * Test whether a char is valid JavaScript whitespace

   371      * @param ch a char

   372      * @return true if valid JavaScript whitespace

   373      */

   374     public static boolean isJSWhitespace(final char ch) {

   375         return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;

   376     }

   378     /**

   379      * Test whether a char is valid JavaScript end of line

   380      * @param ch a char

   381      * @return true if valid JavaScript end of line

   382      */

   383     public static boolean isJSEOL(final char ch) {

   384         return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;

   385     }

   387     /**

   388      * Test whether a char is valid JSON whitespace

   389      * @param ch a char

   390      * @return true if valid JSON whitespace

   391      */

   392     public static boolean isJsonWhitespace(final char ch) {

   393         return JSON_WHITESPACE.indexOf(ch) != -1;

   394     }

   396     /**

   397      * Test whether a char is valid JSON end of line

   398      * @param ch a char

   399      * @return true if valid JSON end of line

   400      */

   401     public static boolean isJsonEOL(final char ch) {

   402         return JSON_WHITESPACE_EOL.indexOf(ch) != -1;

   403     }

   405     /**

   406      * Test if char is a string delimiter, e.g. '\' or '"'.  Also scans exec

   407      * strings ('`') in scripting mode.

   408      * @param ch a char

   409      * @return true if string delimiter

   410      */

   411     protected boolean isStringDelimiter(final char ch) {

   412         return ch == '\'' || ch == '"' || (scripting && ch == '`');

   413     }

   415     /**

   416      * Test whether a char is valid JavaScript whitespace

   417      * @param ch a char

   418      * @return true if valid JavaScript whitespace

   419      */

   420     protected boolean isWhitespace(final char ch) {

   421         return Lexer.isJSWhitespace(ch);

   422     }

   424     /**

   425      * Test whether a char is valid JavaScript end of line

   426      * @param ch a char

   427      * @return true if valid JavaScript end of line

   428      */

   429     protected boolean isEOL(final char ch) {

   430         return Lexer.isJSEOL(ch);

   431     }

   433     /**

   434      * Skip over whitespace and detect end of line, adding EOL tokens if

   435      * encountered.

   436      *

   437      * @param addEOL true if EOL tokens should be recorded.

   438      */

   439     private void skipWhitespace(final boolean addEOL) {

   440         while (isWhitespace(ch0)) {

   441             if (isEOL(ch0)) {

   442                 skipEOL(addEOL);

   443             } else {

   444                 skip(1);

   445             }

   446         }

   447     }

   449     /**

   450      * Skip over comments.

   451      *

   452      * @return True if a comment.

   453      */

   454     protected boolean skipComments() {

   455         // Save the current position.

   456         final int start = position;

   458         if (ch0 == '/') {

   459             // Is it a // comment.

   460             if (ch1 == '/') {

   461                 // Skip over //.

   462                 skip(2);

   464                 boolean directiveComment = false;

   465                 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {

   466                     directiveComment = true;

   467                 }

   469                 // Scan for EOL.

   470                 while (!atEOF() && !isEOL(ch0)) {

   471                     skip(1);

   472                 }

   473                 // Did detect a comment.

   474                 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);

   475                 return true;

   476             } else if (ch1 == '*') {

   477                 // Skip over /*.

   478                 skip(2);

   479                 // Scan for */.

   480                 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {

   481                     // If end of line handle else skip character.

   482                     if (isEOL(ch0)) {

   483                         skipEOL(true);

   484                     } else {

   485                         skip(1);

   486                     }

   487                 }

   489                 if (atEOF()) {

   490                     // TODO - Report closing */ missing in parser.

   491                     add(ERROR, start);

   492                 } else {

   493                     // Skip */.

   494                     skip(2);

   495                 }

   497                 // Did detect a comment.

   498                 add(COMMENT, start);

   499                 return true;

   500             }

   501         } else if (ch0 == '#') {

   502             assert scripting;

   503             // shell style comment

   504             // Skip over #.

   505             skip(1);

   506             // Scan for EOL.

   507             while (!atEOF() && !isEOL(ch0)) {

   508                 skip(1);

   509             }

   510             // Did detect a comment.

   511             add(COMMENT, start);

   512             return true;

   513         }

   515         // Not a comment.

   516         return false;

   517     }

   519     /**

   520      * Convert a regex token to a token object.

   521      *

   522      * @param start  Position in source content.

   523      * @param length Length of regex token.

   524      * @return Regex token object.

   525      */

   526     public RegexToken valueOfPattern(final int start, final int length) {

   527         // Save the current position.

   528         final int savePosition = position;

   529         // Reset to beginning of content.

   530         reset(start);

   531         // Buffer for recording characters.

   532         final StringBuilder sb = new StringBuilder(length);

   534         // Skip /.

   535         skip(1);

   536         boolean inBrackets = false;

   537         // Scan for closing /, stopping at end of line.

   538         while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {

   539             // Skip over escaped character.

   540             if (ch0 == '\\') {

   541                 sb.append(ch0);

   542                 sb.append(ch1);

   543                 skip(2);

   544             } else {

   545                 if (ch0 == '[') {

   546                     inBrackets = true;

   547                 } else if (ch0 == ']') {

   548                     inBrackets = false;

   549                 }

   551                 // Skip literal character.

   552                 sb.append(ch0);

   553                 skip(1);

   554             }

   555         }

   557         // Get pattern as string.

   558         final String regex = sb.toString();

   560         // Skip /.

   561         skip(1);

   563         // Options as string.

   564         final String options = source.getString(position, scanIdentifier());

   566         reset(savePosition);

   568         // Compile the pattern.

   569         return new RegexToken(regex, options);

   570     }

   572     /**

   573      * Return true if the given token can be the beginning of a literal.

   574      *

   575      * @param token a token

   576      * @return true if token can start a literal.

   577      */

   578     public boolean canStartLiteral(final TokenType token) {

   579         return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));

   580     }

   582     /**

   583      * interface to receive line information for multi-line literals.

   584      */

   585     protected interface LineInfoReceiver {

   586         /**

   587          * Receives line information

   588          * @param line last line number

   589          * @param linePosition position of last line

   590          */

   591         public void lineInfo(int line, int linePosition);

   592     }

   594     /**

   595      * Check whether the given token represents the beginning of a literal. If so scan

   596      * the literal and return <tt>true</tt>, otherwise return false.

   597      *

   598      * @param token the token.

   599      * @param startTokenType the token type.

   600      * @param lir LineInfoReceiver that receives line info for multi-line string literals.

   601      * @return True if a literal beginning with startToken was found and scanned.

   602      */

   603     protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {

   604         // Check if it can be a literal.

   605         if (!canStartLiteral(startTokenType)) {

   606             return false;

   607         }

   608         // We break on ambiguous tokens so if we already moved on it can't be a literal.

   609         if (stream.get(stream.last()) != token) {

   610             return false;

   611         }

   612         // Rewind to token start position

   613         reset(Token.descPosition(token));

   615         if (ch0 == '/') {

   616             return scanRegEx();

   617         } else if (ch0 == '<') {

   618             if (ch1 == '<') {

   619                 return scanHereString(lir);

   620             } else if (Character.isJavaIdentifierStart(ch1)) {

   621                 return scanXMLLiteral();

   622             }

   623         }

   625         return false;

   626     }

   628     /**

   629      * Scan over regex literal.

   630      *

   631      * @return True if a regex literal.

   632      */

   633     private boolean scanRegEx() {

   634         assert ch0 == '/';

   635         // Make sure it's not a comment.

   636         if (ch1 != '/' && ch1 != '*') {

   637             // Record beginning of literal.

   638             final int start = position;

   639             // Skip /.

   640             skip(1);

   641             boolean inBrackets = false;

   643             // Scan for closing /, stopping at end of line.

   644             while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {

   645                 // Skip over escaped character.

   646                 if (ch0 == '\\') {

   647                     skip(1);

   648                     if (isEOL(ch0)) {

   649                         reset(start);

   650                         return false;

   651                     }

   652                     skip(1);

   653                 } else {

   654                     if (ch0 == '[') {

   655                         inBrackets = true;

   656                     } else if (ch0 == ']') {

   657                         inBrackets = false;

   658                     }

   660                     // Skip literal character.

   661                     skip(1);

   662                 }

   663             }

   665             // If regex literal.

   666             if (ch0 == '/') {

   667                 // Skip /.

   668                 skip(1);

   670                 // Skip over options.

   671                 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {

   672                     skip(1);

   673                 }

   675                 // Add regex token.

   676                 add(REGEX, start);

   677                 // Regex literal detected.

   678                 return true;

   679             }

   681             // False start try again.

   682             reset(start);

   683         }

   685         // Regex literal not detected.

   686         return false;

   687     }

   689     /**

   690      * Convert a digit to a integer.  Can't use Character.digit since we are

   691      * restricted to ASCII by the spec.

   692      *

   693      * @param ch   Character to convert.

   694      * @param base Numeric base.

   695      *

   696      * @return The converted digit or -1 if invalid.

   697      */

   698     protected static int convertDigit(final char ch, final int base) {

   699         int digit;

   701         if ('0' <= ch && ch <= '9') {

   702             digit = ch - '0';

   703         } else if ('A' <= ch && ch <= 'Z') {

   704             digit = ch - 'A' + 10;

   705         } else if ('a' <= ch && ch <= 'z') {

   706             digit = ch - 'a' + 10;

   707         } else {

   708             return -1;

   709         }

   711         return digit < base ? digit : -1;

   712     }

   715     /**

   716      * Get the value of a hexadecimal numeric sequence.

   717      *

   718      * @param length Number of digits.

   719      * @param type   Type of token to report against.

   720      * @return Value of sequence or < 0 if no digits.

   721      */

   722     private int hexSequence(final int length, final TokenType type) {

   723         int value = 0;

   725         for (int i = 0; i < length; i++) {

   726             final int digit = convertDigit(ch0, 16);

   728             if (digit == -1) {

   729                 error(Lexer.message("invalid.hex"), type, position, limit);

   730                 return i == 0 ? -1 : value;

   731             }

   733             value = digit | value << 4;

   734             skip(1);

   735         }

   737         return value;

   738     }

   740     /**

   741      * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.

   742      *

   743      * @return Value of sequence.

   744      */

   745     private int octalSequence() {

   746         int value = 0;

   748         for (int i = 0; i < 3; i++) {

   749             final int digit = convertDigit(ch0, 8);

   751             if (digit == -1) {

   752                 break;

   753             }

   754             value = digit | value << 3;

   755             skip(1);

   757             if (i == 1 && value >= 32) {

   758                 break;

   759             }

   760         }

   761         return value;

   762     }

   764     /**

   765      * Convert a string to a JavaScript identifier.

   766      *

   767      * @param start  Position in source content.

   768      * @param length Length of token.

   769      * @return Ident string or null if an error.

   770      */

   771     private String valueOfIdent(final int start, final int length) throws RuntimeException {

   772         // Save the current position.

   773         final int savePosition = position;

   774         // End of scan.

   775         final int end = start + length;

   776         // Reset to beginning of content.

   777         reset(start);

   778         // Buffer for recording characters.

   779         final StringBuilder sb = new StringBuilder(length);

   781         // Scan until end of line or end of file.

   782         while (!atEOF() && position < end && !isEOL(ch0)) {

   783             // If escape character.

   784             if (ch0 == '\\' && ch1 == 'u') {

   785                 skip(2);

   786                 final int ch = hexSequence(4, TokenType.IDENT);

   787                 if (isWhitespace((char)ch)) {

   788                     return null;

   789                 }

   790                 if (ch < 0) {

   791                     sb.append('\\');

   792                     sb.append('u');

   793                 } else {

   794                     sb.append((char)ch);

   795                 }

   796             } else {

   797                 // Add regular character.

   798                 sb.append(ch0);

   799                 skip(1);

   800             }

   801         }

   803         // Restore position.

   804         reset(savePosition);

   806         return sb.toString();

   807     }

   809     /**

   810      * Scan over and identifier or keyword. Handles identifiers containing

   811      * encoded Unicode chars.

   812      *

   813      * Example:

   814      *

   815      * var \u0042 = 44;

   816      */

   817     private void scanIdentifierOrKeyword() {

   818         // Record beginning of identifier.

   819         final int start = position;

   820         // Scan identifier.

   821         final int length = scanIdentifier();

   822         // Check to see if it is a keyword.

   823         final TokenType type = TokenLookup.lookupKeyword(content, start, length);

   824         if (type == FUNCTION && pauseOnFunctionBody) {

   825             pauseOnNextLeftBrace = true;

   826         }

   827         // Add keyword or identifier token.

   828         add(type, start);

   829     }

   831     /**

   832      * Convert a string to a JavaScript string object.

   833      *

   834      * @param start  Position in source content.

   835      * @param length Length of token.

   836      * @return JavaScript string object.

   837      */

   838     private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {

   839         // Save the current position.

   840         final int savePosition = position;

   841         // Calculate the end position.

   842         final int end = start + length;

   843         // Reset to beginning of string.

   844         reset(start);

   846         // Buffer for recording characters.

   847         final StringBuilder sb = new StringBuilder(length);

   849         // Scan until end of string.

   850         while (position < end) {

   851             // If escape character.

   852             if (ch0 == '\\') {

   853                 skip(1);

   855                 final char next = ch0;

   856                 final int afterSlash = position;

   858                 skip(1);

   860                 // Special characters.

   861                 switch (next) {

   862                 case '0':

   863                 case '1':

   864                 case '2':

   865                 case '3':

   866                 case '4':

   867                 case '5':

   868                 case '6':

   869                 case '7': {

   870                     if (strict) {

   871                         // "\0" itself is allowed in strict mode. Only other 'real'

   872                         // octal escape sequences are not allowed (eg. "\02", "\31").

   873                         // See section 7.8.4 String literals production EscapeSequence

   874                         if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {

   875                             error(Lexer.message("strict.no.octal"), STRING, position, limit);

   876                         }

   877                     }

   878                     reset(afterSlash);

   879                     // Octal sequence.

   880                     final int ch = octalSequence();

   882                     if (ch < 0) {

   883                         sb.append('\\');

   884                         sb.append('x');

   885                     } else {

   886                         sb.append((char)ch);

   887                     }

   888                     break;

   889                 }

   890                 case 'n':

   891                     sb.append('\n');

   892                     break;

   893                 case 't':

   894                     sb.append('\t');

   895                     break;

   896                 case 'b':

   897                     sb.append('\b');

   898                     break;

   899                 case 'f':

   900                     sb.append('\f');

   901                     break;

   902                 case 'r':

   903                     sb.append('\r');

   904                     break;

   905                 case '\'':

   906                     sb.append('\'');

   907                     break;

   908                 case '\"':

   909                     sb.append('\"');

   910                     break;

   911                 case '\\':

   912                     sb.append('\\');

   913                     break;

   914                 case '\r': // CR | CRLF

   915                     if (ch0 == '\n') {

   916                         skip(1);

   917                     }

   918                     // fall through

   919                 case '\n': // LF

   920                 case '\u2028': // LS

   921                 case '\u2029': // PS

   922                     // continue on the next line, slash-return continues string

   923                     // literal

   924                     break;

   925                 case 'x': {

   926                     // Hex sequence.

   927                     final int ch = hexSequence(2, STRING);

   929                     if (ch < 0) {

   930                         sb.append('\\');

   931                         sb.append('x');

   932                     } else {

   933                         sb.append((char)ch);

   934                     }

   935                 }

   936                     break;

   937                 case 'u': {

   938                     // Unicode sequence.

   939                     final int ch = hexSequence(4, STRING);

   941                     if (ch < 0) {

   942                         sb.append('\\');

   943                         sb.append('u');

   944                     } else {

   945                         sb.append((char)ch);

   946                     }

   947                 }

   948                     break;

   949                 case 'v':

   950                     sb.append('\u000B');

   951                     break;

   952                 // All other characters.

   953                 default:

   954                     sb.append(next);

   955                     break;

   956                 }

   957             } else {

   958                 // Add regular character.

   959                 sb.append(ch0);

   960                 skip(1);

   961             }

   962         }

   964         // Restore position.

   965         reset(savePosition);

   967         return sb.toString();

   968     }

   970     /**

   971      * Scan over a string literal.

   972      * @param add true if we nare not just scanning but should actually modify the token stream

   973      */

   974     protected void scanString(final boolean add) {

   975         // Type of string.

   976         TokenType type = STRING;

   977         // Record starting quote.

   978         final char quote = ch0;

   979         // Skip over quote.

   980         skip(1);

   982         // Record beginning of string content.

   983         final State stringState = saveState();

   985         // Scan until close quote or end of line.

   986         while (!atEOF() && ch0 != quote && !isEOL(ch0)) {

   987             // Skip over escaped character.

   988             if (ch0 == '\\') {

   989                 type = ESCSTRING;

   990                 skip(1);

   991                 if (! isEscapeCharacter(ch0)) {

   992                     error(Lexer.message("invalid.escape.char"), STRING, position, limit);

   993                 }

   994                 if (isEOL(ch0)) {

   995                     // Multiline string literal

   996                     skipEOL(false);

   997                     continue;

   998                 }

   999             }

  1000             // Skip literal character.

  1001             skip(1);

  1002         }

  1004         // If close quote.

  1005         if (ch0 == quote) {

  1006             // Skip close quote.

  1007             skip(1);

  1008         } else {

  1009             error(Lexer.message("missing.close.quote"), STRING, position, limit);

  1010         }

  1012         // If not just scanning.

  1013         if (add) {

  1014             // Record end of string.

  1015             stringState.setLimit(position - 1);

  1017             if (scripting && !stringState.isEmpty()) {

  1018                 switch (quote) {

  1019                 case '`':

  1020                     // Mark the beginning of an exec string.

  1021                     add(EXECSTRING, stringState.position, stringState.limit);

  1022                     // Frame edit string with left brace.

  1023                     add(LBRACE, stringState.position, stringState.position);

  1024                     // Process edit string.

  1025                     editString(type, stringState);

  1026                     // Frame edit string with right brace.

  1027                     add(RBRACE, stringState.limit, stringState.limit);

  1028                     break;

  1029                 case '"':

  1030                     // Only edit double quoted strings.

  1031                     editString(type, stringState);

  1032                     break;

  1033                 case '\'':

  1034                     // Add string token without editing.

  1035                     add(type, stringState.position, stringState.limit);

  1036                     break;

  1037                 default:

  1038                     break;

  1039                 }

  1040             } else {

  1041                 /// Add string token without editing.

  1042                 add(type, stringState.position, stringState.limit);

  1043             }

  1044         }

  1045     }

  1047     /**

  1048      * Is the given character a valid escape char after "\" ?

  1049      *

  1050      * @param ch character to be checked

  1051      * @return if the given character is valid after "\"

  1052      */

  1053     protected boolean isEscapeCharacter(final char ch) {

  1054         return true;

  1055     }

  1057     /**

  1058      * Convert string to number.

  1059      *

  1060      * @param valueString  String to convert.

  1061      * @param radix        Numeric base.

  1062      * @return Converted number.

  1063      */

  1064     private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {

  1065         try {

  1066             final long value = Long.parseLong(valueString, radix);

  1067             if(value >= MIN_INT_L && value <= MAX_INT_L) {

  1068                 return Integer.valueOf((int)value);

  1069             }

  1070             return Long.valueOf(value);

  1071         } catch (final NumberFormatException e) {

  1072             if (radix == 10) {

  1073                 return Double.valueOf(valueString);

  1074             }

  1076             double value = 0.0;

  1078             for (int i = 0; i < valueString.length(); i++) {

  1079                 final char ch = valueString.charAt(i);

  1080                 // Preverified, should always be a valid digit.

  1081                 final int digit = convertDigit(ch, radix);

  1082                 value *= radix;

  1083                 value += digit;

  1084             }

  1086             return value;

  1087         }

  1088     }

  1090     /**

  1091      * Scan a number.

  1092      */

  1093     protected void scanNumber() {

  1094         // Record beginning of number.

  1095         final int start = position;

  1096         // Assume value is a decimal.

  1097         TokenType type = DECIMAL;

  1099         // First digit of number.

  1100         int digit = convertDigit(ch0, 10);

  1102         // If number begins with 0x.

  1103         if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {

  1104             // Skip over 0xN.

  1105             skip(3);

  1106             // Skip over remaining digits.

  1107             while (convertDigit(ch0, 16) != -1) {

  1108                 skip(1);

  1109             }

  1111             type = HEXADECIMAL;

  1112         } else {

  1113             // Check for possible octal constant.

  1114             boolean octal = digit == 0;

  1115             // Skip first digit if not leading '.'.

  1116             if (digit != -1) {

  1117                 skip(1);

  1118             }

  1120             // Skip remaining digits.

  1121             while ((digit = convertDigit(ch0, 10)) != -1) {

  1122                 // Check octal only digits.

  1123                 octal = octal && digit < 8;

  1124                 // Skip digit.

  1125                 skip(1);

  1126             }

  1128             if (octal && position - start > 1) {

  1129                 type = OCTAL;

  1130             } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {

  1131                 // Must be a double.

  1132                 if (ch0 == '.') {

  1133                     // Skip period.

  1134                     skip(1);

  1135                     // Skip mantissa.

  1136                     while (convertDigit(ch0, 10) != -1) {

  1137                         skip(1);

  1138                     }

  1139                 }

  1141                 // Detect exponent.

  1142                 if (ch0 == 'E' || ch0 == 'e') {

  1143                     // Skip E.

  1144                     skip(1);

  1145                     // Detect and skip exponent sign.

  1146                     if (ch0 == '+' || ch0 == '-') {

  1147                         skip(1);

  1148                     }

  1149                     // Skip exponent.

  1150                     while (convertDigit(ch0, 10) != -1) {

  1151                         skip(1);

  1152                     }

  1153                 }

  1155                 type = FLOATING;

  1156             }

  1157         }

  1159         if (Character.isJavaIdentifierStart(ch0)) {

  1160             error(Lexer.message("missing.space.after.number"), type, position, 1);

  1161         }

  1163         // Add number token.

  1164         add(type, start);

  1165     }

  1167     /**

  1168      * Convert a regex token to a token object.

  1169      *

  1170      * @param start  Position in source content.

  1171      * @param length Length of regex token.

  1172      * @return Regex token object.

  1173      */

  1174     XMLToken valueOfXML(final int start, final int length) {

  1175         return new XMLToken(source.getString(start, length));

  1176     }

  1178     /**

  1179      * Scan over a XML token.

  1180      *

  1181      * @return TRUE if is an XML literal.

  1182      */

  1183     private boolean scanXMLLiteral() {

  1184         assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);

  1185         if (XML_LITERALS) {

  1186             // Record beginning of xml expression.

  1187             final int start = position;

  1189             int openCount = 0;

  1191             do {

  1192                 if (ch0 == '<') {

  1193                     if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {

  1194                         skip(3);

  1195                         openCount--;

  1196                     } else if (Character.isJavaIdentifierStart(ch1)) {

  1197                         skip(2);

  1198                         openCount++;

  1199                     } else if (ch1 == '?') {

  1200                         skip(2);

  1201                     } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {

  1202                         skip(4);

  1203                     } else {

  1204                         reset(start);

  1205                         return false;

  1206                     }

  1208                     while (!atEOF() && ch0 != '>') {

  1209                         if (ch0 == '/' && ch1 == '>') {

  1210                             openCount--;

  1211                             skip(1);

  1212                             break;

  1213                         } else if (ch0 == '\"' || ch0 == '\'') {

  1214                             scanString(false);

  1215                         } else {

  1216                             skip(1);

  1217                         }

  1218                     }

  1220                     if (ch0 != '>') {

  1221                         reset(start);

  1222                         return false;

  1223                     }

  1225                     skip(1);

  1226                 } else if (atEOF()) {

  1227                     reset(start);

  1228                     return false;

  1229                 } else {

  1230                     skip(1);

  1231                 }

  1232             } while (openCount > 0);

  1234             add(XML, start);

  1235             return true;

  1236         }

  1238         return false;

  1239     }

  1241     /**

  1242      * Scan over identifier characters.

  1243      *

  1244      * @return Length of identifier or zero if none found.

  1245      */

  1246     private int scanIdentifier() {

  1247         final int start = position;

  1249         // Make sure first character is valid start character.

  1250         if (ch0 == '\\' && ch1 == 'u') {

  1251             skip(2);

  1252             final int ch = hexSequence(4, TokenType.IDENT);

  1254             if (!Character.isJavaIdentifierStart(ch)) {

  1255                 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);

  1256             }

  1257         } else if (!Character.isJavaIdentifierStart(ch0)) {

  1258             // Not an identifier.

  1259             return 0;

  1260         }

  1262         // Make sure remaining characters are valid part characters.

  1263         while (!atEOF()) {

  1264             if (ch0 == '\\' && ch1 == 'u') {

  1265                 skip(2);

  1266                 final int ch = hexSequence(4, TokenType.IDENT);

  1268                 if (!Character.isJavaIdentifierPart(ch)) {

  1269                     error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);

  1270                 }

  1271             } else if (Character.isJavaIdentifierPart(ch0)) {

  1272                 skip(1);

  1273             } else {

  1274                 break;

  1275             }

  1276         }

  1278         // Length of identifier sequence.

  1279         return position - start;

  1280     }

  1282     /**

  1283      * Compare two identifiers (in content) for equality.

  1284      *

  1285      * @param aStart  Start of first identifier.

  1286      * @param aLength Length of first identifier.

  1287      * @param bStart  Start of second identifier.

  1288      * @param bLength Length of second identifier.

  1289      * @return True if equal.

  1290      */

  1291     private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {

  1292         if (aLength == bLength) {

  1293             for (int i = 0; i < aLength; i++) {

  1294                 if (content[aStart + i] != content[bStart + i]) {

  1295                     return false;

  1296                 }

  1297             }

  1299             return true;

  1300         }

  1302         return false;

  1303     }

  1305     /**

  1306      * Detect if a line starts with a marker identifier.

  1307      *

  1308      * @param identStart  Start of identifier.

  1309      * @param identLength Length of identifier.

  1310      * @return True if detected.

  1311      */

  1312     private boolean hasHereMarker(final int identStart, final int identLength) {

  1313         // Skip any whitespace.

  1314         skipWhitespace(false);

  1316         return identifierEqual(identStart, identLength, position, scanIdentifier());

  1317     }

  1319     /**

  1320      * Lexer to service edit strings.

  1321      */

  1322     private static class EditStringLexer extends Lexer {

  1323         /** Type of string literals to emit. */

  1324         final TokenType stringType;

  1326         /*

  1327          * Constructor.

  1328          */

  1330         EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {

  1331             super(lexer, stringState);

  1333             this.stringType = stringType;

  1334         }

  1336         /**

  1337          * Lexify the contents of the string.

  1338          */

  1339         @Override

  1340         public void lexify() {

  1341             // Record start of string position.

  1342             int stringStart = position;

  1343             // Indicate that the priming first string has not been emitted.

  1344             boolean primed = false;

  1346             while (true) {

  1347                 // Detect end of content.

  1348                 if (atEOF()) {

  1349                     break;

  1350                 }

  1352                 // Honour escapes (should be well formed.)

  1353                 if (ch0 == '\\' && stringType == ESCSTRING) {

  1354                     skip(2);

  1356                     continue;

  1357                 }

  1359                 // If start of expression.

  1360                 if (ch0 == '$' && ch1 == '{') {

  1361                     if (!primed || stringStart != position) {

  1362                         if (primed) {

  1363                             add(ADD, stringStart, stringStart + 1);

  1364                         }

  1366                         add(stringType, stringStart, position);

  1367                         primed = true;

  1368                     }

  1370                     // Skip ${

  1371                     skip(2);

  1373                     // Save expression state.

  1374                     final State expressionState = saveState();

  1376                     // Start with one open brace.

  1377                     int braceCount = 1;

  1379                     // Scan for the rest of the string.

  1380                     while (!atEOF()) {

  1381                         // If closing brace.

  1382                         if (ch0 == '}') {

  1383                             // Break only only if matching brace.

  1384                             if (--braceCount == 0) {

  1385                                 break;

  1386                             }

  1387                         } else if (ch0 == '{') {

  1388                             // Bump up the brace count.

  1389                             braceCount++;

  1390                         }

  1392                         // Skip to next character.

  1393                         skip(1);

  1394                     }

  1396                     // If braces don't match then report an error.

  1397                     if (braceCount != 0) {

  1398                         error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);

  1399                     }

  1401                     // Mark end of expression.

  1402                     expressionState.setLimit(position);

  1403                     // Skip closing brace.

  1404                     skip(1);

  1406                     // Start next string.

  1407                     stringStart = position;

  1409                     // Concatenate expression.

  1410                     add(ADD, expressionState.position, expressionState.position + 1);

  1411                     add(LPAREN, expressionState.position, expressionState.position + 1);

  1413                     // Scan expression.

  1414                     final Lexer lexer = new Lexer(this, expressionState);

  1415                     lexer.lexify();

  1417                     // Close out expression parenthesis.

  1418                     add(RPAREN, position - 1, position);

  1420                     continue;

  1421                 }

  1423                 // Next character in string.

  1424                 skip(1);

  1425             }

  1427             // If there is any unemitted string portion.

  1428             if (stringStart != limit) {

  1429                 // Concatenate remaining string.

  1430                 if (primed) {

  1431                     add(ADD, stringStart, 1);

  1432                 }

  1434                 add(stringType, stringStart, limit);

  1435             }

  1436         }

  1438     }

  1440     /**

  1441      * Edit string for nested expressions.

  1442      *

  1443      * @param stringType  Type of string literals to emit.

  1444      * @param stringState State of lexer at start of string.

  1445      */

  1446     private void editString(final TokenType stringType, final State stringState) {

  1447         // Use special lexer to scan string.

  1448         final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);

  1449         lexer.lexify();

  1451         // Need to keep lexer informed.

  1452         last = stringType;

  1453     }

  1455     /**

  1456      * Scan over a here string.

  1457      *

  1458      * @return TRUE if is a here string.

  1459      */

  1460     private boolean scanHereString(final LineInfoReceiver lir) {

  1461         assert ch0 == '<' && ch1 == '<';

  1462         if (scripting) {

  1463             // Record beginning of here string.

  1464             final State saved = saveState();

  1466             // << or <<<

  1467             final boolean excludeLastEOL = ch2 != '<';

  1469             if (excludeLastEOL) {

  1470                 skip(2);

  1471             } else {

  1472                 skip(3);

  1473             }

  1475             // Scan identifier.

  1476             final int identStart = position;

  1477             final int identLength = scanIdentifier();

  1479             // Check for identifier.

  1480             if (identLength == 0) {

  1481                 // Treat as shift.

  1482                 restoreState(saved);

  1484                 return false;

  1485             }

  1487             // Record rest of line.

  1488             final State restState = saveState();

  1489             // keep line number updated

  1490             int lastLine = line;

  1492             skipLine(false);

  1493             lastLine++;

  1494             int lastLinePosition = position;

  1495             restState.setLimit(position);

  1497             // Record beginning of string.

  1498             final State stringState = saveState();

  1499             int stringEnd = position;

  1501             // Hunt down marker.

  1502             while (!atEOF()) {

  1503                 // Skip any whitespace.

  1504                 skipWhitespace(false);

  1506                 if (hasHereMarker(identStart, identLength)) {

  1507                     break;

  1508                 }

  1510                 skipLine(false);

  1511                 lastLine++;

  1512                 lastLinePosition = position;

  1513                 stringEnd = position;

  1514             }

  1516             // notify last line information

  1517             lir.lineInfo(lastLine, lastLinePosition);

  1519             // Record end of string.

  1520             stringState.setLimit(stringEnd);

  1522             // If marker is missing.

  1523             if (stringState.isEmpty() || atEOF()) {

  1524                 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);

  1525                 restoreState(saved);

  1527                 return false;

  1528             }

  1530             // Remove last end of line if specified.

  1531             if (excludeLastEOL) {

  1532                 // Handles \n.

  1533                 if (content[stringEnd - 1] == '\n') {

  1534                     stringEnd--;

  1535                 }

  1537                 // Handles \r and \r\n.

  1538                 if (content[stringEnd - 1] == '\r') {

  1539                     stringEnd--;

  1540                 }

  1542                 // Update end of string.

  1543                 stringState.setLimit(stringEnd);

  1544             }

  1546             // Edit string if appropriate.

  1547             if (scripting && !stringState.isEmpty()) {

  1548                 editString(STRING, stringState);

  1549             } else {

  1550                 // Add here string.

  1551                 add(STRING, stringState.position, stringState.limit);

  1552             }

  1554             // Scan rest of original line.

  1555             final Lexer restLexer = new Lexer(this, restState);

  1557             restLexer.lexify();

  1559             return true;

  1560         }

  1562         return false;

  1563     }

  1565     /**

  1566      * Breaks source content down into lex units, adding tokens to the token

  1567      * stream. The routine scans until the stream buffer is full. Can be called

  1568      * repeatedly until EOF is detected.

  1569      */

  1570     public void lexify() {

  1571         while (!stream.isFull() || nested) {

  1572             // Skip over whitespace.

  1573             skipWhitespace(true);

  1575             // Detect end of file.

  1576             if (atEOF()) {

  1577                 if (!nested) {

  1578                     // Add an EOF token at the end.

  1579                     add(EOF, position);

  1580                 }

  1582                 break;

  1583             }

  1585             // Check for comments. Note that we don't scan for regexp and other literals here as

  1586             // we may not have enough context to distinguish them from similar looking operators.

  1587             // Instead we break on ambiguous operators below and let the parser decide.

  1588             if (ch0 == '/' && skipComments()) {

  1589                 continue;

  1590             }

  1592             if (scripting && ch0 == '#' && skipComments()) {

  1593                 continue;

  1594             }

  1596             // TokenType for lookup of delimiter or operator.

  1597             TokenType type;

  1599             if (ch0 == '.' && convertDigit(ch1, 10) != -1) {

  1600                 // '.' followed by digit.

  1601                 // Scan and add a number.

  1602                 scanNumber();

  1603             } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {

  1604                 // Get the number of characters in the token.

  1605                 final int typeLength = type.getLength();

  1606                 // Skip that many characters.

  1607                 skip(typeLength);

  1608                 // Add operator token.

  1609                 add(type, position - typeLength);

  1610                 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.

  1611                 // We break to let the parser decide what it is.

  1612                 if (canStartLiteral(type)) {

  1613                     break;

  1614                 } else if (type == LBRACE && pauseOnNextLeftBrace) {

  1615                     pauseOnNextLeftBrace = false;

  1616                     break;

  1617                 }

  1618             } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {

  1619                 // Scan and add identifier or keyword.

  1620                 scanIdentifierOrKeyword();

  1621             } else if (isStringDelimiter(ch0)) {

  1622                 // Scan and add a string.

  1623                 scanString(true);

  1624             } else if (Character.isDigit(ch0)) {

  1625                 // Scan and add a number.

  1626                 scanNumber();

  1627             } else {

  1628                 // Don't recognize this character.

  1629                 skip(1);

  1630                 add(ERROR, position - 1);

  1631             }

  1632         }

  1633     }

  1635     /**

  1636      * Return value of token given its token descriptor.

  1637      *

  1638      * @param token  Token descriptor.

  1639      * @return JavaScript value.

  1640      */

  1641     Object getValueOf(final long token, final boolean strict) {

  1642         final int start = Token.descPosition(token);

  1643         final int len   = Token.descLength(token);

  1645         switch (Token.descType(token)) {

  1646         case DECIMAL:

  1647             return Lexer.valueOf(source.getString(start, len), 10); // number

  1648         case OCTAL:

  1649             return Lexer.valueOf(source.getString(start, len), 8); // number

  1650         case HEXADECIMAL:

  1651             return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number

  1652         case FLOATING:

  1653             final String str   = source.getString(start, len);

  1654             final double value = Double.valueOf(str);

  1655             if (str.indexOf('.') != -1) {

  1656                 return value; //number

  1657             }

  1658             //anything without an explicit decimal point is still subject to a

  1659             //"representable as int or long" check. Then the programmer does not

  1660             //explicitly code something as a double. For example new Color(int, int, int)

  1661             //and new Color(float, float, float) will get ambiguous for cases like

  1662             //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.

  1663             //yet we don't want e.g. 1e6 to be a double unnecessarily

  1664             if (JSType.isRepresentableAsInt(value) && !JSType.isNegativeZero(value)) {

  1665                 return (int)value;

  1666             } else if (JSType.isRepresentableAsLong(value) && !JSType.isNegativeZero(value)) {

  1667                 return (long)value;

  1668             }

  1669             return value;

  1670         case STRING:

  1671             return source.getString(start, len); // String

  1672         case ESCSTRING:

  1673             return valueOfString(start, len, strict); // String

  1674         case IDENT:

  1675             return valueOfIdent(start, len); // String

  1676         case REGEX:

  1677             return valueOfPattern(start, len); // RegexToken::LexerToken

  1678         case XML:

  1679             return valueOfXML(start, len); // XMLToken::LexerToken

  1680         case DIRECTIVE_COMMENT:

  1681             return source.getString(start, len);

  1682         default:

  1683             break;

  1684         }

  1686         return null;

  1687     }

  1689     /**

  1690      * Get the correctly localized error message for a given message id format arguments

  1691      * @param msgId message id

  1692      * @param args  format arguments

  1693      * @return message

  1694      */

  1695     protected static String message(final String msgId, final String... args) {

  1696         return ECMAErrors.getMessage("lexer.error." + msgId, args);

  1697     }

  1699     /**

  1700      * Generate a runtime exception

  1701      *

  1702      * @param message       error message

  1703      * @param type          token type

  1704      * @param start         start position of lexed error

  1705      * @param length        length of lexed error

  1706      * @throws ParserException  unconditionally

  1707      */

  1708     protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {

  1709         final long token     = Token.toDesc(type, start, length);

  1710         final int  pos       = Token.descPosition(token);

  1711         final int  lineNum   = source.getLine(pos);

  1712         final int  columnNum = source.getColumn(pos);

  1713         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);

  1714         throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);

  1715     }

  1717     /**

  1718      * Helper class for Lexer tokens, e.g XML or RegExp tokens.

  1719      * This is the abstract superclass

  1720      */

  1721     public static abstract class LexerToken implements Serializable {

  1722         private static final long serialVersionUID = 1L;

  1724         private final String expression;

  1726         /**

  1727          * Constructor

  1728          * @param expression token expression

  1729          */

  1730         protected LexerToken(final String expression) {

  1731             this.expression = expression;

  1732         }

  1734         /**

  1735          * Get the expression

  1736          * @return expression

  1737          */

  1738         public String getExpression() {

  1739             return expression;

  1740         }

  1741     }

  1743     /**

  1744      * Temporary container for regular expressions.

  1745      */

  1746     public static class RegexToken extends LexerToken {

  1747         private static final long serialVersionUID = 1L;

  1749         /** Options. */

  1750         private final String options;

  1752         /**

  1753          * Constructor.

  1754          *

  1755          * @param expression  regexp expression

  1756          * @param options     regexp options

  1757          */

  1758         public RegexToken(final String expression, final String options) {

  1759             super(expression);

  1760             this.options = options;

  1761         }

  1763         /**

  1764          * Get regexp options

  1765          * @return options

  1766          */

  1767         public String getOptions() {

  1768             return options;

  1769         }

  1771         @Override

  1772         public String toString() {

  1773             return '/' + getExpression() + '/' + options;

  1774         }

  1775     }

  1777     /**

  1778      * Temporary container for XML expression.

  1779      */

  1780     public static class XMLToken extends LexerToken {

  1781         private static final long serialVersionUID = 1L;

  1783         /**

  1784          * Constructor.

  1785          *

  1786          * @param expression  XML expression

  1787          */

  1788         public XMLToken(final String expression) {

  1789             super(expression);

  1790         }

  1791     }

  1792 }

Mercurial > jdk8-mips64-public > nashorn / file revision

src/jdk/nashorn/internal/parser/Lexer.java@8c51767d534d

src/jdk/nashorn/internal/parser/Lexer.java