src/jdk/nashorn/internal/parser/AbstractParser.java

Tue, 09 Sep 2014 15:34:14 +0200

author
attila
date
Tue, 09 Sep 2014 15:34:14 +0200
changeset 998
b788246cf987
parent 975
85e3f07fc5fc
child 1029
70597fd25c61
permissions
-rw-r--r--

8057931: Instead of not skipping small functions in parser, make lexer avoid them instead
Reviewed-by: hannesw, sundar

     1 /*
     2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package jdk.nashorn.internal.parser;
    28 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
    29 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
    30 import static jdk.nashorn.internal.parser.TokenType.EOF;
    31 import static jdk.nashorn.internal.parser.TokenType.EOL;
    32 import static jdk.nashorn.internal.parser.TokenType.IDENT;
    34 import java.util.HashMap;
    35 import java.util.Map;
    36 import jdk.nashorn.internal.ir.IdentNode;
    37 import jdk.nashorn.internal.ir.LiteralNode;
    38 import jdk.nashorn.internal.parser.Lexer.LexerToken;
    39 import jdk.nashorn.internal.parser.Lexer.RegexToken;
    40 import jdk.nashorn.internal.runtime.ECMAErrors;
    41 import jdk.nashorn.internal.runtime.ErrorManager;
    42 import jdk.nashorn.internal.runtime.JSErrorType;
    43 import jdk.nashorn.internal.runtime.ParserException;
    44 import jdk.nashorn.internal.runtime.Source;
    45 import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
    47 /**
    48  * Base class for parsers.
    49  */
    50 public abstract class AbstractParser {
    51     /** Source to parse. */
    52     protected final Source source;
    54     /** Error manager to report errors. */
    55     protected final ErrorManager errors;
    57     /** Stream of lex tokens to parse. */
    58     protected TokenStream stream;
    60     /** Index of current token. */
    61     protected int k;
    63     /** Previous token - accessible to sub classes */
    64     protected long previousToken;
    66     /** Descriptor of current token. */
    67     protected long token;
    69     /** Type of current token. */
    70     protected TokenType type;
    72     /** Type of last token. */
    73     protected TokenType last;
    75     /** Start position of current token. */
    76     protected int start;
    78     /** Finish position of previous token. */
    79     protected int finish;
    81     /** Current line number. */
    82     protected int line;
    84     /** Position of last EOL + 1. */
    85     protected int linePosition;
    87     /** Lexer used to scan source content. */
    88     protected Lexer lexer;
    90     /** Is this parser running under strict mode? */
    91     protected boolean isStrictMode;
    93     /** What should line numbers be counted from? */
    94     protected final int lineOffset;
    96     private final Map<String, String> canonicalNames = new HashMap<>();
    98     /**
    99      * Construct a parser.
   100      *
   101      * @param source     Source to parse.
   102      * @param errors     Error reporting manager.
   103      * @param strict     True if we are in strict mode
   104      * @param lineOffset Offset from which lines should be counted
   105      */
   106     protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) {
   107         this.source       = source;
   108         this.errors       = errors;
   109         this.k            = -1;
   110         this.token        = Token.toDesc(EOL, 0, 1);
   111         this.type         = EOL;
   112         this.last         = EOL;
   113         this.isStrictMode = strict;
   114         this.lineOffset   = lineOffset;
   115     }
   117     /**
   118      * Get the ith token.
   119      *
   120      * @param i Index of token.
   121      *
   122      * @return  the token
   123      */
   124     protected final long getToken(final int i) {
   125         // Make sure there are enough tokens available.
   126         while (i > stream.last()) {
   127             // If we need to buffer more for lookahead.
   128             if (stream.isFull()) {
   129                 stream.grow();
   130             }
   132             // Get more tokens.
   133             lexer.lexify();
   134         }
   136         return stream.get(i);
   137     }
   139     /**
   140      * Return the tokenType of the ith token.
   141      *
   142      * @param i Index of token
   143      *
   144      * @return the token type
   145      */
   146     protected final TokenType T(final int i) {
   147         // Get token descriptor and extract tokenType.
   148         return Token.descType(getToken(i));
   149     }
   151     /**
   152      * Seek next token that is not an EOL or comment.
   153      *
   154      * @return tokenType of next token.
   155      */
   156     protected final TokenType next() {
   157         do {
   158             nextOrEOL();
   159         } while (type == EOL || type == COMMENT);
   161         return type;
   162     }
   164     /**
   165      * Seek next token or EOL (skipping comments.)
   166      *
   167      * @return tokenType of next token.
   168      */
   169     protected final TokenType nextOrEOL() {
   170         do {
   171             nextToken();
   172             if (type == DIRECTIVE_COMMENT) {
   173                 checkDirectiveComment();
   174             }
   175         } while (type == COMMENT || type == DIRECTIVE_COMMENT);
   177         return type;
   178     }
   180     // sourceURL= after directive comment
   181     private static final String SOURCE_URL_PREFIX = "sourceURL=";
   183     // currently only @sourceURL=foo supported
   184     private void checkDirectiveComment() {
   185         // if already set, ignore this one
   186         if (source.getExplicitURL() != null) {
   187             return;
   188         }
   190         final String comment = (String) lexer.getValueOf(token, isStrictMode);
   191         final int len = comment.length();
   192         // 4 characters for directive comment marker //@\s or //#\s
   193         if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) {
   194             source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length()));
   195         }
   196     }
   198     /**
   199      * Seek next token.
   200      *
   201      * @return tokenType of next token.
   202      */
   203     private TokenType nextToken() {
   204         // Capture last token tokenType.
   205         last = type;
   206         if (type != EOF) {
   208             // Set up next token.
   209             k++;
   210             final long lastToken = token;
   211             previousToken = token;
   212             token = getToken(k);
   213             type = Token.descType(token);
   215             // do this before the start is changed below
   216             if (last != EOL) {
   217                 finish = start + Token.descLength(lastToken);
   218             }
   220             if (type == EOL) {
   221                 line         = Token.descLength(token);
   222                 linePosition = Token.descPosition(token);
   223             } else {
   224                 start = Token.descPosition(token);
   225             }
   227         }
   229         return type;
   230     }
   232     /**
   233      * Get the message string for a message ID and arguments
   234      *
   235      * @param msgId The Message ID
   236      * @param args  The arguments
   237      *
   238      * @return The message string
   239      */
   240     protected static String message(final String msgId, final String... args) {
   241         return ECMAErrors.getMessage("parser.error." + msgId, args);
   242     }
   244     /**
   245      * Report an error.
   246      *
   247      * @param message    Error message.
   248      * @param errorToken Offending token.
   249      * @return ParserException upon failure. Caller should throw and not ignore
   250      */
   251     protected final ParserException error(final String message, final long errorToken) {
   252         return error(JSErrorType.SYNTAX_ERROR, message, errorToken);
   253     }
   255     /**
   256      * Report an error.
   257      *
   258      * @param errorType  The error type
   259      * @param message    Error message.
   260      * @param errorToken Offending token.
   261      * @return ParserException upon failure. Caller should throw and not ignore
   262      */
   263     protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) {
   264         final int position  = Token.descPosition(errorToken);
   265         final int lineNum   = source.getLine(position);
   266         final int columnNum = source.getColumn(position);
   267         final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken);
   268         return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken);
   269     }
   271     /**
   272      * Report an error.
   273      *
   274      * @param message Error message.
   275      * @return ParserException upon failure. Caller should throw and not ignore
   276      */
   277     protected final ParserException error(final String message) {
   278         return error(JSErrorType.SYNTAX_ERROR, message);
   279     }
   281     /**
   282      * Report an error.
   283      *
   284      * @param errorType  The error type
   285      * @param message    Error message.
   286      * @return ParserException upon failure. Caller should throw and not ignore
   287      */
   288     protected final ParserException error(final JSErrorType errorType, final String message) {
   289         // TODO - column needs to account for tabs.
   290         final int position = Token.descPosition(token);
   291         final int column = position - linePosition;
   292         final String formatted = ErrorManager.format(message, source, line, column, token);
   293         return new ParserException(errorType, formatted, source, line, column, token);
   294     }
   296     /**
   297      * Report a warning to the error manager.
   298      *
   299      * @param errorType  The error type of the warning
   300      * @param message    Warning message.
   301      * @param errorToken error token
   302      */
   303     protected final void warning(final JSErrorType errorType, final String message, final long errorToken) {
   304         errors.warning(error(errorType, message, errorToken));
   305     }
   307     /**
   308      * Generate 'expected' message.
   309      *
   310      * @param expected Expected tokenType.
   311      *
   312      * @return the message string
   313      */
   314     protected final String expectMessage(final TokenType expected) {
   315         final String tokenString = Token.toString(source, token);
   316         String msg;
   318         if (expected == null) {
   319             msg = AbstractParser.message("expected.stmt", tokenString);
   320         } else {
   321             final String expectedName = expected.getNameOrType();
   322             msg = AbstractParser.message("expected", expectedName, tokenString);
   323         }
   325         return msg;
   326     }
   328     /**
   329      * Check current token and advance to the next token.
   330      *
   331      * @param expected Expected tokenType.
   332      *
   333      * @throws ParserException on unexpected token type
   334      */
   335     protected final void expect(final TokenType expected) throws ParserException {
   336         expectDontAdvance(expected);
   337         next();
   338     }
   340     /**
   341      * Check current token, but don't advance to the next token.
   342      *
   343      * @param expected Expected tokenType.
   344      *
   345      * @throws ParserException on unexpected token type
   346      */
   347     protected final void expectDontAdvance(final TokenType expected) throws ParserException {
   348         if (type != expected) {
   349             throw error(expectMessage(expected));
   350         }
   351     }
   353     /**
   354      * Check next token, get its value and advance.
   355      *
   356      * @param  expected Expected tokenType.
   357      * @return The JavaScript value of the token
   358      * @throws ParserException on unexpected token type
   359      */
   360     protected final Object expectValue(final TokenType expected) throws ParserException {
   361         if (type != expected) {
   362             throw error(expectMessage(expected));
   363         }
   365         final Object value = getValue();
   367         next();
   369         return value;
   370     }
   372     /**
   373      * Get the value of the current token.
   374      *
   375      * @return JavaScript value of the token.
   376      */
   377     protected final Object getValue() {
   378         return getValue(token);
   379     }
   381     /**
   382      * Get the value of a specific token
   383      *
   384      * @param valueToken the token
   385      *
   386      * @return JavaScript value of the token
   387      */
   388     protected final Object getValue(final long valueToken) {
   389         try {
   390             return lexer.getValueOf(valueToken, isStrictMode);
   391         } catch (final ParserException e) {
   392             errors.error(e);
   393         }
   395         return null;
   396     }
   398     /**
   399      * Certain future reserved words can be used as identifiers in
   400      * non-strict mode. Check if the current token is one such.
   401      *
   402      * @return true if non strict mode identifier
   403      */
   404     protected final boolean isNonStrictModeIdent() {
   405         return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT;
   406     }
   408     /**
   409      * Get ident.
   410      *
   411      * @return Ident node.
   412      */
   413     protected final IdentNode getIdent() {
   414         // Capture IDENT token.
   415         long identToken = token;
   417         if (isNonStrictModeIdent()) {
   418             // Fake out identifier.
   419             identToken = Token.recast(token, IDENT);
   420             // Get IDENT.
   421             final String ident = (String)getValue(identToken);
   423             next();
   425             // Create IDENT node.
   426             return createIdentNode(identToken, finish, ident).setIsFutureStrictName();
   427         }
   429         // Get IDENT.
   430         final String ident = (String)expectValue(IDENT);
   431         if (ident == null) {
   432             return null;
   433         }
   434         // Create IDENT node.
   435         return createIdentNode(identToken, finish, ident);
   436     }
   438     /**
   439      * Creates a new {@link IdentNode} as if invoked with a {@link IdentNode#IdentNode(long, int, String)
   440      * constructor} but making sure that the {@code name} is deduplicated within this parse job.
   441      * @param identToken the token for the new {@code IdentNode}
   442      * @param identFinish the finish for the new {@code IdentNode}
   443      * @param name the name for the new {@code IdentNode}. It will be de-duplicated.
   444      * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will
   445      * be deduplicated.
   446      */
   447     protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) {
   448         final String existingName = canonicalNames.putIfAbsent(name, name);
   449         final String canonicalName = existingName != null ? existingName : name;
   450         return new IdentNode(identToken, identFinish, canonicalName);
   451     }
   453     /**
   454      * Check if current token is in identifier name
   455      *
   456      * @return true if current token is an identifier name
   457      */
   458     protected final boolean isIdentifierName() {
   459         final TokenKind kind = type.getKind();
   460         if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) {
   461             return true;
   462         }
   463         // Fake out identifier.
   464         final long identToken = Token.recast(token, IDENT);
   465         // Get IDENT.
   466         final String ident = (String)getValue(identToken);
   467         return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0));
   468     }
   470     /**
   471      * Create an IdentNode from the current token
   472      *
   473      * @return an IdentNode representing the current token
   474      */
   475     protected final IdentNode getIdentifierName() {
   476         if (type == IDENT) {
   477             return getIdent();
   478         } else if (isIdentifierName()) {
   479             // Fake out identifier.
   480             final long identToken = Token.recast(token, IDENT);
   481             // Get IDENT.
   482             final String ident = (String)getValue(identToken);
   483             next();
   484             // Create IDENT node.
   485             return createIdentNode(identToken, finish, ident);
   486         } else {
   487             expect(IDENT);
   488             return null;
   489         }
   490     }
   492     /**
   493      * Create a LiteralNode from the current token
   494      *
   495      * @return LiteralNode representing the current token
   496      * @throws ParserException if any literals fails to parse
   497      */
   498     protected final LiteralNode<?> getLiteral() throws ParserException {
   499         // Capture LITERAL token.
   500         final long literalToken = token;
   502         // Create literal node.
   503         final Object value = getValue();
   504         // Advance to have a correct finish
   505         next();
   507         LiteralNode<?> node = null;
   509         if (value == null) {
   510             node = LiteralNode.newInstance(literalToken, finish);
   511         } else if (value instanceof Number) {
   512             node = LiteralNode.newInstance(literalToken, finish, (Number)value);
   513         } else if (value instanceof String) {
   514             node = LiteralNode.newInstance(literalToken, finish, (String)value);
   515         } else if (value instanceof LexerToken) {
   516             if (value instanceof RegexToken) {
   517                 final RegexToken regex = (RegexToken)value;
   518                 try {
   519                     RegExpFactory.validate(regex.getExpression(), regex.getOptions());
   520                 } catch (final ParserException e) {
   521                     throw error(e.getMessage());
   522                 }
   523             }
   524             node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value);
   525         } else {
   526             assert false : "unknown type for LiteralNode: " + value.getClass();
   527         }
   529         return node;
   530     }
   531 }

mercurial