Tue, 09 Sep 2014 15:34:14 +0200
8057931: Instead of not skipping small functions in parser, make lexer avoid them instead
Reviewed-by: hannesw, sundar
1 /*
2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package jdk.nashorn.internal.parser;
28 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
29 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
30 import static jdk.nashorn.internal.parser.TokenType.EOF;
31 import static jdk.nashorn.internal.parser.TokenType.EOL;
32 import static jdk.nashorn.internal.parser.TokenType.IDENT;
34 import java.util.HashMap;
35 import java.util.Map;
36 import jdk.nashorn.internal.ir.IdentNode;
37 import jdk.nashorn.internal.ir.LiteralNode;
38 import jdk.nashorn.internal.parser.Lexer.LexerToken;
39 import jdk.nashorn.internal.parser.Lexer.RegexToken;
40 import jdk.nashorn.internal.runtime.ECMAErrors;
41 import jdk.nashorn.internal.runtime.ErrorManager;
42 import jdk.nashorn.internal.runtime.JSErrorType;
43 import jdk.nashorn.internal.runtime.ParserException;
44 import jdk.nashorn.internal.runtime.Source;
45 import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
47 /**
48 * Base class for parsers.
49 */
50 public abstract class AbstractParser {
51 /** Source to parse. */
52 protected final Source source;
54 /** Error manager to report errors. */
55 protected final ErrorManager errors;
57 /** Stream of lex tokens to parse. */
58 protected TokenStream stream;
60 /** Index of current token. */
61 protected int k;
63 /** Previous token - accessible to sub classes */
64 protected long previousToken;
66 /** Descriptor of current token. */
67 protected long token;
69 /** Type of current token. */
70 protected TokenType type;
72 /** Type of last token. */
73 protected TokenType last;
75 /** Start position of current token. */
76 protected int start;
78 /** Finish position of previous token. */
79 protected int finish;
81 /** Current line number. */
82 protected int line;
84 /** Position of last EOL + 1. */
85 protected int linePosition;
87 /** Lexer used to scan source content. */
88 protected Lexer lexer;
90 /** Is this parser running under strict mode? */
91 protected boolean isStrictMode;
93 /** What should line numbers be counted from? */
94 protected final int lineOffset;
96 private final Map<String, String> canonicalNames = new HashMap<>();
98 /**
99 * Construct a parser.
100 *
101 * @param source Source to parse.
102 * @param errors Error reporting manager.
103 * @param strict True if we are in strict mode
104 * @param lineOffset Offset from which lines should be counted
105 */
106 protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) {
107 this.source = source;
108 this.errors = errors;
109 this.k = -1;
110 this.token = Token.toDesc(EOL, 0, 1);
111 this.type = EOL;
112 this.last = EOL;
113 this.isStrictMode = strict;
114 this.lineOffset = lineOffset;
115 }
117 /**
118 * Get the ith token.
119 *
120 * @param i Index of token.
121 *
122 * @return the token
123 */
124 protected final long getToken(final int i) {
125 // Make sure there are enough tokens available.
126 while (i > stream.last()) {
127 // If we need to buffer more for lookahead.
128 if (stream.isFull()) {
129 stream.grow();
130 }
132 // Get more tokens.
133 lexer.lexify();
134 }
136 return stream.get(i);
137 }
139 /**
140 * Return the tokenType of the ith token.
141 *
142 * @param i Index of token
143 *
144 * @return the token type
145 */
146 protected final TokenType T(final int i) {
147 // Get token descriptor and extract tokenType.
148 return Token.descType(getToken(i));
149 }
151 /**
152 * Seek next token that is not an EOL or comment.
153 *
154 * @return tokenType of next token.
155 */
156 protected final TokenType next() {
157 do {
158 nextOrEOL();
159 } while (type == EOL || type == COMMENT);
161 return type;
162 }
164 /**
165 * Seek next token or EOL (skipping comments.)
166 *
167 * @return tokenType of next token.
168 */
169 protected final TokenType nextOrEOL() {
170 do {
171 nextToken();
172 if (type == DIRECTIVE_COMMENT) {
173 checkDirectiveComment();
174 }
175 } while (type == COMMENT || type == DIRECTIVE_COMMENT);
177 return type;
178 }
180 // sourceURL= after directive comment
181 private static final String SOURCE_URL_PREFIX = "sourceURL=";
183 // currently only @sourceURL=foo supported
184 private void checkDirectiveComment() {
185 // if already set, ignore this one
186 if (source.getExplicitURL() != null) {
187 return;
188 }
190 final String comment = (String) lexer.getValueOf(token, isStrictMode);
191 final int len = comment.length();
192 // 4 characters for directive comment marker //@\s or //#\s
193 if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) {
194 source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length()));
195 }
196 }
198 /**
199 * Seek next token.
200 *
201 * @return tokenType of next token.
202 */
203 private TokenType nextToken() {
204 // Capture last token tokenType.
205 last = type;
206 if (type != EOF) {
208 // Set up next token.
209 k++;
210 final long lastToken = token;
211 previousToken = token;
212 token = getToken(k);
213 type = Token.descType(token);
215 // do this before the start is changed below
216 if (last != EOL) {
217 finish = start + Token.descLength(lastToken);
218 }
220 if (type == EOL) {
221 line = Token.descLength(token);
222 linePosition = Token.descPosition(token);
223 } else {
224 start = Token.descPosition(token);
225 }
227 }
229 return type;
230 }
232 /**
233 * Get the message string for a message ID and arguments
234 *
235 * @param msgId The Message ID
236 * @param args The arguments
237 *
238 * @return The message string
239 */
240 protected static String message(final String msgId, final String... args) {
241 return ECMAErrors.getMessage("parser.error." + msgId, args);
242 }
244 /**
245 * Report an error.
246 *
247 * @param message Error message.
248 * @param errorToken Offending token.
249 * @return ParserException upon failure. Caller should throw and not ignore
250 */
251 protected final ParserException error(final String message, final long errorToken) {
252 return error(JSErrorType.SYNTAX_ERROR, message, errorToken);
253 }
255 /**
256 * Report an error.
257 *
258 * @param errorType The error type
259 * @param message Error message.
260 * @param errorToken Offending token.
261 * @return ParserException upon failure. Caller should throw and not ignore
262 */
263 protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) {
264 final int position = Token.descPosition(errorToken);
265 final int lineNum = source.getLine(position);
266 final int columnNum = source.getColumn(position);
267 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken);
268 return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken);
269 }
271 /**
272 * Report an error.
273 *
274 * @param message Error message.
275 * @return ParserException upon failure. Caller should throw and not ignore
276 */
277 protected final ParserException error(final String message) {
278 return error(JSErrorType.SYNTAX_ERROR, message);
279 }
281 /**
282 * Report an error.
283 *
284 * @param errorType The error type
285 * @param message Error message.
286 * @return ParserException upon failure. Caller should throw and not ignore
287 */
288 protected final ParserException error(final JSErrorType errorType, final String message) {
289 // TODO - column needs to account for tabs.
290 final int position = Token.descPosition(token);
291 final int column = position - linePosition;
292 final String formatted = ErrorManager.format(message, source, line, column, token);
293 return new ParserException(errorType, formatted, source, line, column, token);
294 }
296 /**
297 * Report a warning to the error manager.
298 *
299 * @param errorType The error type of the warning
300 * @param message Warning message.
301 * @param errorToken error token
302 */
303 protected final void warning(final JSErrorType errorType, final String message, final long errorToken) {
304 errors.warning(error(errorType, message, errorToken));
305 }
307 /**
308 * Generate 'expected' message.
309 *
310 * @param expected Expected tokenType.
311 *
312 * @return the message string
313 */
314 protected final String expectMessage(final TokenType expected) {
315 final String tokenString = Token.toString(source, token);
316 String msg;
318 if (expected == null) {
319 msg = AbstractParser.message("expected.stmt", tokenString);
320 } else {
321 final String expectedName = expected.getNameOrType();
322 msg = AbstractParser.message("expected", expectedName, tokenString);
323 }
325 return msg;
326 }
328 /**
329 * Check current token and advance to the next token.
330 *
331 * @param expected Expected tokenType.
332 *
333 * @throws ParserException on unexpected token type
334 */
335 protected final void expect(final TokenType expected) throws ParserException {
336 expectDontAdvance(expected);
337 next();
338 }
340 /**
341 * Check current token, but don't advance to the next token.
342 *
343 * @param expected Expected tokenType.
344 *
345 * @throws ParserException on unexpected token type
346 */
347 protected final void expectDontAdvance(final TokenType expected) throws ParserException {
348 if (type != expected) {
349 throw error(expectMessage(expected));
350 }
351 }
353 /**
354 * Check next token, get its value and advance.
355 *
356 * @param expected Expected tokenType.
357 * @return The JavaScript value of the token
358 * @throws ParserException on unexpected token type
359 */
360 protected final Object expectValue(final TokenType expected) throws ParserException {
361 if (type != expected) {
362 throw error(expectMessage(expected));
363 }
365 final Object value = getValue();
367 next();
369 return value;
370 }
372 /**
373 * Get the value of the current token.
374 *
375 * @return JavaScript value of the token.
376 */
377 protected final Object getValue() {
378 return getValue(token);
379 }
381 /**
382 * Get the value of a specific token
383 *
384 * @param valueToken the token
385 *
386 * @return JavaScript value of the token
387 */
388 protected final Object getValue(final long valueToken) {
389 try {
390 return lexer.getValueOf(valueToken, isStrictMode);
391 } catch (final ParserException e) {
392 errors.error(e);
393 }
395 return null;
396 }
398 /**
399 * Certain future reserved words can be used as identifiers in
400 * non-strict mode. Check if the current token is one such.
401 *
402 * @return true if non strict mode identifier
403 */
404 protected final boolean isNonStrictModeIdent() {
405 return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT;
406 }
408 /**
409 * Get ident.
410 *
411 * @return Ident node.
412 */
413 protected final IdentNode getIdent() {
414 // Capture IDENT token.
415 long identToken = token;
417 if (isNonStrictModeIdent()) {
418 // Fake out identifier.
419 identToken = Token.recast(token, IDENT);
420 // Get IDENT.
421 final String ident = (String)getValue(identToken);
423 next();
425 // Create IDENT node.
426 return createIdentNode(identToken, finish, ident).setIsFutureStrictName();
427 }
429 // Get IDENT.
430 final String ident = (String)expectValue(IDENT);
431 if (ident == null) {
432 return null;
433 }
434 // Create IDENT node.
435 return createIdentNode(identToken, finish, ident);
436 }
438 /**
439 * Creates a new {@link IdentNode} as if invoked with a {@link IdentNode#IdentNode(long, int, String)
440 * constructor} but making sure that the {@code name} is deduplicated within this parse job.
441 * @param identToken the token for the new {@code IdentNode}
442 * @param identFinish the finish for the new {@code IdentNode}
443 * @param name the name for the new {@code IdentNode}. It will be de-duplicated.
444 * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will
445 * be deduplicated.
446 */
447 protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) {
448 final String existingName = canonicalNames.putIfAbsent(name, name);
449 final String canonicalName = existingName != null ? existingName : name;
450 return new IdentNode(identToken, identFinish, canonicalName);
451 }
453 /**
454 * Check if current token is in identifier name
455 *
456 * @return true if current token is an identifier name
457 */
458 protected final boolean isIdentifierName() {
459 final TokenKind kind = type.getKind();
460 if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) {
461 return true;
462 }
463 // Fake out identifier.
464 final long identToken = Token.recast(token, IDENT);
465 // Get IDENT.
466 final String ident = (String)getValue(identToken);
467 return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0));
468 }
470 /**
471 * Create an IdentNode from the current token
472 *
473 * @return an IdentNode representing the current token
474 */
475 protected final IdentNode getIdentifierName() {
476 if (type == IDENT) {
477 return getIdent();
478 } else if (isIdentifierName()) {
479 // Fake out identifier.
480 final long identToken = Token.recast(token, IDENT);
481 // Get IDENT.
482 final String ident = (String)getValue(identToken);
483 next();
484 // Create IDENT node.
485 return createIdentNode(identToken, finish, ident);
486 } else {
487 expect(IDENT);
488 return null;
489 }
490 }
492 /**
493 * Create a LiteralNode from the current token
494 *
495 * @return LiteralNode representing the current token
496 * @throws ParserException if any literals fails to parse
497 */
498 protected final LiteralNode<?> getLiteral() throws ParserException {
499 // Capture LITERAL token.
500 final long literalToken = token;
502 // Create literal node.
503 final Object value = getValue();
504 // Advance to have a correct finish
505 next();
507 LiteralNode<?> node = null;
509 if (value == null) {
510 node = LiteralNode.newInstance(literalToken, finish);
511 } else if (value instanceof Number) {
512 node = LiteralNode.newInstance(literalToken, finish, (Number)value);
513 } else if (value instanceof String) {
514 node = LiteralNode.newInstance(literalToken, finish, (String)value);
515 } else if (value instanceof LexerToken) {
516 if (value instanceof RegexToken) {
517 final RegexToken regex = (RegexToken)value;
518 try {
519 RegExpFactory.validate(regex.getExpression(), regex.getOptions());
520 } catch (final ParserException e) {
521 throw error(e.getMessage());
522 }
523 }
524 node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value);
525 } else {
526 assert false : "unknown type for LiteralNode: " + value.getClass();
527 }
529 return node;
530 }
531 }