Tue, 09 Sep 2014 15:34:14 +0200
8057931: Instead of not skipping small functions in parser, make lexer avoid them
Reviewed-by: hannesw, sundar
1.1 --- a/src/jdk/nashorn/internal/parser/AbstractParser.java Tue Sep 09 15:33:58 2014 +0200 1.2 +++ b/src/jdk/nashorn/internal/parser/AbstractParser.java Tue Sep 09 15:34:14 2014 +0200 1.3 @@ -326,18 +326,28 @@ 1.4 } 1.5 1.6 /** 1.7 - * Check next token and advance. 1.8 + * Check current token and advance to the next token. 1.9 * 1.10 * @param expected Expected tokenType. 1.11 * 1.12 * @throws ParserException on unexpected token type 1.13 */ 1.14 protected final void expect(final TokenType expected) throws ParserException { 1.15 + expectDontAdvance(expected); 1.16 + next(); 1.17 + } 1.18 + 1.19 + /** 1.20 + * Check current token, but don't advance to the next token. 1.21 + * 1.22 + * @param expected Expected tokenType. 1.23 + * 1.24 + * @throws ParserException on unexpected token type 1.25 + */ 1.26 + protected final void expectDontAdvance(final TokenType expected) throws ParserException { 1.27 if (type != expected) { 1.28 throw error(expectMessage(expected)); 1.29 } 1.30 - 1.31 - next(); 1.32 } 1.33 1.34 /**
2.1 --- a/src/jdk/nashorn/internal/parser/Lexer.java Tue Sep 09 15:33:58 2014 +0200 2.2 +++ b/src/jdk/nashorn/internal/parser/Lexer.java Tue Sep 09 15:34:14 2014 +0200 2.3 @@ -35,6 +35,7 @@ 2.4 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING; 2.5 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING; 2.6 import static jdk.nashorn.internal.parser.TokenType.FLOATING; 2.7 +import static jdk.nashorn.internal.parser.TokenType.FUNCTION; 2.8 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL; 2.9 import static jdk.nashorn.internal.parser.TokenType.LBRACE; 2.10 import static jdk.nashorn.internal.parser.TokenType.LPAREN; 2.11 @@ -85,6 +86,9 @@ 2.12 /** Type of last token added. */ 2.13 private TokenType last; 2.14 2.15 + private final boolean pauseOnFunctionBody; 2.16 + private boolean pauseOnNextLeftBrace; 2.17 + 2.18 private static final String SPACETAB = " \t"; // ASCII space and tab 2.19 private static final String LFCR = "\n\r"; // line feed and carriage return (ctrl-m) 2.20 2.21 @@ -182,20 +186,23 @@ 2.22 * @param scripting are we in scripting mode 2.23 */ 2.24 public Lexer(final Source source, final TokenStream stream, final boolean scripting) { 2.25 - this(source, 0, source.getLength(), stream, scripting); 2.26 + this(source, 0, source.getLength(), stream, scripting, false); 2.27 } 2.28 2.29 /** 2.30 - * Contructor 2.31 + * Constructor 2.32 * 2.33 * @param source the source 2.34 * @param start start position in source from which to start lexing 2.35 * @param len length of source segment to lex 2.36 * @param stream token stream to lex 2.37 * @param scripting are we in scripting mode 2.38 + * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a 2.39 + * function body. This is used with the feature where the parser is skipping nested function bodies to 2.40 + * avoid reading ahead unnecessarily when we skip the function bodies. 
2.41 */ 2.42 2.43 - public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting) { 2.44 + public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean pauseOnFunctionBody) { 2.45 super(source.getContent(), 1, start, len); 2.46 this.source = source; 2.47 this.stream = stream; 2.48 @@ -203,6 +210,8 @@ 2.49 this.nested = false; 2.50 this.pendingLine = 1; 2.51 this.last = EOL; 2.52 + 2.53 + this.pauseOnFunctionBody = pauseOnFunctionBody; 2.54 } 2.55 2.56 private Lexer(final Lexer lexer, final State state) { 2.57 @@ -216,6 +225,7 @@ 2.58 pendingLine = state.pendingLine; 2.59 linePosition = state.linePosition; 2.60 last = EOL; 2.61 + pauseOnFunctionBody = false; 2.62 } 2.63 2.64 static class State extends Scanner.State { 2.65 @@ -810,6 +820,9 @@ 2.66 final int length = scanIdentifier(); 2.67 // Check to see if it is a keyword. 2.68 final TokenType type = TokenLookup.lookupKeyword(content, start, length); 2.69 + if (type == FUNCTION && pauseOnFunctionBody) { 2.70 + pauseOnNextLeftBrace = true; 2.71 + } 2.72 // Add keyword or identifier token. 2.73 add(type, start); 2.74 } 2.75 @@ -1597,6 +1610,9 @@ 2.76 // We break to let the parser decide what it is. 2.77 if (canStartLiteral(type)) { 2.78 break; 2.79 + } else if (type == LBRACE && pauseOnNextLeftBrace) { 2.80 + pauseOnNextLeftBrace = false; 2.81 + break; 2.82 } 2.83 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') { 2.84 // Scan and add identifier or keyword.
3.1 --- a/src/jdk/nashorn/internal/parser/Parser.java Tue Sep 09 15:33:58 2014 +0200 3.2 +++ b/src/jdk/nashorn/internal/parser/Parser.java Tue Sep 09 15:34:14 2014 +0200 3.3 @@ -272,7 +272,7 @@ 3.4 3.5 try { 3.6 stream = new TokenStream(); 3.7 - lexer = new Lexer(source, startPos, len, stream, scripting && !env._no_syntax_extensions); 3.8 + lexer = new Lexer(source, startPos, len, stream, scripting && !env._no_syntax_extensions, reparsedFunction != null); 3.9 lexer.line = lexer.pendingLine = lineOffset + 1; 3.10 line = lineOffset; 3.11 3.12 @@ -2869,9 +2869,9 @@ 3.13 } 3.14 functionNode.setFinish(lastFinish); 3.15 } else { 3.16 - expect(LBRACE); 3.17 - final int lastLexed = stream.last(); 3.18 + expectDontAdvance(LBRACE); 3.19 if (parseBody || !skipFunctionBody(functionNode)) { 3.20 + next(); 3.21 // Gather the function elements. 3.22 final List<Statement> prevFunctionDecls = functionDeclarations; 3.23 functionDeclarations = new ArrayList<>(); 3.24 @@ -2883,13 +2883,7 @@ 3.25 } 3.26 3.27 lastToken = token; 3.28 - // Avoiding storing parser state if the function body was small (that is, the next token 3.29 - // to be read from the token stream is before the last token lexed before we entered 3.30 - // function body). That'll force the function to be reparsed instead of skipped. Skipping 3.31 - // involves throwing away and recreating the lexer and the token stream, so for small 3.32 - // functions it is likely more economical to not bother with skipping (both in terms of 3.33 - // storing the state, and in terms of throwing away lexer and token stream). 
3.34 - if (parseBody && lastLexed < stream.first()) { 3.35 + if (parseBody) { 3.36 // Since the lexer can read ahead and lexify some number of tokens in advance and have 3.37 // them buffered in the TokenStream, we need to produce a lexer state as it was just 3.38 // before it lexified RBRACE, and not whatever is its current (quite possibly well read 3.39 @@ -2964,10 +2958,7 @@ 3.40 return false; 3.41 } 3.42 final ParserState parserState = (ParserState)data.getEndParserState(); 3.43 - if (parserState == null) { 3.44 - // The function has no stored parser state; it was deemed too small to be skipped. 3.45 - return false; 3.46 - } 3.47 + assert parserState != null; 3.48 3.49 stream.reset(); 3.50 lexer = parserState.createLexer(source, lexer, stream, scripting && !env._no_syntax_extensions); 3.51 @@ -2998,7 +2989,7 @@ 3.52 } 3.53 3.54 Lexer createLexer(final Source source, final Lexer lexer, final TokenStream stream, final boolean scripting) { 3.55 - final Lexer newLexer = new Lexer(source, position, lexer.limit - position, stream, scripting); 3.56 + final Lexer newLexer = new Lexer(source, position, lexer.limit - position, stream, scripting, true); 3.57 newLexer.restoreState(new Lexer.State(position, Integer.MAX_VALUE, line, -1, linePosition, SEMICOLON)); 3.58 return newLexer; 3.59 }