Fri, 17 Oct 2014 14:24:26 +0200
8059843: Make AST serializable
Reviewed-by: hannesw, lagergren
1 /*
2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package jdk.nashorn.internal.parser;
28 import static jdk.nashorn.internal.parser.TokenType.ADD;
29 import static jdk.nashorn.internal.parser.TokenType.COMMENT;
30 import static jdk.nashorn.internal.parser.TokenType.DECIMAL;
31 import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
32 import static jdk.nashorn.internal.parser.TokenType.EOF;
33 import static jdk.nashorn.internal.parser.TokenType.EOL;
34 import static jdk.nashorn.internal.parser.TokenType.ERROR;
35 import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
36 import static jdk.nashorn.internal.parser.TokenType.EXECSTRING;
37 import static jdk.nashorn.internal.parser.TokenType.FLOATING;
38 import static jdk.nashorn.internal.parser.TokenType.FUNCTION;
39 import static jdk.nashorn.internal.parser.TokenType.HEXADECIMAL;
40 import static jdk.nashorn.internal.parser.TokenType.LBRACE;
41 import static jdk.nashorn.internal.parser.TokenType.LPAREN;
42 import static jdk.nashorn.internal.parser.TokenType.OCTAL;
43 import static jdk.nashorn.internal.parser.TokenType.RBRACE;
44 import static jdk.nashorn.internal.parser.TokenType.REGEX;
45 import static jdk.nashorn.internal.parser.TokenType.RPAREN;
46 import static jdk.nashorn.internal.parser.TokenType.STRING;
47 import static jdk.nashorn.internal.parser.TokenType.XML;
49 import java.io.Serializable;
50 import jdk.nashorn.internal.runtime.ECMAErrors;
51 import jdk.nashorn.internal.runtime.ErrorManager;
52 import jdk.nashorn.internal.runtime.JSErrorType;
53 import jdk.nashorn.internal.runtime.JSType;
54 import jdk.nashorn.internal.runtime.ParserException;
55 import jdk.nashorn.internal.runtime.Source;
56 import jdk.nashorn.internal.runtime.options.Options;
58 /**
59 * Responsible for converting source content into a stream of tokens.
60 *
61 */
62 @SuppressWarnings("fallthrough")
63 public class Lexer extends Scanner {
/** Smallest {@code int} value widened to {@code long}; used when deciding whether a parsed long fits in an Integer. */
64 private static final long MIN_INT_L = Integer.MIN_VALUE;
/** Largest {@code int} value widened to {@code long}; used when deciding whether a parsed long fits in an Integer. */
65 private static final long MAX_INT_L = Integer.MAX_VALUE;
/** Value of the "nashorn.lexer.xmlliterals" option; consulted before a '<' token is treated as a possible XML literal. */
67 private static final boolean XML_LITERALS = Options.getBooleanProperty("nashorn.lexer.xmlliterals");
69 /** Content source. */
70 private final Source source;
72 /** Buffered stream for tokens. */
73 private final TokenStream stream;
75 /** True if here and edit strings are supported. */
76 private final boolean scripting;
78 /** True if a nested scan. (scan to completion, no EOF.) */
79 private final boolean nested;
81 /** Line number recorded for the pending (not yet emitted) EOL token, or -1 when no EOL is pending. */
82 int pendingLine;
84 /** Position of last EOL + 1. */
85 private int linePosition;
87 /** Type of last token added. */
88 private TokenType last;
/** True if the lexer was asked at construction time to pause when it reaches a function body. */
90 private final boolean pauseOnFunctionBody;
/** Set once a {@code function} keyword has been scanned while {@link #pauseOnFunctionBody} is true. */
91 private boolean pauseOnNextLeftBrace;
// Character sets used by the whitespace and end-of-line predicates of this class.
93 private static final String SPACETAB = " \t"; // ASCII space and tab
94 private static final String LFCR = "\n\r"; // line feed and carriage return (ctrl-m)
// JSON only treats line feed and carriage return as line ends, and space/tab plus those as whitespace.
96 private static final String JSON_WHITESPACE_EOL = LFCR;
97 private static final String JSON_WHITESPACE = SPACETAB + LFCR;
// JavaScript line terminators: the JSON ones plus the Unicode line and paragraph separators.
99 private static final String JAVASCRIPT_WHITESPACE_EOL =
100 LFCR +
101 "\u2028" + // line separator
102 "\u2029" // paragraph separator
103 ;
// JavaScript whitespace: space, tab, every line terminator and the Unicode space characters listed below.
104 private static final String JAVASCRIPT_WHITESPACE =
105 SPACETAB +
106 JAVASCRIPT_WHITESPACE_EOL +
107 "\u000b" + // line tabulation (vertical tab)
108 "\u000c" + // ff (ctrl-l)
109 "\u00a0" + // Latin-1 space
110 "\u1680" + // Ogham space mark
111 "\u180e" + // separator, Mongolian vowel
112 "\u2000" + // en quad
113 "\u2001" + // em quad
114 "\u2002" + // en space
115 "\u2003" + // em space
116 "\u2004" + // three-per-em space
117 "\u2005" + // four-per-em space
118 "\u2006" + // six-per-em space
119 "\u2007" + // figure space
120 "\u2008" + // punctuation space
121 "\u2009" + // thin space
122 "\u200a" + // hair space
123 "\u202f" + // narrow no-break space
124 "\u205f" + // medium mathematical space
125 "\u3000" + // ideographic space
126 "\ufeff" // byte order mark
127 ;
// The same characters spelled as textual escape sequences (backslash kept literally) for use inside a regular expression.
129 private static final String JAVASCRIPT_WHITESPACE_IN_REGEXP =
130 "\\u000a" + // line feed
131 "\\u000d" + // carriage return (ctrl-m)
132 "\\u2028" + // line separator
133 "\\u2029" + // paragraph separator
134 "\\u0009" + // tab
135 "\\u0020" + // ASCII space
136 "\\u000b" + // line tabulation (vertical tab)
137 "\\u000c" + // ff (ctrl-l)
138 "\\u00a0" + // Latin-1 space
139 "\\u1680" + // Ogham space mark
140 "\\u180e" + // separator, Mongolian vowel
141 "\\u2000" + // en quad
142 "\\u2001" + // em quad
143 "\\u2002" + // en space
144 "\\u2003" + // em space
145 "\\u2004" + // three-per-em space
146 "\\u2005" + // four-per-em space
147 "\\u2006" + // six-per-em space
148 "\\u2007" + // figure space
149 "\\u2008" + // punctuation space
150 "\\u2009" + // thin space
151 "\\u200a" + // hair space
152 "\\u202f" + // narrow no-break space
153 "\\u205f" + // medium mathematical space
154 "\\u3000" + // ideographic space
155 "\\ufeff" // byte order mark
156 ;
158 static String unicodeEscape(final char ch) {
159 final StringBuilder sb = new StringBuilder();
161 sb.append("\\u");
163 final String hex = Integer.toHexString(ch);
164 for (int i = hex.length(); i < 4; i++) {
165 sb.append('0');
166 }
167 sb.append(hex);
169 return sb.toString();
170 }
172 /**
173 * Constructor
174 *
175 * @param source the source
176 * @param stream the token stream to lex
177 */
public Lexer(final Source source, final TokenStream stream) {
    // Whole source, scripting mode off, no pausing on function bodies.
    this(source, 0, source.getLength(), stream, false, false);
}
182 /**
183 * Constructor
184 *
185 * @param source the source
186 * @param stream the token stream to lex
187 * @param scripting are we in scripting mode
188 */
public Lexer(final Source source, final TokenStream stream, final boolean scripting) {
    // Lex the complete source and never pause on function bodies.
    this(source,
         0,
         source.getLength(),
         stream,
         scripting,
         false);
}
193 /**
194 * Constructor
195 *
196 * @param source the source
197 * @param start start position in source from which to start lexing
198 * @param len length of source segment to lex
199 * @param stream token stream to lex
200 * @param scripting are we in scripting mode
201 * @param pauseOnFunctionBody if true, lexer will return from {@link #lexify()} when it encounters a
202 * function body. This is used with the feature where the parser is skipping nested function bodies to
203 * avoid reading ahead unnecessarily when we skip the function bodies.
204 */
public Lexer(final Source source, final int start, final int len, final TokenStream stream, final boolean scripting, final boolean pauseOnFunctionBody) {
    super(source.getContent(), 1, start, len);

    // Collaborators and configuration.
    this.stream = stream;
    this.source = source;
    this.pauseOnFunctionBody = pauseOnFunctionBody;
    this.scripting = scripting;

    // A lexer created through this constructor is never a nested scan.
    this.nested = false;

    // Initial scan state: an EOL is recorded as pending for line 1.
    this.last = EOL;
    this.pendingLine = 1;
}
private Lexer(final Lexer lexer, final State state) {
    super(lexer, state);

    // Share the parent's collaborators and scripting flag.
    this.source = lexer.source;
    this.stream = lexer.stream;
    this.scripting = lexer.scripting;

    // This constructor always produces a nested lexer that never pauses on function bodies.
    this.nested = true;
    this.pauseOnFunctionBody = false;

    // Line bookkeeping comes from the captured state; the last token type is reset to EOL.
    this.linePosition = state.linePosition;
    this.pendingLine = state.pendingLine;
    this.last = EOL;
}
232 static class State extends Scanner.State {
233 /** Pending new line number and position. */
234 public final int pendingLine;
236 /** Position of last EOL + 1. */
237 public final int linePosition;
239 /** Type of last token added. */
240 public final TokenType last;
242 /*
243 * Constructor.
244 */
246 State(final int position, final int limit, final int line, final int pendingLine, final int linePosition, final TokenType last) {
247 super(position, limit, line);
249 this.pendingLine = pendingLine;
250 this.linePosition = linePosition;
251 this.last = last;
252 }
253 }
255 /**
256 * Save the state of the scan.
257 *
258 * @return Captured state.
259 */
260 @Override
261 State saveState() {
262 return new State(position, limit, line, pendingLine, linePosition, last);
263 }
265 /**
266 * Restore the state of the scan.
267 *
268 * @param state
269 * Captured state.
270 */
271 void restoreState(final State state) {
272 super.restoreState(state);
274 pendingLine = state.pendingLine;
275 linePosition = state.linePosition;
276 last = state.last;
277 }
279 /**
280 * Add a new token to the stream.
281 *
282 * @param type
283 * Token type.
284 * @param start
285 * Start position.
286 * @param end
287 * End position.
288 */
289 protected void add(final TokenType type, final int start, final int end) {
290 // Record last token.
291 last = type;
293 // Only emit the last EOL in a cluster.
294 if (type == EOL) {
295 pendingLine = end;
296 linePosition = start;
297 } else {
298 // Write any pending EOL to stream.
299 if (pendingLine != -1) {
300 stream.put(Token.toDesc(EOL, linePosition, pendingLine));
301 pendingLine = -1;
302 }
304 // Write token to stream.
305 stream.put(Token.toDesc(type, start, end - start));
306 }
307 }
309 /**
310 * Add a new token to the stream.
311 *
312 * @param type
313 * Token type.
314 * @param start
315 * Start position.
316 */
317 protected void add(final TokenType type, final int start) {
318 add(type, start, position);
319 }
321 /**
322 * Return the String of valid whitespace characters for regular
323 * expressions in JavaScript
324 * @return regexp whitespace string
325 */
326 public static String getWhitespaceRegExp() {
327 return JAVASCRIPT_WHITESPACE_IN_REGEXP;
328 }
330 /**
331 * Skip end of line.
332 *
333 * @param addEOL true if EOL token should be recorded.
334 */
335 private void skipEOL(final boolean addEOL) {
337 if (ch0 == '\r') { // detect \r\n pattern
338 skip(1);
339 if (ch0 == '\n') {
340 skip(1);
341 }
342 } else { // all other space, ch0 is guaranteed to be EOL or \0
343 skip(1);
344 }
346 // bump up line count
347 line++;
349 if (addEOL) {
350 // Add an EOL token.
351 add(EOL, position, line);
352 }
353 }
355 /**
356 * Skip over rest of line including end of line.
357 *
358 * @param addEOL true if EOL token should be recorded.
359 */
360 private void skipLine(final boolean addEOL) {
361 // Ignore characters.
362 while (!isEOL(ch0) && !atEOF()) {
363 skip(1);
364 }
365 // Skip over end of line.
366 skipEOL(addEOL);
367 }
369 /**
370 * Test whether a char is valid JavaScript whitespace
371 * @param ch a char
372 * @return true if valid JavaScript whitespace
373 */
374 public static boolean isJSWhitespace(final char ch) {
375 return JAVASCRIPT_WHITESPACE.indexOf(ch) != -1;
376 }
378 /**
379 * Test whether a char is valid JavaScript end of line
380 * @param ch a char
381 * @return true if valid JavaScript end of line
382 */
383 public static boolean isJSEOL(final char ch) {
384 return JAVASCRIPT_WHITESPACE_EOL.indexOf(ch) != -1;
385 }
387 /**
388 * Test whether a char is valid JSON whitespace
389 * @param ch a char
390 * @return true if valid JSON whitespace
391 */
392 public static boolean isJsonWhitespace(final char ch) {
393 return JSON_WHITESPACE.indexOf(ch) != -1;
394 }
396 /**
397 * Test whether a char is valid JSON end of line
398 * @param ch a char
399 * @return true if valid JSON end of line
400 */
401 public static boolean isJsonEOL(final char ch) {
402 return JSON_WHITESPACE_EOL.indexOf(ch) != -1;
403 }
405 /**
406 * Test if char is a string delimiter, i.e. a single quote or a double quote.
407 * In scripting mode the back quote that delimits exec strings also counts.
408 * @param ch a char
409 * @return true if string delimiter
410 */
411 protected boolean isStringDelimiter(final char ch) {
412 return ch == '\'' || ch == '"' || (scripting && ch == '`');
413 }
415 /**
416 * Test whether a char is valid JavaScript whitespace
417 * @param ch a char
418 * @return true if valid JavaScript whitespace
419 */
420 protected boolean isWhitespace(final char ch) {
421 return Lexer.isJSWhitespace(ch);
422 }
424 /**
425 * Test whether a char is valid JavaScript end of line
426 * @param ch a char
427 * @return true if valid JavaScript end of line
428 */
429 protected boolean isEOL(final char ch) {
430 return Lexer.isJSEOL(ch);
431 }
433 /**
434 * Skip over whitespace and detect end of line, adding EOL tokens if
435 * encountered.
436 *
437 * @param addEOL true if EOL tokens should be recorded.
438 */
439 private void skipWhitespace(final boolean addEOL) {
440 while (isWhitespace(ch0)) {
441 if (isEOL(ch0)) {
442 skipEOL(addEOL);
443 } else {
444 skip(1);
445 }
446 }
447 }
449 /**
450 * Skip over comments.
451 *
452 * @return True if a comment.
453 */
454 protected boolean skipComments() {
455 // Save the current position.
456 final int start = position;
458 if (ch0 == '/') {
459 // Is it a // comment.
460 if (ch1 == '/') {
461 // Skip over //.
462 skip(2);
464 boolean directiveComment = false;
465 if ((ch0 == '#' || ch0 == '@') && (ch1 == ' ')) {
466 directiveComment = true;
467 }
469 // Scan for EOL.
470 while (!atEOF() && !isEOL(ch0)) {
471 skip(1);
472 }
473 // Did detect a comment.
474 add(directiveComment? DIRECTIVE_COMMENT : COMMENT, start);
475 return true;
476 } else if (ch1 == '*') {
477 // Skip over /*.
478 skip(2);
479 // Scan for */.
480 while (!atEOF() && !(ch0 == '*' && ch1 == '/')) {
481 // If end of line handle else skip character.
482 if (isEOL(ch0)) {
483 skipEOL(true);
484 } else {
485 skip(1);
486 }
487 }
489 if (atEOF()) {
490 // TODO - Report closing */ missing in parser.
491 add(ERROR, start);
492 } else {
493 // Skip */.
494 skip(2);
495 }
497 // Did detect a comment.
498 add(COMMENT, start);
499 return true;
500 }
501 } else if (ch0 == '#') {
502 assert scripting;
503 // shell style comment
504 // Skip over #.
505 skip(1);
506 // Scan for EOL.
507 while (!atEOF() && !isEOL(ch0)) {
508 skip(1);
509 }
510 // Did detect a comment.
511 add(COMMENT, start);
512 return true;
513 }
515 // Not a comment.
516 return false;
517 }
519 /**
520 * Convert a regex token to a token object.
521 *
522 * @param start Position in source content.
523 * @param length Length of regex token.
524 * @return Regex token object.
525 */
526 public RegexToken valueOfPattern(final int start, final int length) {
527 // Save the current position.
528 final int savePosition = position;
529 // Reset to beginning of content.
530 reset(start);
531 // Buffer for recording characters.
532 final StringBuilder sb = new StringBuilder(length);
534 // Skip /.
535 skip(1);
536 boolean inBrackets = false;
537 // Scan for closing /, stopping at end of line.
538 while (!atEOF() && ch0 != '/' && !isEOL(ch0) || inBrackets) {
539 // Skip over escaped character.
540 if (ch0 == '\\') {
541 sb.append(ch0);
542 sb.append(ch1);
543 skip(2);
544 } else {
545 if (ch0 == '[') {
546 inBrackets = true;
547 } else if (ch0 == ']') {
548 inBrackets = false;
549 }
551 // Skip literal character.
552 sb.append(ch0);
553 skip(1);
554 }
555 }
557 // Get pattern as string.
558 final String regex = sb.toString();
560 // Skip /.
561 skip(1);
563 // Options as string.
564 final String options = source.getString(position, scanIdentifier());
566 reset(savePosition);
568 // Compile the pattern.
569 return new RegexToken(regex, options);
570 }
572 /**
573 * Return true if the given token can be the beginning of a literal.
574 *
575 * @param token a token
576 * @return true if token can start a literal.
577 */
578 public boolean canStartLiteral(final TokenType token) {
579 return token.startsWith('/') || ((scripting || XML_LITERALS) && token.startsWith('<'));
580 }
582 /**
583 * interface to receive line information for multi-line literals.
584 */
585 protected interface LineInfoReceiver {
586 /**
587 * Receives line information
588 * @param line last line number
589 * @param linePosition position of last line
590 */
591 public void lineInfo(int line, int linePosition);
592 }
594 /**
595 * Check whether the given token represents the beginning of a literal. If so scan
596 * the literal and return <tt>true</tt>, otherwise return false.
597 *
598 * @param token the token.
599 * @param startTokenType the token type.
600 * @param lir LineInfoReceiver that receives line info for multi-line string literals.
601 * @return True if a literal beginning with startToken was found and scanned.
602 */
603 protected boolean scanLiteral(final long token, final TokenType startTokenType, final LineInfoReceiver lir) {
604 // Check if it can be a literal.
605 if (!canStartLiteral(startTokenType)) {
606 return false;
607 }
608 // We break on ambiguous tokens so if we already moved on it can't be a literal.
609 if (stream.get(stream.last()) != token) {
610 return false;
611 }
612 // Rewind to token start position
613 reset(Token.descPosition(token));
615 if (ch0 == '/') {
616 return scanRegEx();
617 } else if (ch0 == '<') {
618 if (ch1 == '<') {
619 return scanHereString(lir);
620 } else if (Character.isJavaIdentifierStart(ch1)) {
621 return scanXMLLiteral();
622 }
623 }
625 return false;
626 }
628 /**
629 * Scan over regex literal.
630 *
631 * @return True if a regex literal.
632 */
633 private boolean scanRegEx() {
634 assert ch0 == '/';
635 // Make sure it's not a comment.
636 if (ch1 != '/' && ch1 != '*') {
637 // Record beginning of literal.
638 final int start = position;
639 // Skip /.
640 skip(1);
641 boolean inBrackets = false;
643 // Scan for closing /, stopping at end of line.
644 while (!atEOF() && (ch0 != '/' || inBrackets) && !isEOL(ch0)) {
645 // Skip over escaped character.
646 if (ch0 == '\\') {
647 skip(1);
648 if (isEOL(ch0)) {
649 reset(start);
650 return false;
651 }
652 skip(1);
653 } else {
654 if (ch0 == '[') {
655 inBrackets = true;
656 } else if (ch0 == ']') {
657 inBrackets = false;
658 }
660 // Skip literal character.
661 skip(1);
662 }
663 }
665 // If regex literal.
666 if (ch0 == '/') {
667 // Skip /.
668 skip(1);
670 // Skip over options.
671 while (!atEOF() && Character.isJavaIdentifierPart(ch0) || ch0 == '\\' && ch1 == 'u') {
672 skip(1);
673 }
675 // Add regex token.
676 add(REGEX, start);
677 // Regex literal detected.
678 return true;
679 }
681 // False start try again.
682 reset(start);
683 }
685 // Regex literal not detected.
686 return false;
687 }
689 /**
690 * Convert a digit to an integer. Can't use Character.digit since we are
691 * restricted to ASCII by the spec.
692 *
693 * @param ch Character to convert.
694 * @param base Numeric base.
695 *
696 * @return The converted digit or -1 if invalid.
697 */
698 protected static int convertDigit(final char ch, final int base) {
699 int digit;
701 if ('0' <= ch && ch <= '9') {
702 digit = ch - '0';
703 } else if ('A' <= ch && ch <= 'Z') {
704 digit = ch - 'A' + 10;
705 } else if ('a' <= ch && ch <= 'z') {
706 digit = ch - 'a' + 10;
707 } else {
708 return -1;
709 }
711 return digit < base ? digit : -1;
712 }
715 /**
716 * Get the value of a hexadecimal numeric sequence.
717 *
718 * @param length Number of digits.
719 * @param type Type of token to report against.
720 * @return Value of sequence or < 0 if no digits.
721 */
722 private int hexSequence(final int length, final TokenType type) {
723 int value = 0;
725 for (int i = 0; i < length; i++) {
726 final int digit = convertDigit(ch0, 16);
728 if (digit == -1) {
729 error(Lexer.message("invalid.hex"), type, position, limit);
730 return i == 0 ? -1 : value;
731 }
733 value = digit | value << 4;
734 skip(1);
735 }
737 return value;
738 }
740 /**
741 * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255.
742 *
743 * @return Value of sequence.
744 */
745 private int octalSequence() {
746 int value = 0;
748 for (int i = 0; i < 3; i++) {
749 final int digit = convertDigit(ch0, 8);
751 if (digit == -1) {
752 break;
753 }
754 value = digit | value << 3;
755 skip(1);
757 if (i == 1 && value >= 32) {
758 break;
759 }
760 }
761 return value;
762 }
764 /**
765 * Convert a string to a JavaScript identifier.
766 *
767 * @param start Position in source content.
768 * @param length Length of token.
769 * @return Ident string or null if an error.
770 */
771 private String valueOfIdent(final int start, final int length) throws RuntimeException {
772 // Save the current position.
773 final int savePosition = position;
774 // End of scan.
775 final int end = start + length;
776 // Reset to beginning of content.
777 reset(start);
778 // Buffer for recording characters.
779 final StringBuilder sb = new StringBuilder(length);
781 // Scan until end of line or end of file.
782 while (!atEOF() && position < end && !isEOL(ch0)) {
783 // If escape character.
784 if (ch0 == '\\' && ch1 == 'u') {
785 skip(2);
786 final int ch = hexSequence(4, TokenType.IDENT);
787 if (isWhitespace((char)ch)) {
788 return null;
789 }
790 if (ch < 0) {
791 sb.append('\\');
792 sb.append('u');
793 } else {
794 sb.append((char)ch);
795 }
796 } else {
797 // Add regular character.
798 sb.append(ch0);
799 skip(1);
800 }
801 }
803 // Restore position.
804 reset(savePosition);
806 return sb.toString();
807 }
809 /**
810 * Scan over an identifier or keyword. Handles identifiers containing
811 * encoded Unicode chars.
812 *
813 * Example:
814 *
815 * var \u0042 = 44;
816 */
817 private void scanIdentifierOrKeyword() {
818 // Record beginning of identifier.
819 final int start = position;
820 // Scan identifier.
821 final int length = scanIdentifier();
822 // Check to see if it is a keyword.
823 final TokenType type = TokenLookup.lookupKeyword(content, start, length);
824 if (type == FUNCTION && pauseOnFunctionBody) {
825 pauseOnNextLeftBrace = true;
826 }
827 // Add keyword or identifier token.
828 add(type, start);
829 }
831 /**
832 * Convert a string to a JavaScript string object.
833 *
834 * @param start Position in source content.
835 * @param length Length of token.
836 * @return JavaScript string object.
837 */
838 private String valueOfString(final int start, final int length, final boolean strict) throws RuntimeException {
839 // Save the current position.
840 final int savePosition = position;
841 // Calculate the end position.
842 final int end = start + length;
843 // Reset to beginning of string.
844 reset(start);
846 // Buffer for recording characters.
847 final StringBuilder sb = new StringBuilder(length);
849 // Scan until end of string.
850 while (position < end) {
851 // If escape character.
852 if (ch0 == '\\') {
853 skip(1);
855 final char next = ch0;
856 final int afterSlash = position;
858 skip(1);
860 // Special characters.
861 switch (next) {
862 case '0':
863 case '1':
864 case '2':
865 case '3':
866 case '4':
867 case '5':
868 case '6':
869 case '7': {
870 if (strict) {
871 // "\0" itself is allowed in strict mode. Only other 'real'
872 // octal escape sequences are not allowed (eg. "\02", "\31").
873 // See section 7.8.4 String literals production EscapeSequence
874 if (next != '0' || (ch0 >= '0' && ch0 <= '9')) {
875 error(Lexer.message("strict.no.octal"), STRING, position, limit);
876 }
877 }
878 reset(afterSlash);
879 // Octal sequence.
880 final int ch = octalSequence();
882 if (ch < 0) {
883 sb.append('\\');
884 sb.append('x');
885 } else {
886 sb.append((char)ch);
887 }
888 break;
889 }
890 case 'n':
891 sb.append('\n');
892 break;
893 case 't':
894 sb.append('\t');
895 break;
896 case 'b':
897 sb.append('\b');
898 break;
899 case 'f':
900 sb.append('\f');
901 break;
902 case 'r':
903 sb.append('\r');
904 break;
905 case '\'':
906 sb.append('\'');
907 break;
908 case '\"':
909 sb.append('\"');
910 break;
911 case '\\':
912 sb.append('\\');
913 break;
914 case '\r': // CR | CRLF
915 if (ch0 == '\n') {
916 skip(1);
917 }
918 // fall through
919 case '\n': // LF
920 case '\u2028': // LS
921 case '\u2029': // PS
922 // continue on the next line, slash-return continues string
923 // literal
924 break;
925 case 'x': {
926 // Hex sequence.
927 final int ch = hexSequence(2, STRING);
929 if (ch < 0) {
930 sb.append('\\');
931 sb.append('x');
932 } else {
933 sb.append((char)ch);
934 }
935 }
936 break;
937 case 'u': {
938 // Unicode sequence.
939 final int ch = hexSequence(4, STRING);
941 if (ch < 0) {
942 sb.append('\\');
943 sb.append('u');
944 } else {
945 sb.append((char)ch);
946 }
947 }
948 break;
949 case 'v':
950 sb.append('\u000B');
951 break;
952 // All other characters.
953 default:
954 sb.append(next);
955 break;
956 }
957 } else {
958 // Add regular character.
959 sb.append(ch0);
960 skip(1);
961 }
962 }
964 // Restore position.
965 reset(savePosition);
967 return sb.toString();
968 }
970 /**
971 * Scan over a string literal.
972 * @param add true if we are not just scanning but should actually modify the token stream
973 */
974 protected void scanString(final boolean add) {
975 // Type of string.
976 TokenType type = STRING;
977 // Record starting quote.
978 final char quote = ch0;
979 // Skip over quote.
980 skip(1);
982 // Record beginning of string content.
983 final State stringState = saveState();
985 // Scan until close quote or end of line.
986 while (!atEOF() && ch0 != quote && !isEOL(ch0)) {
987 // Skip over escaped character.
988 if (ch0 == '\\') {
989 type = ESCSTRING;
990 skip(1);
991 if (! isEscapeCharacter(ch0)) {
992 error(Lexer.message("invalid.escape.char"), STRING, position, limit);
993 }
994 if (isEOL(ch0)) {
995 // Multiline string literal
996 skipEOL(false);
997 continue;
998 }
999 }
1000 // Skip literal character.
1001 skip(1);
1002 }
1004 // If close quote.
1005 if (ch0 == quote) {
1006 // Skip close quote.
1007 skip(1);
1008 } else {
1009 error(Lexer.message("missing.close.quote"), STRING, position, limit);
1010 }
1012 // If not just scanning.
1013 if (add) {
1014 // Record end of string.
1015 stringState.setLimit(position - 1);
1017 if (scripting && !stringState.isEmpty()) {
1018 switch (quote) {
1019 case '`':
1020 // Mark the beginning of an exec string.
1021 add(EXECSTRING, stringState.position, stringState.limit);
1022 // Frame edit string with left brace.
1023 add(LBRACE, stringState.position, stringState.position);
1024 // Process edit string.
1025 editString(type, stringState);
1026 // Frame edit string with right brace.
1027 add(RBRACE, stringState.limit, stringState.limit);
1028 break;
1029 case '"':
1030 // Only edit double quoted strings.
1031 editString(type, stringState);
1032 break;
1033 case '\'':
1034 // Add string token without editing.
1035 add(type, stringState.position, stringState.limit);
1036 break;
1037 default:
1038 break;
1039 }
1040 } else {
1041 /// Add string token without editing.
1042 add(type, stringState.position, stringState.limit);
1043 }
1044 }
1045 }
1047 /**
1048 * Is the given character a valid escape char after "\" ?
1049 *
1050 * @param ch character to be checked
1051 * @return if the given character is valid after "\"
1052 */
1053 protected boolean isEscapeCharacter(final char ch) {
1054 return true;
1055 }
1057 /**
1058 * Convert string to number.
1059 *
1060 * @param valueString String to convert.
1061 * @param radix Numeric base.
1062 * @return Converted number.
1063 */
1064 private static Number valueOf(final String valueString, final int radix) throws NumberFormatException {
1065 try {
1066 final long value = Long.parseLong(valueString, radix);
1067 if(value >= MIN_INT_L && value <= MAX_INT_L) {
1068 return Integer.valueOf((int)value);
1069 }
1070 return Long.valueOf(value);
1071 } catch (final NumberFormatException e) {
1072 if (radix == 10) {
1073 return Double.valueOf(valueString);
1074 }
1076 double value = 0.0;
1078 for (int i = 0; i < valueString.length(); i++) {
1079 final char ch = valueString.charAt(i);
1080 // Preverified, should always be a valid digit.
1081 final int digit = convertDigit(ch, radix);
1082 value *= radix;
1083 value += digit;
1084 }
1086 return value;
1087 }
1088 }
1090 /**
1091 * Scan a number.
1092 */
1093 protected void scanNumber() {
1094 // Record beginning of number.
1095 final int start = position;
1096 // Assume value is a decimal.
1097 TokenType type = DECIMAL;
1099 // First digit of number.
1100 int digit = convertDigit(ch0, 10);
1102 // If number begins with 0x.
1103 if (digit == 0 && (ch1 == 'x' || ch1 == 'X') && convertDigit(ch2, 16) != -1) {
1104 // Skip over 0xN.
1105 skip(3);
1106 // Skip over remaining digits.
1107 while (convertDigit(ch0, 16) != -1) {
1108 skip(1);
1109 }
1111 type = HEXADECIMAL;
1112 } else {
1113 // Check for possible octal constant.
1114 boolean octal = digit == 0;
1115 // Skip first digit if not leading '.'.
1116 if (digit != -1) {
1117 skip(1);
1118 }
1120 // Skip remaining digits.
1121 while ((digit = convertDigit(ch0, 10)) != -1) {
1122 // Check octal only digits.
1123 octal = octal && digit < 8;
1124 // Skip digit.
1125 skip(1);
1126 }
1128 if (octal && position - start > 1) {
1129 type = OCTAL;
1130 } else if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
1131 // Must be a double.
1132 if (ch0 == '.') {
1133 // Skip period.
1134 skip(1);
1135 // Skip mantissa.
1136 while (convertDigit(ch0, 10) != -1) {
1137 skip(1);
1138 }
1139 }
1141 // Detect exponent.
1142 if (ch0 == 'E' || ch0 == 'e') {
1143 // Skip E.
1144 skip(1);
1145 // Detect and skip exponent sign.
1146 if (ch0 == '+' || ch0 == '-') {
1147 skip(1);
1148 }
1149 // Skip exponent.
1150 while (convertDigit(ch0, 10) != -1) {
1151 skip(1);
1152 }
1153 }
1155 type = FLOATING;
1156 }
1157 }
1159 if (Character.isJavaIdentifierStart(ch0)) {
1160 error(Lexer.message("missing.space.after.number"), type, position, 1);
1161 }
1163 // Add number token.
1164 add(type, start);
1165 }
1167 /**
1168 * Convert an XML token to a token object.
1169 *
1170 * @param start Position in source content.
1171 * @param length Length of XML token.
1172 * @return XML token object.
1173 */
1174 XMLToken valueOfXML(final int start, final int length) {
1175 return new XMLToken(source.getString(start, length));
1176 }
1178 /**
1179 * Scan over a XML token.
1180 *
1181 * @return TRUE if is an XML literal.
1182 */
1183 private boolean scanXMLLiteral() {
1184 assert ch0 == '<' && Character.isJavaIdentifierStart(ch1);
1185 if (XML_LITERALS) {
1186 // Record beginning of xml expression.
1187 final int start = position;
1189 int openCount = 0;
1191 do {
1192 if (ch0 == '<') {
1193 if (ch1 == '/' && Character.isJavaIdentifierStart(ch2)) {
1194 skip(3);
1195 openCount--;
1196 } else if (Character.isJavaIdentifierStart(ch1)) {
1197 skip(2);
1198 openCount++;
1199 } else if (ch1 == '?') {
1200 skip(2);
1201 } else if (ch1 == '!' && ch2 == '-' && ch3 == '-') {
1202 skip(4);
1203 } else {
1204 reset(start);
1205 return false;
1206 }
1208 while (!atEOF() && ch0 != '>') {
1209 if (ch0 == '/' && ch1 == '>') {
1210 openCount--;
1211 skip(1);
1212 break;
1213 } else if (ch0 == '\"' || ch0 == '\'') {
1214 scanString(false);
1215 } else {
1216 skip(1);
1217 }
1218 }
1220 if (ch0 != '>') {
1221 reset(start);
1222 return false;
1223 }
1225 skip(1);
1226 } else if (atEOF()) {
1227 reset(start);
1228 return false;
1229 } else {
1230 skip(1);
1231 }
1232 } while (openCount > 0);
1234 add(XML, start);
1235 return true;
1236 }
1238 return false;
1239 }
1241 /**
1242 * Scan over identifier characters.
1243 *
1244 * @return Length of identifier or zero if none found.
1245 */
1246 private int scanIdentifier() {
1247 final int start = position;
1249 // Make sure first character is valid start character.
1250 if (ch0 == '\\' && ch1 == 'u') {
1251 skip(2);
1252 final int ch = hexSequence(4, TokenType.IDENT);
1254 if (!Character.isJavaIdentifierStart(ch)) {
1255 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1256 }
1257 } else if (!Character.isJavaIdentifierStart(ch0)) {
1258 // Not an identifier.
1259 return 0;
1260 }
1262 // Make sure remaining characters are valid part characters.
1263 while (!atEOF()) {
1264 if (ch0 == '\\' && ch1 == 'u') {
1265 skip(2);
1266 final int ch = hexSequence(4, TokenType.IDENT);
1268 if (!Character.isJavaIdentifierPart(ch)) {
1269 error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
1270 }
1271 } else if (Character.isJavaIdentifierPart(ch0)) {
1272 skip(1);
1273 } else {
1274 break;
1275 }
1276 }
1278 // Length of identifier sequence.
1279 return position - start;
1280 }
1282 /**
1283 * Compare two identifiers (in content) for equality.
1284 *
1285 * @param aStart Start of first identifier.
1286 * @param aLength Length of first identifier.
1287 * @param bStart Start of second identifier.
1288 * @param bLength Length of second identifier.
1289 * @return True if equal.
1290 */
1291 private boolean identifierEqual(final int aStart, final int aLength, final int bStart, final int bLength) {
1292 if (aLength == bLength) {
1293 for (int i = 0; i < aLength; i++) {
1294 if (content[aStart + i] != content[bStart + i]) {
1295 return false;
1296 }
1297 }
1299 return true;
1300 }
1302 return false;
1303 }
1305 /**
1306 * Detect if a line starts with a marker identifier.
1307 *
1308 * @param identStart Start of identifier.
1309 * @param identLength Length of identifier.
1310 * @return True if detected.
1311 */
1312 private boolean hasHereMarker(final int identStart, final int identLength) {
1313 // Skip any whitespace.
1314 skipWhitespace(false);
1316 return identifierEqual(identStart, identLength, position, scanIdentifier());
1317 }
1319 /**
1320 * Lexer to service edit strings.
1321 */
1322 private static class EditStringLexer extends Lexer {
1323 /** Type of string literals to emit. */
1324 final TokenType stringType;
1326 /*
1327 * Constructor.
1328 */
1330 EditStringLexer(final Lexer lexer, final TokenType stringType, final State stringState) {
1331 super(lexer, stringState);
1333 this.stringType = stringType;
1334 }
1336 /**
1337 * Lexify the contents of the string.
1338 */
1339 @Override
1340 public void lexify() {
1341 // Record start of string position.
1342 int stringStart = position;
1343 // Indicate that the priming first string has not been emitted.
1344 boolean primed = false;
1346 while (true) {
1347 // Detect end of content.
1348 if (atEOF()) {
1349 break;
1350 }
1352 // Honour escapes (should be well formed.)
1353 if (ch0 == '\\' && stringType == ESCSTRING) {
1354 skip(2);
1356 continue;
1357 }
1359 // If start of expression.
1360 if (ch0 == '$' && ch1 == '{') {
1361 if (!primed || stringStart != position) {
1362 if (primed) {
1363 add(ADD, stringStart, stringStart + 1);
1364 }
1366 add(stringType, stringStart, position);
1367 primed = true;
1368 }
1370 // Skip ${
1371 skip(2);
1373 // Save expression state.
1374 final State expressionState = saveState();
1376 // Start with one open brace.
1377 int braceCount = 1;
1379 // Scan for the rest of the string.
1380 while (!atEOF()) {
1381 // If closing brace.
1382 if (ch0 == '}') {
1383 // Break only only if matching brace.
1384 if (--braceCount == 0) {
1385 break;
1386 }
1387 } else if (ch0 == '{') {
1388 // Bump up the brace count.
1389 braceCount++;
1390 }
1392 // Skip to next character.
1393 skip(1);
1394 }
1396 // If braces don't match then report an error.
1397 if (braceCount != 0) {
1398 error(Lexer.message("edit.string.missing.brace"), LBRACE, expressionState.position - 1, 1);
1399 }
1401 // Mark end of expression.
1402 expressionState.setLimit(position);
1403 // Skip closing brace.
1404 skip(1);
1406 // Start next string.
1407 stringStart = position;
1409 // Concatenate expression.
1410 add(ADD, expressionState.position, expressionState.position + 1);
1411 add(LPAREN, expressionState.position, expressionState.position + 1);
1413 // Scan expression.
1414 final Lexer lexer = new Lexer(this, expressionState);
1415 lexer.lexify();
1417 // Close out expression parenthesis.
1418 add(RPAREN, position - 1, position);
1420 continue;
1421 }
1423 // Next character in string.
1424 skip(1);
1425 }
1427 // If there is any unemitted string portion.
1428 if (stringStart != limit) {
1429 // Concatenate remaining string.
1430 if (primed) {
1431 add(ADD, stringStart, 1);
1432 }
1434 add(stringType, stringStart, limit);
1435 }
1436 }
1438 }
1440 /**
1441 * Edit string for nested expressions.
1442 *
1443 * @param stringType Type of string literals to emit.
1444 * @param stringState State of lexer at start of string.
1445 */
1446 private void editString(final TokenType stringType, final State stringState) {
1447 // Use special lexer to scan string.
1448 final EditStringLexer lexer = new EditStringLexer(this, stringType, stringState);
1449 lexer.lexify();
1451 // Need to keep lexer informed.
1452 last = stringType;
1453 }
1455 /**
1456 * Scan over a here string.
1457 *
1458 * @return TRUE if is a here string.
1459 */
1460 private boolean scanHereString(final LineInfoReceiver lir) {
1461 assert ch0 == '<' && ch1 == '<';
1462 if (scripting) {
1463 // Record beginning of here string.
1464 final State saved = saveState();
1466 // << or <<<
1467 final boolean excludeLastEOL = ch2 != '<';
1469 if (excludeLastEOL) {
1470 skip(2);
1471 } else {
1472 skip(3);
1473 }
1475 // Scan identifier.
1476 final int identStart = position;
1477 final int identLength = scanIdentifier();
1479 // Check for identifier.
1480 if (identLength == 0) {
1481 // Treat as shift.
1482 restoreState(saved);
1484 return false;
1485 }
1487 // Record rest of line.
1488 final State restState = saveState();
1489 // keep line number updated
1490 int lastLine = line;
1492 skipLine(false);
1493 lastLine++;
1494 int lastLinePosition = position;
1495 restState.setLimit(position);
1497 // Record beginning of string.
1498 final State stringState = saveState();
1499 int stringEnd = position;
1501 // Hunt down marker.
1502 while (!atEOF()) {
1503 // Skip any whitespace.
1504 skipWhitespace(false);
1506 if (hasHereMarker(identStart, identLength)) {
1507 break;
1508 }
1510 skipLine(false);
1511 lastLine++;
1512 lastLinePosition = position;
1513 stringEnd = position;
1514 }
1516 // notify last line information
1517 lir.lineInfo(lastLine, lastLinePosition);
1519 // Record end of string.
1520 stringState.setLimit(stringEnd);
1522 // If marker is missing.
1523 if (stringState.isEmpty() || atEOF()) {
1524 error(Lexer.message("here.missing.end.marker", source.getString(identStart, identLength)), last, position, position);
1525 restoreState(saved);
1527 return false;
1528 }
1530 // Remove last end of line if specified.
1531 if (excludeLastEOL) {
1532 // Handles \n.
1533 if (content[stringEnd - 1] == '\n') {
1534 stringEnd--;
1535 }
1537 // Handles \r and \r\n.
1538 if (content[stringEnd - 1] == '\r') {
1539 stringEnd--;
1540 }
1542 // Update end of string.
1543 stringState.setLimit(stringEnd);
1544 }
1546 // Edit string if appropriate.
1547 if (scripting && !stringState.isEmpty()) {
1548 editString(STRING, stringState);
1549 } else {
1550 // Add here string.
1551 add(STRING, stringState.position, stringState.limit);
1552 }
1554 // Scan rest of original line.
1555 final Lexer restLexer = new Lexer(this, restState);
1557 restLexer.lexify();
1559 return true;
1560 }
1562 return false;
1563 }
1565 /**
1566 * Breaks source content down into lex units, adding tokens to the token
1567 * stream. The routine scans until the stream buffer is full. Can be called
1568 * repeatedly until EOF is detected.
1569 */
1570 public void lexify() {
1571 while (!stream.isFull() || nested) {
1572 // Skip over whitespace.
1573 skipWhitespace(true);
1575 // Detect end of file.
1576 if (atEOF()) {
1577 if (!nested) {
1578 // Add an EOF token at the end.
1579 add(EOF, position);
1580 }
1582 break;
1583 }
1585 // Check for comments. Note that we don't scan for regexp and other literals here as
1586 // we may not have enough context to distinguish them from similar looking operators.
1587 // Instead we break on ambiguous operators below and let the parser decide.
1588 if (ch0 == '/' && skipComments()) {
1589 continue;
1590 }
1592 if (scripting && ch0 == '#' && skipComments()) {
1593 continue;
1594 }
1596 // TokenType for lookup of delimiter or operator.
1597 TokenType type;
1599 if (ch0 == '.' && convertDigit(ch1, 10) != -1) {
1600 // '.' followed by digit.
1601 // Scan and add a number.
1602 scanNumber();
1603 } else if ((type = TokenLookup.lookupOperator(ch0, ch1, ch2, ch3)) != null) {
1604 // Get the number of characters in the token.
1605 final int typeLength = type.getLength();
1606 // Skip that many characters.
1607 skip(typeLength);
1608 // Add operator token.
1609 add(type, position - typeLength);
1610 // Some operator tokens also mark the beginning of regexp, XML, or here string literals.
1611 // We break to let the parser decide what it is.
1612 if (canStartLiteral(type)) {
1613 break;
1614 } else if (type == LBRACE && pauseOnNextLeftBrace) {
1615 pauseOnNextLeftBrace = false;
1616 break;
1617 }
1618 } else if (Character.isJavaIdentifierStart(ch0) || ch0 == '\\' && ch1 == 'u') {
1619 // Scan and add identifier or keyword.
1620 scanIdentifierOrKeyword();
1621 } else if (isStringDelimiter(ch0)) {
1622 // Scan and add a string.
1623 scanString(true);
1624 } else if (Character.isDigit(ch0)) {
1625 // Scan and add a number.
1626 scanNumber();
1627 } else {
1628 // Don't recognize this character.
1629 skip(1);
1630 add(ERROR, position - 1);
1631 }
1632 }
1633 }
1635 /**
1636 * Return value of token given its token descriptor.
1637 *
1638 * @param token Token descriptor.
1639 * @return JavaScript value.
1640 */
1641 Object getValueOf(final long token, final boolean strict) {
1642 final int start = Token.descPosition(token);
1643 final int len = Token.descLength(token);
1645 switch (Token.descType(token)) {
1646 case DECIMAL:
1647 return Lexer.valueOf(source.getString(start, len), 10); // number
1648 case OCTAL:
1649 return Lexer.valueOf(source.getString(start, len), 8); // number
1650 case HEXADECIMAL:
1651 return Lexer.valueOf(source.getString(start + 2, len - 2), 16); // number
1652 case FLOATING:
1653 final String str = source.getString(start, len);
1654 final double value = Double.valueOf(str);
1655 if (str.indexOf('.') != -1) {
1656 return value; //number
1657 }
1658 //anything without an explicit decimal point is still subject to a
1659 //"representable as int or long" check. Then the programmer does not
1660 //explicitly code something as a double. For example new Color(int, int, int)
1661 //and new Color(float, float, float) will get ambiguous for cases like
1662 //new Color(1.0, 1.5, 1.5) if we don't respect the decimal point.
1663 //yet we don't want e.g. 1e6 to be a double unnecessarily
1664 if (JSType.isRepresentableAsInt(value) && !JSType.isNegativeZero(value)) {
1665 return (int)value;
1666 } else if (JSType.isRepresentableAsLong(value) && !JSType.isNegativeZero(value)) {
1667 return (long)value;
1668 }
1669 return value;
1670 case STRING:
1671 return source.getString(start, len); // String
1672 case ESCSTRING:
1673 return valueOfString(start, len, strict); // String
1674 case IDENT:
1675 return valueOfIdent(start, len); // String
1676 case REGEX:
1677 return valueOfPattern(start, len); // RegexToken::LexerToken
1678 case XML:
1679 return valueOfXML(start, len); // XMLToken::LexerToken
1680 case DIRECTIVE_COMMENT:
1681 return source.getString(start, len);
1682 default:
1683 break;
1684 }
1686 return null;
1687 }
1689 /**
1690 * Get the correctly localized error message for a given message id format arguments
1691 * @param msgId message id
1692 * @param args format arguments
1693 * @return message
1694 */
1695 protected static String message(final String msgId, final String... args) {
1696 return ECMAErrors.getMessage("lexer.error." + msgId, args);
1697 }
1699 /**
1700 * Generate a runtime exception
1701 *
1702 * @param message error message
1703 * @param type token type
1704 * @param start start position of lexed error
1705 * @param length length of lexed error
1706 * @throws ParserException unconditionally
1707 */
1708 protected void error(final String message, final TokenType type, final int start, final int length) throws ParserException {
1709 final long token = Token.toDesc(type, start, length);
1710 final int pos = Token.descPosition(token);
1711 final int lineNum = source.getLine(pos);
1712 final int columnNum = source.getColumn(pos);
1713 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, token);
1714 throw new ParserException(JSErrorType.SYNTAX_ERROR, formatted, source, lineNum, columnNum, token);
1715 }
1717 /**
1718 * Helper class for Lexer tokens, e.g XML or RegExp tokens.
1719 * This is the abstract superclass
1720 */
1721 public static abstract class LexerToken implements Serializable {
1722 private static final long serialVersionUID = 1L;
1724 private final String expression;
1726 /**
1727 * Constructor
1728 * @param expression token expression
1729 */
1730 protected LexerToken(final String expression) {
1731 this.expression = expression;
1732 }
1734 /**
1735 * Get the expression
1736 * @return expression
1737 */
1738 public String getExpression() {
1739 return expression;
1740 }
1741 }
1743 /**
1744 * Temporary container for regular expressions.
1745 */
1746 public static class RegexToken extends LexerToken {
1747 private static final long serialVersionUID = 1L;
1749 /** Options. */
1750 private final String options;
1752 /**
1753 * Constructor.
1754 *
1755 * @param expression regexp expression
1756 * @param options regexp options
1757 */
1758 public RegexToken(final String expression, final String options) {
1759 super(expression);
1760 this.options = options;
1761 }
1763 /**
1764 * Get regexp options
1765 * @return options
1766 */
1767 public String getOptions() {
1768 return options;
1769 }
1771 @Override
1772 public String toString() {
1773 return '/' + getExpression() + '/' + options;
1774 }
1775 }
1777 /**
1778 * Temporary container for XML expression.
1779 */
1780 public static class XMLToken extends LexerToken {
1781 private static final long serialVersionUID = 1L;
1783 /**
1784 * Constructor.
1785 *
1786 * @param expression XML expression
1787 */
1788 public XMLToken(final String expression) {
1789 super(expression);
1790 }
1791 }
1792 }