src/share/classes/com/sun/tools/javac/parser/Scanner.java

Tue, 28 Dec 2010 15:54:52 -0800

author
ohair
date
Tue, 28 Dec 2010 15:54:52 -0800
changeset 798
4868a36f6fd8
parent 752
03177f49411d
child 816
7c537f4298fb
permissions
-rw-r--r--

6962318: Update copyright year
Reviewed-by: xdono

     1 /*
     2  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.nio.*;
    30 import com.sun.tools.javac.code.Source;
    31 import com.sun.tools.javac.file.JavacFileManager;
    32 import com.sun.tools.javac.util.*;
    35 import static com.sun.tools.javac.parser.Token.*;
    36 import static com.sun.tools.javac.util.LayoutCharacters.*;
    38 /** The lexical analyzer maps an input stream consisting of
    39  *  ASCII characters and Unicode escapes into a token sequence.
    40  *
    41  *  <p><b>This is NOT part of any supported API.
    42  *  If you write code that depends on this, you do so at your own risk.
    43  *  This code and its internal interfaces are subject to change or
    44  *  deletion without notice.</b>
    45  */
    46 public class Scanner implements Lexer {
    48     private static boolean scannerDebug = false;
    50     /* Output variables; set by nextToken():
    51      */
    53     /** The token, set by nextToken().
    54      */
    55     private Token token;
    57     /** Allow hex floating-point literals.
    58      */
    59     private boolean allowHexFloats;
    61     /** Allow binary literals.
    62      */
    63     private boolean allowBinaryLiterals;
    65     /** Allow underscores in literals.
    66      */
    67     private boolean allowUnderscoresInLiterals;
    69     /** The source language setting.
    70      */
    71     private Source source;
    73     /** The token's position, 0-based offset from beginning of text.
    74      */
    75     private int pos;
    77     /** Character position just after the last character of the token.
    78      */
    79     private int endPos;
    81     /** The last character position of the previous token.
    82      */
    83     private int prevEndPos;
    85     /** The position where a lexical error occurred;
    86      */
    87     private int errPos = Position.NOPOS;
    89     /** The name of an identifier or token:
    90      */
    91     private Name name;
    93     /** The radix of a numeric literal token.
    94      */
    95     private int radix;
    97     /** Has a @deprecated been encountered in last doc comment?
    98      *  this needs to be reset by client.
    99      */
   100     protected boolean deprecatedFlag = false;
   102     /** A character buffer for literals.
   103      */
   104     private char[] sbuf = new char[128];
   105     private int sp;
   107     /** The input buffer, index of next character to be read,
   108      *  index of one past last character in buffer.
   109      */
   110     private char[] buf;
   111     private int bp;
   112     private int buflen;
   113     private int eofPos;
   115     /** The current character.
   116      */
   117     private char ch;
   119     /** The buffer index of the last converted unicode character
   120      */
   121     private int unicodeConversionBp = -1;
   123     /** The log to be used for error reporting.
   124      */
   125     private final Log log;
   127     /** The name table. */
   128     private final Names names;
   130     /** The keyword table. */
   131     private final Keywords keywords;
   133     /** Common code for constructors. */
   134     private Scanner(ScannerFactory fac) {
   135         log = fac.log;
   136         names = fac.names;
   137         keywords = fac.keywords;
   138         source = fac.source;
   139         allowBinaryLiterals = source.allowBinaryLiterals();
   140         allowHexFloats = source.allowHexFloats();
   141         allowUnderscoresInLiterals = source.allowBinaryLiterals();
   142     }
   144     private static final boolean hexFloatsWork = hexFloatsWork();
   145     private static boolean hexFloatsWork() {
   146         try {
   147             Float.valueOf("0x1.0p1");
   148             return true;
   149         } catch (NumberFormatException ex) {
   150             return false;
   151         }
   152     }
   154     /** Create a scanner from the input buffer.  buffer must implement
   155      *  array() and compact(), and remaining() must be less than limit().
   156      */
   157     protected Scanner(ScannerFactory fac, CharBuffer buffer) {
   158         this(fac, JavacFileManager.toArray(buffer), buffer.limit());
   159     }
   161     /**
   162      * Create a scanner from the input array.  This method might
   163      * modify the array.  To avoid copying the input array, ensure
   164      * that {@code inputLength < input.length} or
   165      * {@code input[input.length -1]} is a white space character.
   166      *
   167      * @param fac the factory which created this Scanner
   168      * @param input the input, might be modified
   169      * @param inputLength the size of the input.
   170      * Must be positive and less than or equal to input.length.
   171      */
   172     protected Scanner(ScannerFactory fac, char[] input, int inputLength) {
   173         this(fac);
   174         eofPos = inputLength;
   175         if (inputLength == input.length) {
   176             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
   177                 inputLength--;
   178             } else {
   179                 char[] newInput = new char[inputLength + 1];
   180                 System.arraycopy(input, 0, newInput, 0, input.length);
   181                 input = newInput;
   182             }
   183         }
   184         buf = input;
   185         buflen = inputLength;
   186         buf[buflen] = EOI;
   187         bp = -1;
   188         scanChar();
   189     }
   191     /** Report an error at the given position using the provided arguments.
   192      */
   193     private void lexError(int pos, String key, Object... args) {
   194         log.error(pos, key, args);
   195         token = ERROR;
   196         errPos = pos;
   197     }
   199     /** Report an error at the current token position using the provided
   200      *  arguments.
   201      */
   202     private void lexError(String key, Object... args) {
   203         lexError(pos, key, args);
   204     }
   206     /** Convert an ASCII digit from its base (8, 10, or 16)
   207      *  to its value.
   208      */
   209     private int digit(int base) {
   210         char c = ch;
   211         int result = Character.digit(c, base);
   212         if (result >= 0 && c > 0x7f) {
   213             lexError(pos+1, "illegal.nonascii.digit");
   214             ch = "0123456789abcdef".charAt(result);
   215         }
   216         return result;
   217     }
   219     /** Convert unicode escape; bp points to initial '\' character
   220      *  (Spec 3.3).
   221      */
   222     private void convertUnicode() {
   223         if (ch == '\\' && unicodeConversionBp != bp) {
   224             bp++; ch = buf[bp];
   225             if (ch == 'u') {
   226                 do {
   227                     bp++; ch = buf[bp];
   228                 } while (ch == 'u');
   229                 int limit = bp + 3;
   230                 if (limit < buflen) {
   231                     int d = digit(16);
   232                     int code = d;
   233                     while (bp < limit && d >= 0) {
   234                         bp++; ch = buf[bp];
   235                         d = digit(16);
   236                         code = (code << 4) + d;
   237                     }
   238                     if (d >= 0) {
   239                         ch = (char)code;
   240                         unicodeConversionBp = bp;
   241                         return;
   242                     }
   243                 }
   244                 lexError(bp, "illegal.unicode.esc");
   245             } else {
   246                 bp--;
   247                 ch = '\\';
   248             }
   249         }
   250     }
   252     /** Read next character.
   253      */
   254     private void scanChar() {
   255         ch = buf[++bp];
   256         if (ch == '\\') {
   257             convertUnicode();
   258         }
   259     }
   261     /** Read next character in comment, skipping over double '\' characters.
   262      */
   263     private void scanCommentChar() {
   264         scanChar();
   265         if (ch == '\\') {
   266             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   267                 bp++;
   268             } else {
   269                 convertUnicode();
   270             }
   271         }
   272     }
   274     /** Append a character to sbuf.
   275      */
   276     private void putChar(char ch) {
   277         if (sp == sbuf.length) {
   278             char[] newsbuf = new char[sbuf.length * 2];
   279             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
   280             sbuf = newsbuf;
   281         }
   282         sbuf[sp++] = ch;
   283     }
   285     /** For debugging purposes: print character.
   286      */
   287     private void dch() {
   288         System.err.print(ch); System.out.flush();
   289     }
   291     /** Read next character in character or string literal and copy into sbuf.
   292      */
   293     private void scanLitChar() {
   294         if (ch == '\\') {
   295             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   296                 bp++;
   297                 putChar('\\');
   298                 scanChar();
   299             } else {
   300                 scanChar();
   301                 switch (ch) {
   302                 case '0': case '1': case '2': case '3':
   303                 case '4': case '5': case '6': case '7':
   304                     char leadch = ch;
   305                     int oct = digit(8);
   306                     scanChar();
   307                     if ('0' <= ch && ch <= '7') {
   308                         oct = oct * 8 + digit(8);
   309                         scanChar();
   310                         if (leadch <= '3' && '0' <= ch && ch <= '7') {
   311                             oct = oct * 8 + digit(8);
   312                             scanChar();
   313                         }
   314                     }
   315                     putChar((char)oct);
   316                     break;
   317                 case 'b':
   318                     putChar('\b'); scanChar(); break;
   319                 case 't':
   320                     putChar('\t'); scanChar(); break;
   321                 case 'n':
   322                     putChar('\n'); scanChar(); break;
   323                 case 'f':
   324                     putChar('\f'); scanChar(); break;
   325                 case 'r':
   326                     putChar('\r'); scanChar(); break;
   327                 case '\'':
   328                     putChar('\''); scanChar(); break;
   329                 case '\"':
   330                     putChar('\"'); scanChar(); break;
   331                 case '\\':
   332                     putChar('\\'); scanChar(); break;
   333                 default:
   334                     lexError(bp, "illegal.esc.char");
   335                 }
   336             }
   337         } else if (bp != buflen) {
   338             putChar(ch); scanChar();
   339         }
   340     }
   342     private void scanDigits(int digitRadix) {
   343         char saveCh;
   344         int savePos;
   345         do {
   346             if (ch != '_') {
   347                 putChar(ch);
   348             } else {
   349                 if (!allowUnderscoresInLiterals) {
   350                     lexError("unsupported.underscore.lit", source.name);
   351                     allowUnderscoresInLiterals = true;
   352                 }
   353             }
   354             saveCh = ch;
   355             savePos = bp;
   356             scanChar();
   357         } while (digit(digitRadix) >= 0 || ch == '_');
   358         if (saveCh == '_')
   359             lexError(savePos, "illegal.underscore");
   360     }
   362     /** Read fractional part of hexadecimal floating point number.
   363      */
   364     private void scanHexExponentAndSuffix() {
   365         if (ch == 'p' || ch == 'P') {
   366             putChar(ch);
   367             scanChar();
   368             skipIllegalUnderscores();
   369             if (ch == '+' || ch == '-') {
   370                 putChar(ch);
   371                 scanChar();
   372             }
   373             skipIllegalUnderscores();
   374             if ('0' <= ch && ch <= '9') {
   375                 scanDigits(10);
   376                 if (!allowHexFloats) {
   377                     lexError("unsupported.fp.lit", source.name);
   378                     allowHexFloats = true;
   379                 }
   380                 else if (!hexFloatsWork)
   381                     lexError("unsupported.cross.fp.lit");
   382             } else
   383                 lexError("malformed.fp.lit");
   384         } else {
   385             lexError("malformed.fp.lit");
   386         }
   387         if (ch == 'f' || ch == 'F') {
   388             putChar(ch);
   389             scanChar();
   390             token = FLOATLITERAL;
   391         } else {
   392             if (ch == 'd' || ch == 'D') {
   393                 putChar(ch);
   394                 scanChar();
   395             }
   396             token = DOUBLELITERAL;
   397         }
   398     }
   400     /** Read fractional part of floating point number.
   401      */
   402     private void scanFraction() {
   403         skipIllegalUnderscores();
   404         if ('0' <= ch && ch <= '9') {
   405             scanDigits(10);
   406         }
   407         int sp1 = sp;
   408         if (ch == 'e' || ch == 'E') {
   409             putChar(ch);
   410             scanChar();
   411             skipIllegalUnderscores();
   412             if (ch == '+' || ch == '-') {
   413                 putChar(ch);
   414                 scanChar();
   415             }
   416             skipIllegalUnderscores();
   417             if ('0' <= ch && ch <= '9') {
   418                 scanDigits(10);
   419                 return;
   420             }
   421             lexError("malformed.fp.lit");
   422             sp = sp1;
   423         }
   424     }
   426     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   427      */
   428     private void scanFractionAndSuffix() {
   429         this.radix = 10;
   430         scanFraction();
   431         if (ch == 'f' || ch == 'F') {
   432             putChar(ch);
   433             scanChar();
   434             token = FLOATLITERAL;
   435         } else {
   436             if (ch == 'd' || ch == 'D') {
   437                 putChar(ch);
   438                 scanChar();
   439             }
   440             token = DOUBLELITERAL;
   441         }
   442     }
   444     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   445      */
   446     private void scanHexFractionAndSuffix(boolean seendigit) {
   447         this.radix = 16;
   448         assert ch == '.';
   449         putChar(ch);
   450         scanChar();
   451         skipIllegalUnderscores();
   452         if (digit(16) >= 0) {
   453             seendigit = true;
   454             scanDigits(16);
   455         }
   456         if (!seendigit)
   457             lexError("invalid.hex.number");
   458         else
   459             scanHexExponentAndSuffix();
   460     }
   462     private void skipIllegalUnderscores() {
   463         if (ch == '_') {
   464             lexError(bp, "illegal.underscore");
   465             while (ch == '_')
   466                 scanChar();
   467         }
   468     }
   470     /** Read a number.
   471      *  @param radix  The radix of the number; one of 2, j8, 10, 16.
   472      */
   473     private void scanNumber(int radix) {
   474         this.radix = radix;
   475         // for octal, allow base-10 digit in case it's a float literal
   476         int digitRadix = (radix == 8 ? 10 : radix);
   477         boolean seendigit = false;
   478         if (digit(digitRadix) >= 0) {
   479             seendigit = true;
   480             scanDigits(digitRadix);
   481         }
   482         if (radix == 16 && ch == '.') {
   483             scanHexFractionAndSuffix(seendigit);
   484         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
   485             scanHexExponentAndSuffix();
   486         } else if (digitRadix == 10 && ch == '.') {
   487             putChar(ch);
   488             scanChar();
   489             scanFractionAndSuffix();
   490         } else if (digitRadix == 10 &&
   491                    (ch == 'e' || ch == 'E' ||
   492                     ch == 'f' || ch == 'F' ||
   493                     ch == 'd' || ch == 'D')) {
   494             scanFractionAndSuffix();
   495         } else {
   496             if (ch == 'l' || ch == 'L') {
   497                 scanChar();
   498                 token = LONGLITERAL;
   499             } else {
   500                 token = INTLITERAL;
   501             }
   502         }
   503     }
   505     /** Read an identifier.
   506      */
   507     private void scanIdent() {
   508         boolean isJavaIdentifierPart;
   509         char high;
   510         do {
   511             if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
   512             // optimization, was: putChar(ch);
   514             scanChar();
   515             switch (ch) {
   516             case 'A': case 'B': case 'C': case 'D': case 'E':
   517             case 'F': case 'G': case 'H': case 'I': case 'J':
   518             case 'K': case 'L': case 'M': case 'N': case 'O':
   519             case 'P': case 'Q': case 'R': case 'S': case 'T':
   520             case 'U': case 'V': case 'W': case 'X': case 'Y':
   521             case 'Z':
   522             case 'a': case 'b': case 'c': case 'd': case 'e':
   523             case 'f': case 'g': case 'h': case 'i': case 'j':
   524             case 'k': case 'l': case 'm': case 'n': case 'o':
   525             case 'p': case 'q': case 'r': case 's': case 't':
   526             case 'u': case 'v': case 'w': case 'x': case 'y':
   527             case 'z':
   528             case '$': case '_':
   529             case '0': case '1': case '2': case '3': case '4':
   530             case '5': case '6': case '7': case '8': case '9':
   531             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
   532             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
   533             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
   534             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
   535             case '\u0015': case '\u0016': case '\u0017':
   536             case '\u0018': case '\u0019': case '\u001B':
   537             case '\u007F':
   538                 break;
   539             case '\u001A': // EOI is also a legal identifier part
   540                 if (bp >= buflen) {
   541                     name = names.fromChars(sbuf, 0, sp);
   542                     token = keywords.key(name);
   543                     return;
   544                 }
   545                 break;
   546             default:
   547                 if (ch < '\u0080') {
   548                     // all ASCII range chars already handled, above
   549                     isJavaIdentifierPart = false;
   550                 } else {
   551                     high = scanSurrogates();
   552                     if (high != 0) {
   553                         if (sp == sbuf.length) {
   554                             putChar(high);
   555                         } else {
   556                             sbuf[sp++] = high;
   557                         }
   558                         isJavaIdentifierPart = Character.isJavaIdentifierPart(
   559                             Character.toCodePoint(high, ch));
   560                     } else {
   561                         isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
   562                     }
   563                 }
   564                 if (!isJavaIdentifierPart) {
   565                     name = names.fromChars(sbuf, 0, sp);
   566                     token = keywords.key(name);
   567                     return;
   568                 }
   569             }
   570         } while (true);
   571     }
   573     /** Are surrogates supported?
   574      */
   575     final static boolean surrogatesSupported = surrogatesSupported();
   576     private static boolean surrogatesSupported() {
   577         try {
   578             Character.isHighSurrogate('a');
   579             return true;
   580         } catch (NoSuchMethodError ex) {
   581             return false;
   582         }
   583     }
   585     /** Scan surrogate pairs.  If 'ch' is a high surrogate and
   586      *  the next character is a low surrogate, then put the low
   587      *  surrogate in 'ch', and return the high surrogate.
   588      *  otherwise, just return 0.
   589      */
   590     private char scanSurrogates() {
   591         if (surrogatesSupported && Character.isHighSurrogate(ch)) {
   592             char high = ch;
   594             scanChar();
   596             if (Character.isLowSurrogate(ch)) {
   597                 return high;
   598             }
   600             ch = high;
   601         }
   603         return 0;
   604     }
   606     /** Return true if ch can be part of an operator.
   607      */
   608     private boolean isSpecial(char ch) {
   609         switch (ch) {
   610         case '!': case '%': case '&': case '*': case '?':
   611         case '+': case '-': case ':': case '<': case '=':
   612         case '>': case '^': case '|': case '~':
   613         case '@':
   614             return true;
   615         default:
   616             return false;
   617         }
   618     }
   620     /** Read longest possible sequence of special characters and convert
   621      *  to token.
   622      */
   623     private void scanOperator() {
   624         while (true) {
   625             putChar(ch);
   626             Name newname = names.fromChars(sbuf, 0, sp);
   627             if (keywords.key(newname) == IDENTIFIER) {
   628                 sp--;
   629                 break;
   630             }
   631             name = newname;
   632             token = keywords.key(newname);
   633             scanChar();
   634             if (!isSpecial(ch)) break;
   635         }
   636     }
   638     /**
   639      * Scan a documention comment; determine if a deprecated tag is present.
   640      * Called once the initial /, * have been skipped, positioned at the second *
   641      * (which is treated as the beginning of the first line).
   642      * Stops positioned at the closing '/'.
   643      */
   644     @SuppressWarnings("fallthrough")
   645     private void scanDocComment() {
   646         boolean deprecatedPrefix = false;
   648         forEachLine:
   649         while (bp < buflen) {
   651             // Skip optional WhiteSpace at beginning of line
   652             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   653                 scanCommentChar();
   654             }
   656             // Skip optional consecutive Stars
   657             while (bp < buflen && ch == '*') {
   658                 scanCommentChar();
   659                 if (ch == '/') {
   660                     return;
   661                 }
   662             }
   664             // Skip optional WhiteSpace after Stars
   665             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   666                 scanCommentChar();
   667             }
   669             deprecatedPrefix = false;
   670             // At beginning of line in the JavaDoc sense.
   671             if (bp < buflen && ch == '@' && !deprecatedFlag) {
   672                 scanCommentChar();
   673                 if (bp < buflen && ch == 'd') {
   674                     scanCommentChar();
   675                     if (bp < buflen && ch == 'e') {
   676                         scanCommentChar();
   677                         if (bp < buflen && ch == 'p') {
   678                             scanCommentChar();
   679                             if (bp < buflen && ch == 'r') {
   680                                 scanCommentChar();
   681                                 if (bp < buflen && ch == 'e') {
   682                                     scanCommentChar();
   683                                     if (bp < buflen && ch == 'c') {
   684                                         scanCommentChar();
   685                                         if (bp < buflen && ch == 'a') {
   686                                             scanCommentChar();
   687                                             if (bp < buflen && ch == 't') {
   688                                                 scanCommentChar();
   689                                                 if (bp < buflen && ch == 'e') {
   690                                                     scanCommentChar();
   691                                                     if (bp < buflen && ch == 'd') {
   692                                                         deprecatedPrefix = true;
   693                                                         scanCommentChar();
   694                                                     }}}}}}}}}}}
   695             if (deprecatedPrefix && bp < buflen) {
   696                 if (Character.isWhitespace(ch)) {
   697                     deprecatedFlag = true;
   698                 } else if (ch == '*') {
   699                     scanCommentChar();
   700                     if (ch == '/') {
   701                         deprecatedFlag = true;
   702                         return;
   703                     }
   704                 }
   705             }
   707             // Skip rest of line
   708             while (bp < buflen) {
   709                 switch (ch) {
   710                 case '*':
   711                     scanCommentChar();
   712                     if (ch == '/') {
   713                         return;
   714                     }
   715                     break;
   716                 case CR: // (Spec 3.4)
   717                     scanCommentChar();
   718                     if (ch != LF) {
   719                         continue forEachLine;
   720                     }
   721                     /* fall through to LF case */
   722                 case LF: // (Spec 3.4)
   723                     scanCommentChar();
   724                     continue forEachLine;
   725                 default:
   726                     scanCommentChar();
   727                 }
   728             } // rest of line
   729         } // forEachLine
   730         return;
   731     }
   733     /** The value of a literal token, recorded as a string.
   734      *  For integers, leading 0x and 'l' suffixes are suppressed.
   735      */
   736     public String stringVal() {
   737         return new String(sbuf, 0, sp);
   738     }
   740     /** Read token.
   741      */
   742     public void nextToken() {
   744         try {
   745             prevEndPos = endPos;
   746             sp = 0;
   748             while (true) {
   749                 pos = bp;
   750                 switch (ch) {
   751                 case ' ': // (Spec 3.6)
   752                 case '\t': // (Spec 3.6)
   753                 case FF: // (Spec 3.6)
   754                     do {
   755                         scanChar();
   756                     } while (ch == ' ' || ch == '\t' || ch == FF);
   757                     endPos = bp;
   758                     processWhiteSpace();
   759                     break;
   760                 case LF: // (Spec 3.4)
   761                     scanChar();
   762                     endPos = bp;
   763                     processLineTerminator();
   764                     break;
   765                 case CR: // (Spec 3.4)
   766                     scanChar();
   767                     if (ch == LF) {
   768                         scanChar();
   769                     }
   770                     endPos = bp;
   771                     processLineTerminator();
   772                     break;
   773                 case 'A': case 'B': case 'C': case 'D': case 'E':
   774                 case 'F': case 'G': case 'H': case 'I': case 'J':
   775                 case 'K': case 'L': case 'M': case 'N': case 'O':
   776                 case 'P': case 'Q': case 'R': case 'S': case 'T':
   777                 case 'U': case 'V': case 'W': case 'X': case 'Y':
   778                 case 'Z':
   779                 case 'a': case 'b': case 'c': case 'd': case 'e':
   780                 case 'f': case 'g': case 'h': case 'i': case 'j':
   781                 case 'k': case 'l': case 'm': case 'n': case 'o':
   782                 case 'p': case 'q': case 'r': case 's': case 't':
   783                 case 'u': case 'v': case 'w': case 'x': case 'y':
   784                 case 'z':
   785                 case '$': case '_':
   786                     scanIdent();
   787                     return;
   788                 case '0':
   789                     scanChar();
   790                     if (ch == 'x' || ch == 'X') {
   791                         scanChar();
   792                         skipIllegalUnderscores();
   793                         if (ch == '.') {
   794                             scanHexFractionAndSuffix(false);
   795                         } else if (digit(16) < 0) {
   796                             lexError("invalid.hex.number");
   797                         } else {
   798                             scanNumber(16);
   799                         }
   800                     } else if (ch == 'b' || ch == 'B') {
   801                         if (!allowBinaryLiterals) {
   802                             lexError("unsupported.binary.lit", source.name);
   803                             allowBinaryLiterals = true;
   804                         }
   805                         scanChar();
   806                         skipIllegalUnderscores();
   807                         if (digit(2) < 0) {
   808                             lexError("invalid.binary.number");
   809                         } else {
   810                             scanNumber(2);
   811                         }
   812                     } else {
   813                         putChar('0');
   814                         if (ch == '_') {
   815                             int savePos = bp;
   816                             do {
   817                                 scanChar();
   818                             } while (ch == '_');
   819                             if (digit(10) < 0) {
   820                                 lexError(savePos, "illegal.underscore");
   821                             }
   822                         }
   823                         scanNumber(8);
   824                     }
   825                     return;
   826                 case '1': case '2': case '3': case '4':
   827                 case '5': case '6': case '7': case '8': case '9':
   828                     scanNumber(10);
   829                     return;
   830                 case '.':
   831                     scanChar();
   832                     if ('0' <= ch && ch <= '9') {
   833                         putChar('.');
   834                         scanFractionAndSuffix();
   835                     } else if (ch == '.') {
   836                         putChar('.'); putChar('.');
   837                         scanChar();
   838                         if (ch == '.') {
   839                             scanChar();
   840                             putChar('.');
   841                             token = ELLIPSIS;
   842                         } else {
   843                             lexError("malformed.fp.lit");
   844                         }
   845                     } else {
   846                         token = DOT;
   847                     }
   848                     return;
   849                 case ',':
   850                     scanChar(); token = COMMA; return;
   851                 case ';':
   852                     scanChar(); token = SEMI; return;
   853                 case '(':
   854                     scanChar(); token = LPAREN; return;
   855                 case ')':
   856                     scanChar(); token = RPAREN; return;
   857                 case '[':
   858                     scanChar(); token = LBRACKET; return;
   859                 case ']':
   860                     scanChar(); token = RBRACKET; return;
   861                 case '{':
   862                     scanChar(); token = LBRACE; return;
   863                 case '}':
   864                     scanChar(); token = RBRACE; return;
   865                 case '/':
   866                     scanChar();
   867                     if (ch == '/') {
   868                         do {
   869                             scanCommentChar();
   870                         } while (ch != CR && ch != LF && bp < buflen);
   871                         if (bp < buflen) {
   872                             endPos = bp;
   873                             processComment(CommentStyle.LINE);
   874                         }
   875                         break;
   876                     } else if (ch == '*') {
   877                         scanChar();
   878                         CommentStyle style;
   879                         if (ch == '*') {
   880                             style = CommentStyle.JAVADOC;
   881                             scanDocComment();
   882                         } else {
   883                             style = CommentStyle.BLOCK;
   884                             while (bp < buflen) {
   885                                 if (ch == '*') {
   886                                     scanChar();
   887                                     if (ch == '/') break;
   888                                 } else {
   889                                     scanCommentChar();
   890                                 }
   891                             }
   892                         }
   893                         if (ch == '/') {
   894                             scanChar();
   895                             endPos = bp;
   896                             processComment(style);
   897                             break;
   898                         } else {
   899                             lexError("unclosed.comment");
   900                             return;
   901                         }
   902                     } else if (ch == '=') {
   903                         name = names.slashequals;
   904                         token = SLASHEQ;
   905                         scanChar();
   906                     } else {
   907                         name = names.slash;
   908                         token = SLASH;
   909                     }
   910                     return;
   911                 case '\'':
   912                     scanChar();
   913                     if (ch == '\'') {
   914                         lexError("empty.char.lit");
   915                     } else {
   916                         if (ch == CR || ch == LF)
   917                             lexError(pos, "illegal.line.end.in.char.lit");
   918                         scanLitChar();
   919                         if (ch == '\'') {
   920                             scanChar();
   921                             token = CHARLITERAL;
   922                         } else {
   923                             lexError(pos, "unclosed.char.lit");
   924                         }
   925                     }
   926                     return;
   927                 case '\"':
   928                     scanChar();
   929                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
   930                         scanLitChar();
   931                     if (ch == '\"') {
   932                         token = STRINGLITERAL;
   933                         scanChar();
   934                     } else {
   935                         lexError(pos, "unclosed.str.lit");
   936                     }
   937                     return;
   938                 default:
   939                     if (isSpecial(ch)) {
   940                         scanOperator();
   941                     } else {
   942                         boolean isJavaIdentifierStart;
   943                         if (ch < '\u0080') {
   944                             // all ASCII range chars already handled, above
   945                             isJavaIdentifierStart = false;
   946                         } else {
   947                             char high = scanSurrogates();
   948                             if (high != 0) {
   949                                 if (sp == sbuf.length) {
   950                                     putChar(high);
   951                                 } else {
   952                                     sbuf[sp++] = high;
   953                                 }
   955                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
   956                                     Character.toCodePoint(high, ch));
   957                             } else {
   958                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
   959                             }
   960                         }
   961                         if (isJavaIdentifierStart) {
   962                             scanIdent();
   963                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
   964                             token = EOF;
   965                             pos = bp = eofPos;
   966                         } else {
   967                             lexError("illegal.char", String.valueOf((int)ch));
   968                             scanChar();
   969                         }
   970                     }
   971                     return;
   972                 }
   973             }
   974         } finally {
   975             endPos = bp;
   976             if (scannerDebug)
   977                 System.out.println("nextToken(" + pos
   978                                    + "," + endPos + ")=|" +
   979                                    new String(getRawCharacters(pos, endPos))
   980                                    + "|");
   981         }
   982     }
   984     /** Return the current token, set by nextToken().
   985      */
   986     public Token token() {
   987         return token;
   988     }
   990     /** Sets the current token.
   991      */
   992     public void token(Token token) {
   993         this.token = token;
   994     }
   996     /** Return the current token's position: a 0-based
   997      *  offset from beginning of the raw input stream
   998      *  (before unicode translation)
   999      */
  1000     public int pos() {
  1001         return pos;
  1004     /** Return the last character position of the current token.
  1005      */
  1006     public int endPos() {
  1007         return endPos;
  1010     /** Return the last character position of the previous token.
  1011      */
  1012     public int prevEndPos() {
  1013         return prevEndPos;
  1016     /** Return the position where a lexical error occurred;
  1017      */
  1018     public int errPos() {
  1019         return errPos;
  1022     /** Set the position where a lexical error occurred;
  1023      */
  1024     public void errPos(int pos) {
  1025         errPos = pos;
  1028     /** Return the name of an identifier or token for the current token.
  1029      */
  1030     public Name name() {
  1031         return name;
  1034     /** Return the radix of a numeric literal token.
  1035      */
  1036     public int radix() {
  1037         return radix;
  1040     /** Has a @deprecated been encountered in last doc comment?
  1041      *  This needs to be reset by client with resetDeprecatedFlag.
  1042      */
  1043     public boolean deprecatedFlag() {
  1044         return deprecatedFlag;
  1047     public void resetDeprecatedFlag() {
  1048         deprecatedFlag = false;
  1051     /**
  1052      * Returns the documentation string of the current token.
  1053      */
  1054     public String docComment() {
  1055         return null;
  1058     /**
  1059      * Returns a copy of the input buffer, up to its inputLength.
  1060      * Unicode escape sequences are not translated.
  1061      */
  1062     public char[] getRawCharacters() {
  1063         char[] chars = new char[buflen];
  1064         System.arraycopy(buf, 0, chars, 0, buflen);
  1065         return chars;
  1068     /**
  1069      * Returns a copy of a character array subset of the input buffer.
  1070      * The returned array begins at the <code>beginIndex</code> and
  1071      * extends to the character at index <code>endIndex - 1</code>.
  1072      * Thus the length of the substring is <code>endIndex-beginIndex</code>.
  1073      * This behavior is like
  1074      * <code>String.substring(beginIndex, endIndex)</code>.
  1075      * Unicode escape sequences are not translated.
  1077      * @param beginIndex the beginning index, inclusive.
  1078      * @param endIndex the ending index, exclusive.
  1079      * @throws IndexOutOfBounds if either offset is outside of the
  1080      *         array bounds
  1081      */
  1082     public char[] getRawCharacters(int beginIndex, int endIndex) {
  1083         int length = endIndex - beginIndex;
  1084         char[] chars = new char[length];
  1085         System.arraycopy(buf, beginIndex, chars, 0, length);
  1086         return chars;
  1089     public enum CommentStyle {
  1090         LINE,
  1091         BLOCK,
  1092         JAVADOC,
  1095     /**
  1096      * Called when a complete comment has been scanned. pos and endPos
  1097      * will mark the comment boundary.
  1098      */
  1099     protected void processComment(CommentStyle style) {
  1100         if (scannerDebug)
  1101             System.out.println("processComment(" + pos
  1102                                + "," + endPos + "," + style + ")=|"
  1103                                + new String(getRawCharacters(pos, endPos))
  1104                                + "|");
  1107     /**
  1108      * Called when a complete whitespace run has been scanned. pos and endPos
  1109      * will mark the whitespace boundary.
  1110      */
  1111     protected void processWhiteSpace() {
  1112         if (scannerDebug)
  1113             System.out.println("processWhitespace(" + pos
  1114                                + "," + endPos + ")=|" +
  1115                                new String(getRawCharacters(pos, endPos))
  1116                                + "|");
  1119     /**
  1120      * Called when a line terminator has been processed.
  1121      */
  1122     protected void processLineTerminator() {
  1123         if (scannerDebug)
  1124             System.out.println("processTerminator(" + pos
  1125                                + "," + endPos + ")=|" +
  1126                                new String(getRawCharacters(pos, endPos))
  1127                                + "|");
  1130     /** Build a map for translating between line numbers and
  1131      * positions in the input.
  1133      * @return a LineMap */
  1134     public Position.LineMap getLineMap() {
  1135         return Position.makeLineMap(buf, buflen, false);

mercurial