src/share/classes/com/sun/tools/javac/parser/Scanner.java

Wed, 06 Apr 2011 19:30:57 -0700

author
darcy
date
Wed, 06 Apr 2011 19:30:57 -0700
changeset 969
8cc5b440fdde
parent 870
3ce4e1a07e92
child 1074
04f983e3e825
permissions
-rw-r--r--

7033809: Rename "disjunctive" to "union" in javax.lang.model
Reviewed-by: mcimadamore, jjg

     1 /*
     2  * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.nio.*;
    30 import com.sun.tools.javac.code.Source;
    31 import com.sun.tools.javac.file.JavacFileManager;
    32 import com.sun.tools.javac.util.*;
    35 import static com.sun.tools.javac.parser.Token.*;
    36 import static com.sun.tools.javac.util.LayoutCharacters.*;
    38 /** The lexical analyzer maps an input stream consisting of
    39  *  ASCII characters and Unicode escapes into a token sequence.
    40  *
    41  *  <p><b>This is NOT part of any supported API.
    42  *  If you write code that depends on this, you do so at your own risk.
    43  *  This code and its internal interfaces are subject to change or
    44  *  deletion without notice.</b>
    45  */
    46 public class Scanner implements Lexer {
    48     private static boolean scannerDebug = false;
    50     /* Output variables; set by nextToken():
    51      */
    53     /** The token, set by nextToken().
    54      */
    55     private Token token;
    57     /** Allow hex floating-point literals.
    58      */
    59     private boolean allowHexFloats;
    61     /** Allow binary literals.
    62      */
    63     private boolean allowBinaryLiterals;
    65     /** Allow underscores in literals.
    66      */
    67     private boolean allowUnderscoresInLiterals;
    69     /** The source language setting.
    70      */
    71     private Source source;
    73     /** The token's position, 0-based offset from beginning of text.
    74      */
    75     private int pos;
    77     /** Character position just after the last character of the token.
    78      */
    79     private int endPos;
    81     /** The last character position of the previous token.
    82      */
    83     private int prevEndPos;
    85     /** The position where a lexical error occurred.
    86      */
    87     private int errPos = Position.NOPOS;
    89     /** The name of an identifier or token:
    90      */
    91     private Name name;
    93     /** The radix of a numeric literal token.
    94      */
    95     private int radix;
    97     /** Has a @deprecated been encountered in last doc comment?
    98      *  this needs to be reset by client.
    99      */
   100     protected boolean deprecatedFlag = false;
   102     /** A character buffer for literals.
   103      */
   104     private char[] sbuf = new char[128];
   105     private int sp;
   107     /** The input buffer, index of next character to be read,
   108      *  index of one past last character in buffer.
   109      */
   110     private char[] buf;
   111     private int bp;
   112     private int buflen;
   113     private int eofPos;
   115     /** The current character.
   116      */
   117     private char ch;
   119     /** The buffer index of the last converted unicode character
   120      */
   121     private int unicodeConversionBp = -1;
   123     /** The log to be used for error reporting.
   124      */
   125     private final Log log;
   127     /** The name table. */
   128     private final Names names;
   130     /** The keyword table. */
   131     private final Keywords keywords;
   /** Shared initialization used by every constructor: takes the log,
    *  name table, keyword table and source level from the factory, then
    *  caches the source level's answers about which literal forms
    *  (binary, hex floating point, underscores) are permitted.
    *
    *  @param fac the factory supplying the shared services
    */
   private Scanner(ScannerFactory fac) {
       this.log = fac.log;
       this.names = fac.names;
       this.keywords = fac.keywords;
       this.source = fac.source;

       // Feature switches derived from the source level.
       this.allowBinaryLiterals = this.source.allowBinaryLiterals();
       this.allowHexFloats = this.source.allowHexFloats();
       this.allowUnderscoresInLiterals = this.source.allowUnderscoresInLiterals();
   }
   144     private static final boolean hexFloatsWork = hexFloatsWork();
   145     private static boolean hexFloatsWork() {
   146         try {
   147             Float.valueOf("0x1.0p1");
   148             return true;
   149         } catch (NumberFormatException ex) {
   150             return false;
   151         }
   152     }
   /** Create a scanner over the contents of a character buffer.  The
    *  buffer is turned into an array with JavacFileManager.toArray and
    *  its limit() is used as the input length.  The buffer must
    *  implement array() and compact(), and remaining() must be less
    *  than limit().
    *
    *  @param fac    the factory which created this Scanner
    *  @param buffer the characters to scan
    */
   protected Scanner(ScannerFactory fac, CharBuffer buffer) {
       this(fac,
            JavacFileManager.toArray(buffer),
            buffer.limit());
   }
   161     /**
   162      * Create a scanner from the input array.  This method might
   163      * modify the array.  To avoid copying the input array, ensure
   164      * that {@code inputLength < input.length} or
   165      * {@code input[input.length -1]} is a white space character.
   166      *
   167      * @param fac the factory which created this Scanner
   168      * @param input the input, might be modified
   169      * @param inputLength the size of the input.
   170      * Must be positive and less than or equal to input.length.
   171      */
   172     protected Scanner(ScannerFactory fac, char[] input, int inputLength) {
   173         this(fac);
   174         eofPos = inputLength;
   175         if (inputLength == input.length) {
   176             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
   177                 inputLength--;
   178             } else {
   179                 char[] newInput = new char[inputLength + 1];
   180                 System.arraycopy(input, 0, newInput, 0, input.length);
   181                 input = newInput;
   182             }
   183         }
   184         buf = input;
   185         buflen = inputLength;
   186         buf[buflen] = EOI;
   187         bp = -1;
   188         scanChar();
   189     }
   191     /** Report an error at the given position using the provided arguments.
   192      */
   193     private void lexError(int pos, String key, Object... args) {
   194         log.error(pos, key, args);
   195         token = ERROR;
   196         errPos = pos;
   197     }
   199     /** Report an error at the current token position using the provided
   200      *  arguments.
   201      */
   202     private void lexError(String key, Object... args) {
   203         lexError(pos, key, args);
   204     }
   206     /** Convert an ASCII digit from its base (8, 10, or 16)
   207      *  to its value.
   208      */
   209     private int digit(int base) {
   210         char c = ch;
   211         int result = Character.digit(c, base);
   212         if (result >= 0 && c > 0x7f) {
   213             lexError(pos+1, "illegal.nonascii.digit");
   214             ch = "0123456789abcdef".charAt(result);
   215         }
   216         return result;
   217     }
   219     /** Convert unicode escape; bp points to initial '\' character
   220      *  (Spec 3.3).
   221      */
   222     private void convertUnicode() {
   223         if (ch == '\\' && unicodeConversionBp != bp) {
   224             bp++; ch = buf[bp];
   225             if (ch == 'u') {
   226                 do {
   227                     bp++; ch = buf[bp];
   228                 } while (ch == 'u');
   229                 int limit = bp + 3;
   230                 if (limit < buflen) {
   231                     int d = digit(16);
   232                     int code = d;
   233                     while (bp < limit && d >= 0) {
   234                         bp++; ch = buf[bp];
   235                         d = digit(16);
   236                         code = (code << 4) + d;
   237                     }
   238                     if (d >= 0) {
   239                         ch = (char)code;
   240                         unicodeConversionBp = bp;
   241                         return;
   242                     }
   243                 }
   244                 lexError(bp, "illegal.unicode.esc");
   245             } else {
   246                 bp--;
   247                 ch = '\\';
   248             }
   249         }
   250     }
   252     /** Read next character.
   253      */
   254     private void scanChar() {
   255         ch = buf[++bp];
   256         if (ch == '\\') {
   257             convertUnicode();
   258         }
   259     }
   261     /** Read next character in comment, skipping over double '\' characters.
   262      */
   263     private void scanCommentChar() {
   264         scanChar();
   265         if (ch == '\\') {
   266             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   267                 bp++;
   268             } else {
   269                 convertUnicode();
   270             }
   271         }
   272     }
   274     /** Append a character to sbuf.
   275      */
   276     private void putChar(char ch) {
   277         if (sp == sbuf.length) {
   278             char[] newsbuf = new char[sbuf.length * 2];
   279             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
   280             sbuf = newsbuf;
   281         }
   282         sbuf[sp++] = ch;
   283     }
   285     /** Read next character in character or string literal and copy into sbuf.
   286      */
   287     private void scanLitChar() {
   288         if (ch == '\\') {
   289             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   290                 bp++;
   291                 putChar('\\');
   292                 scanChar();
   293             } else {
   294                 scanChar();
   295                 switch (ch) {
   296                 case '0': case '1': case '2': case '3':
   297                 case '4': case '5': case '6': case '7':
   298                     char leadch = ch;
   299                     int oct = digit(8);
   300                     scanChar();
   301                     if ('0' <= ch && ch <= '7') {
   302                         oct = oct * 8 + digit(8);
   303                         scanChar();
   304                         if (leadch <= '3' && '0' <= ch && ch <= '7') {
   305                             oct = oct * 8 + digit(8);
   306                             scanChar();
   307                         }
   308                     }
   309                     putChar((char)oct);
   310                     break;
   311                 case 'b':
   312                     putChar('\b'); scanChar(); break;
   313                 case 't':
   314                     putChar('\t'); scanChar(); break;
   315                 case 'n':
   316                     putChar('\n'); scanChar(); break;
   317                 case 'f':
   318                     putChar('\f'); scanChar(); break;
   319                 case 'r':
   320                     putChar('\r'); scanChar(); break;
   321                 case '\'':
   322                     putChar('\''); scanChar(); break;
   323                 case '\"':
   324                     putChar('\"'); scanChar(); break;
   325                 case '\\':
   326                     putChar('\\'); scanChar(); break;
   327                 default:
   328                     lexError(bp, "illegal.esc.char");
   329                 }
   330             }
   331         } else if (bp != buflen) {
   332             putChar(ch); scanChar();
   333         }
   334     }
   336     private void scanDigits(int digitRadix) {
   337         char saveCh;
   338         int savePos;
   339         do {
   340             if (ch != '_') {
   341                 putChar(ch);
   342             } else {
   343                 if (!allowUnderscoresInLiterals) {
   344                     lexError("unsupported.underscore.lit", source.name);
   345                     allowUnderscoresInLiterals = true;
   346                 }
   347             }
   348             saveCh = ch;
   349             savePos = bp;
   350             scanChar();
   351         } while (digit(digitRadix) >= 0 || ch == '_');
   352         if (saveCh == '_')
   353             lexError(savePos, "illegal.underscore");
   354     }
   356     /** Read fractional part of hexadecimal floating point number.
   357      */
   358     private void scanHexExponentAndSuffix() {
   359         if (ch == 'p' || ch == 'P') {
   360             putChar(ch);
   361             scanChar();
   362             skipIllegalUnderscores();
   363             if (ch == '+' || ch == '-') {
   364                 putChar(ch);
   365                 scanChar();
   366             }
   367             skipIllegalUnderscores();
   368             if ('0' <= ch && ch <= '9') {
   369                 scanDigits(10);
   370                 if (!allowHexFloats) {
   371                     lexError("unsupported.fp.lit", source.name);
   372                     allowHexFloats = true;
   373                 }
   374                 else if (!hexFloatsWork)
   375                     lexError("unsupported.cross.fp.lit");
   376             } else
   377                 lexError("malformed.fp.lit");
   378         } else {
   379             lexError("malformed.fp.lit");
   380         }
   381         if (ch == 'f' || ch == 'F') {
   382             putChar(ch);
   383             scanChar();
   384             token = FLOATLITERAL;
   385         } else {
   386             if (ch == 'd' || ch == 'D') {
   387                 putChar(ch);
   388                 scanChar();
   389             }
   390             token = DOUBLELITERAL;
   391         }
   392     }
   394     /** Read fractional part of floating point number.
   395      */
   396     private void scanFraction() {
   397         skipIllegalUnderscores();
   398         if ('0' <= ch && ch <= '9') {
   399             scanDigits(10);
   400         }
   401         int sp1 = sp;
   402         if (ch == 'e' || ch == 'E') {
   403             putChar(ch);
   404             scanChar();
   405             skipIllegalUnderscores();
   406             if (ch == '+' || ch == '-') {
   407                 putChar(ch);
   408                 scanChar();
   409             }
   410             skipIllegalUnderscores();
   411             if ('0' <= ch && ch <= '9') {
   412                 scanDigits(10);
   413                 return;
   414             }
   415             lexError("malformed.fp.lit");
   416             sp = sp1;
   417         }
   418     }
   420     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   421      */
   422     private void scanFractionAndSuffix() {
   423         this.radix = 10;
   424         scanFraction();
   425         if (ch == 'f' || ch == 'F') {
   426             putChar(ch);
   427             scanChar();
   428             token = FLOATLITERAL;
   429         } else {
   430             if (ch == 'd' || ch == 'D') {
   431                 putChar(ch);
   432                 scanChar();
   433             }
   434             token = DOUBLELITERAL;
   435         }
   436     }
   438     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   439      */
   440     private void scanHexFractionAndSuffix(boolean seendigit) {
   441         this.radix = 16;
   442         Assert.check(ch == '.');
   443         putChar(ch);
   444         scanChar();
   445         skipIllegalUnderscores();
   446         if (digit(16) >= 0) {
   447             seendigit = true;
   448             scanDigits(16);
   449         }
   450         if (!seendigit)
   451             lexError("invalid.hex.number");
   452         else
   453             scanHexExponentAndSuffix();
   454     }
   456     private void skipIllegalUnderscores() {
   457         if (ch == '_') {
   458             lexError(bp, "illegal.underscore");
   459             while (ch == '_')
   460                 scanChar();
   461         }
   462     }
   464     /** Read a number.
   465      *  @param radix  The radix of the number; one of 2, j8, 10, 16.
   466      */
   467     private void scanNumber(int radix) {
   468         this.radix = radix;
   469         // for octal, allow base-10 digit in case it's a float literal
   470         int digitRadix = (radix == 8 ? 10 : radix);
   471         boolean seendigit = false;
   472         if (digit(digitRadix) >= 0) {
   473             seendigit = true;
   474             scanDigits(digitRadix);
   475         }
   476         if (radix == 16 && ch == '.') {
   477             scanHexFractionAndSuffix(seendigit);
   478         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
   479             scanHexExponentAndSuffix();
   480         } else if (digitRadix == 10 && ch == '.') {
   481             putChar(ch);
   482             scanChar();
   483             scanFractionAndSuffix();
   484         } else if (digitRadix == 10 &&
   485                    (ch == 'e' || ch == 'E' ||
   486                     ch == 'f' || ch == 'F' ||
   487                     ch == 'd' || ch == 'D')) {
   488             scanFractionAndSuffix();
   489         } else {
   490             if (ch == 'l' || ch == 'L') {
   491                 scanChar();
   492                 token = LONGLITERAL;
   493             } else {
   494                 token = INTLITERAL;
   495             }
   496         }
   497     }
   499     /** Read an identifier.
   500      */
   501     private void scanIdent() {
   502         boolean isJavaIdentifierPart;
   503         char high;
   504         do {
   505             if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
   506             // optimization, was: putChar(ch);
   508             scanChar();
   509             switch (ch) {
   510             case 'A': case 'B': case 'C': case 'D': case 'E':
   511             case 'F': case 'G': case 'H': case 'I': case 'J':
   512             case 'K': case 'L': case 'M': case 'N': case 'O':
   513             case 'P': case 'Q': case 'R': case 'S': case 'T':
   514             case 'U': case 'V': case 'W': case 'X': case 'Y':
   515             case 'Z':
   516             case 'a': case 'b': case 'c': case 'd': case 'e':
   517             case 'f': case 'g': case 'h': case 'i': case 'j':
   518             case 'k': case 'l': case 'm': case 'n': case 'o':
   519             case 'p': case 'q': case 'r': case 's': case 't':
   520             case 'u': case 'v': case 'w': case 'x': case 'y':
   521             case 'z':
   522             case '$': case '_':
   523             case '0': case '1': case '2': case '3': case '4':
   524             case '5': case '6': case '7': case '8': case '9':
   525             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
   526             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
   527             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
   528             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
   529             case '\u0015': case '\u0016': case '\u0017':
   530             case '\u0018': case '\u0019': case '\u001B':
   531             case '\u007F':
   532                 break;
   533             case '\u001A': // EOI is also a legal identifier part
   534                 if (bp >= buflen) {
   535                     name = names.fromChars(sbuf, 0, sp);
   536                     token = keywords.key(name);
   537                     return;
   538                 }
   539                 break;
   540             default:
   541                 if (ch < '\u0080') {
   542                     // all ASCII range chars already handled, above
   543                     isJavaIdentifierPart = false;
   544                 } else {
   545                     high = scanSurrogates();
   546                     if (high != 0) {
   547                         if (sp == sbuf.length) {
   548                             putChar(high);
   549                         } else {
   550                             sbuf[sp++] = high;
   551                         }
   552                         isJavaIdentifierPart = Character.isJavaIdentifierPart(
   553                             Character.toCodePoint(high, ch));
   554                     } else {
   555                         isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
   556                     }
   557                 }
   558                 if (!isJavaIdentifierPart) {
   559                     name = names.fromChars(sbuf, 0, sp);
   560                     token = keywords.key(name);
   561                     return;
   562                 }
   563             }
   564         } while (true);
   565     }
   567     /** Are surrogates supported?
   568      */
   569     final static boolean surrogatesSupported = surrogatesSupported();
   570     private static boolean surrogatesSupported() {
   571         try {
   572             Character.isHighSurrogate('a');
   573             return true;
   574         } catch (NoSuchMethodError ex) {
   575             return false;
   576         }
   577     }
   579     /** Scan surrogate pairs.  If 'ch' is a high surrogate and
   580      *  the next character is a low surrogate, then put the low
   581      *  surrogate in 'ch', and return the high surrogate.
   582      *  otherwise, just return 0.
   583      */
   584     private char scanSurrogates() {
   585         if (surrogatesSupported && Character.isHighSurrogate(ch)) {
   586             char high = ch;
   588             scanChar();
   590             if (Character.isLowSurrogate(ch)) {
   591                 return high;
   592             }
   594             ch = high;
   595         }
   597         return 0;
   598     }
   600     /** Return true if ch can be part of an operator.
   601      */
   602     private boolean isSpecial(char ch) {
   603         switch (ch) {
   604         case '!': case '%': case '&': case '*': case '?':
   605         case '+': case '-': case ':': case '<': case '=':
   606         case '>': case '^': case '|': case '~':
   607         case '@':
   608             return true;
   609         default:
   610             return false;
   611         }
   612     }
   614     /** Read longest possible sequence of special characters and convert
   615      *  to token.
   616      */
   617     private void scanOperator() {
   618         while (true) {
   619             putChar(ch);
   620             Name newname = names.fromChars(sbuf, 0, sp);
   621             if (keywords.key(newname) == IDENTIFIER) {
   622                 sp--;
   623                 break;
   624             }
   625             name = newname;
   626             token = keywords.key(newname);
   627             scanChar();
   628             if (!isSpecial(ch)) break;
   629         }
   630     }
   632     /**
   633      * Scan a documention comment; determine if a deprecated tag is present.
   634      * Called once the initial /, * have been skipped, positioned at the second *
   635      * (which is treated as the beginning of the first line).
   636      * Stops positioned at the closing '/'.
   637      */
   638     @SuppressWarnings("fallthrough")
   639     private void scanDocComment() {
   640         boolean deprecatedPrefix = false;
   642         forEachLine:
   643         while (bp < buflen) {
   645             // Skip optional WhiteSpace at beginning of line
   646             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   647                 scanCommentChar();
   648             }
   650             // Skip optional consecutive Stars
   651             while (bp < buflen && ch == '*') {
   652                 scanCommentChar();
   653                 if (ch == '/') {
   654                     return;
   655                 }
   656             }
   658             // Skip optional WhiteSpace after Stars
   659             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   660                 scanCommentChar();
   661             }
   663             deprecatedPrefix = false;
   664             // At beginning of line in the JavaDoc sense.
   665             if (bp < buflen && ch == '@' && !deprecatedFlag) {
   666                 scanCommentChar();
   667                 if (bp < buflen && ch == 'd') {
   668                     scanCommentChar();
   669                     if (bp < buflen && ch == 'e') {
   670                         scanCommentChar();
   671                         if (bp < buflen && ch == 'p') {
   672                             scanCommentChar();
   673                             if (bp < buflen && ch == 'r') {
   674                                 scanCommentChar();
   675                                 if (bp < buflen && ch == 'e') {
   676                                     scanCommentChar();
   677                                     if (bp < buflen && ch == 'c') {
   678                                         scanCommentChar();
   679                                         if (bp < buflen && ch == 'a') {
   680                                             scanCommentChar();
   681                                             if (bp < buflen && ch == 't') {
   682                                                 scanCommentChar();
   683                                                 if (bp < buflen && ch == 'e') {
   684                                                     scanCommentChar();
   685                                                     if (bp < buflen && ch == 'd') {
   686                                                         deprecatedPrefix = true;
   687                                                         scanCommentChar();
   688                                                     }}}}}}}}}}}
   689             if (deprecatedPrefix && bp < buflen) {
   690                 if (Character.isWhitespace(ch)) {
   691                     deprecatedFlag = true;
   692                 } else if (ch == '*') {
   693                     scanCommentChar();
   694                     if (ch == '/') {
   695                         deprecatedFlag = true;
   696                         return;
   697                     }
   698                 }
   699             }
   701             // Skip rest of line
   702             while (bp < buflen) {
   703                 switch (ch) {
   704                 case '*':
   705                     scanCommentChar();
   706                     if (ch == '/') {
   707                         return;
   708                     }
   709                     break;
   710                 case CR: // (Spec 3.4)
   711                     scanCommentChar();
   712                     if (ch != LF) {
   713                         continue forEachLine;
   714                     }
   715                     /* fall through to LF case */
   716                 case LF: // (Spec 3.4)
   717                     scanCommentChar();
   718                     continue forEachLine;
   719                 default:
   720                     scanCommentChar();
   721                 }
   722             } // rest of line
   723         } // forEachLine
   724         return;
   725     }
   727     /** The value of a literal token, recorded as a string.
   728      *  For integers, leading 0x and 'l' suffixes are suppressed.
   729      */
   730     public String stringVal() {
   731         return new String(sbuf, 0, sp);
   732     }
   734     /** Read token.
   735      */
   736     public void nextToken() {
   738         try {
   739             prevEndPos = endPos;
   740             sp = 0;
   742             while (true) {
   743                 pos = bp;
   744                 switch (ch) {
   745                 case ' ': // (Spec 3.6)
   746                 case '\t': // (Spec 3.6)
   747                 case FF: // (Spec 3.6)
   748                     do {
   749                         scanChar();
   750                     } while (ch == ' ' || ch == '\t' || ch == FF);
   751                     endPos = bp;
   752                     processWhiteSpace();
   753                     break;
   754                 case LF: // (Spec 3.4)
   755                     scanChar();
   756                     endPos = bp;
   757                     processLineTerminator();
   758                     break;
   759                 case CR: // (Spec 3.4)
   760                     scanChar();
   761                     if (ch == LF) {
   762                         scanChar();
   763                     }
   764                     endPos = bp;
   765                     processLineTerminator();
   766                     break;
   767                 case 'A': case 'B': case 'C': case 'D': case 'E':
   768                 case 'F': case 'G': case 'H': case 'I': case 'J':
   769                 case 'K': case 'L': case 'M': case 'N': case 'O':
   770                 case 'P': case 'Q': case 'R': case 'S': case 'T':
   771                 case 'U': case 'V': case 'W': case 'X': case 'Y':
   772                 case 'Z':
   773                 case 'a': case 'b': case 'c': case 'd': case 'e':
   774                 case 'f': case 'g': case 'h': case 'i': case 'j':
   775                 case 'k': case 'l': case 'm': case 'n': case 'o':
   776                 case 'p': case 'q': case 'r': case 's': case 't':
   777                 case 'u': case 'v': case 'w': case 'x': case 'y':
   778                 case 'z':
   779                 case '$': case '_':
   780                     scanIdent();
   781                     return;
   782                 case '0':
   783                     scanChar();
   784                     if (ch == 'x' || ch == 'X') {
   785                         scanChar();
   786                         skipIllegalUnderscores();
   787                         if (ch == '.') {
   788                             scanHexFractionAndSuffix(false);
   789                         } else if (digit(16) < 0) {
   790                             lexError("invalid.hex.number");
   791                         } else {
   792                             scanNumber(16);
   793                         }
   794                     } else if (ch == 'b' || ch == 'B') {
   795                         if (!allowBinaryLiterals) {
   796                             lexError("unsupported.binary.lit", source.name);
   797                             allowBinaryLiterals = true;
   798                         }
   799                         scanChar();
   800                         skipIllegalUnderscores();
   801                         if (digit(2) < 0) {
   802                             lexError("invalid.binary.number");
   803                         } else {
   804                             scanNumber(2);
   805                         }
   806                     } else {
   807                         putChar('0');
   808                         if (ch == '_') {
   809                             int savePos = bp;
   810                             do {
   811                                 scanChar();
   812                             } while (ch == '_');
   813                             if (digit(10) < 0) {
   814                                 lexError(savePos, "illegal.underscore");
   815                             }
   816                         }
   817                         scanNumber(8);
   818                     }
   819                     return;
   820                 case '1': case '2': case '3': case '4':
   821                 case '5': case '6': case '7': case '8': case '9':
   822                     scanNumber(10);
   823                     return;
   824                 case '.':
   825                     scanChar();
   826                     if ('0' <= ch && ch <= '9') {
   827                         putChar('.');
   828                         scanFractionAndSuffix();
   829                     } else if (ch == '.') {
   830                         putChar('.'); putChar('.');
   831                         scanChar();
   832                         if (ch == '.') {
   833                             scanChar();
   834                             putChar('.');
   835                             token = ELLIPSIS;
   836                         } else {
   837                             lexError("malformed.fp.lit");
   838                         }
   839                     } else {
   840                         token = DOT;
   841                     }
   842                     return;
   843                 case ',':
   844                     scanChar(); token = COMMA; return;
   845                 case ';':
   846                     scanChar(); token = SEMI; return;
   847                 case '(':
   848                     scanChar(); token = LPAREN; return;
   849                 case ')':
   850                     scanChar(); token = RPAREN; return;
   851                 case '[':
   852                     scanChar(); token = LBRACKET; return;
   853                 case ']':
   854                     scanChar(); token = RBRACKET; return;
   855                 case '{':
   856                     scanChar(); token = LBRACE; return;
   857                 case '}':
   858                     scanChar(); token = RBRACE; return;
   859                 case '/':
   860                     scanChar();
   861                     if (ch == '/') {
   862                         do {
   863                             scanCommentChar();
   864                         } while (ch != CR && ch != LF && bp < buflen);
   865                         if (bp < buflen) {
   866                             endPos = bp;
   867                             processComment(CommentStyle.LINE);
   868                         }
   869                         break;
   870                     } else if (ch == '*') {
   871                         scanChar();
   872                         CommentStyle style;
   873                         if (ch == '*') {
   874                             style = CommentStyle.JAVADOC;
   875                             scanDocComment();
   876                         } else {
   877                             style = CommentStyle.BLOCK;
   878                             while (bp < buflen) {
   879                                 if (ch == '*') {
   880                                     scanChar();
   881                                     if (ch == '/') break;
   882                                 } else {
   883                                     scanCommentChar();
   884                                 }
   885                             }
   886                         }
   887                         if (ch == '/') {
   888                             scanChar();
   889                             endPos = bp;
   890                             processComment(style);
   891                             break;
   892                         } else {
   893                             lexError("unclosed.comment");
   894                             return;
   895                         }
   896                     } else if (ch == '=') {
   897                         name = names.slashequals;
   898                         token = SLASHEQ;
   899                         scanChar();
   900                     } else {
   901                         name = names.slash;
   902                         token = SLASH;
   903                     }
   904                     return;
   905                 case '\'':
   906                     scanChar();
   907                     if (ch == '\'') {
   908                         lexError("empty.char.lit");
   909                     } else {
   910                         if (ch == CR || ch == LF)
   911                             lexError(pos, "illegal.line.end.in.char.lit");
   912                         scanLitChar();
   913                         if (ch == '\'') {
   914                             scanChar();
   915                             token = CHARLITERAL;
   916                         } else {
   917                             lexError(pos, "unclosed.char.lit");
   918                         }
   919                     }
   920                     return;
   921                 case '\"':
   922                     scanChar();
   923                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
   924                         scanLitChar();
   925                     if (ch == '\"') {
   926                         token = STRINGLITERAL;
   927                         scanChar();
   928                     } else {
   929                         lexError(pos, "unclosed.str.lit");
   930                     }
   931                     return;
   932                 default:
   933                     if (isSpecial(ch)) {
   934                         scanOperator();
   935                     } else {
   936                         boolean isJavaIdentifierStart;
   937                         if (ch < '\u0080') {
   938                             // all ASCII range chars already handled, above
   939                             isJavaIdentifierStart = false;
   940                         } else {
   941                             char high = scanSurrogates();
   942                             if (high != 0) {
   943                                 if (sp == sbuf.length) {
   944                                     putChar(high);
   945                                 } else {
   946                                     sbuf[sp++] = high;
   947                                 }
   949                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
   950                                     Character.toCodePoint(high, ch));
   951                             } else {
   952                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
   953                             }
   954                         }
   955                         if (isJavaIdentifierStart) {
   956                             scanIdent();
   957                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
   958                             token = EOF;
   959                             pos = bp = eofPos;
   960                         } else {
   961                             lexError("illegal.char", String.valueOf((int)ch));
   962                             scanChar();
   963                         }
   964                     }
   965                     return;
   966                 }
   967             }
   968         } finally {
   969             endPos = bp;
   970             if (scannerDebug)
   971                 System.out.println("nextToken(" + pos
   972                                    + "," + endPos + ")=|" +
   973                                    new String(getRawCharacters(pos, endPos))
   974                                    + "|");
   975         }
   976     }
   978     /** Return the current token, set by nextToken().
   979      */
   980     public Token token() {
   981         return token;
   982     }
   984     /** Sets the current token.
   985      */
   986     public void token(Token token) {
   987         this.token = token;
   988     }
   990     /** Return the current token's position: a 0-based
   991      *  offset from beginning of the raw input stream
   992      *  (before unicode translation)
   993      */
   994     public int pos() {
   995         return pos;
   996     }
   998     /** Return the last character position of the current token.
   999      */
  1000     public int endPos() {
  1001         return endPos;
  1004     /** Return the last character position of the previous token.
  1005      */
  1006     public int prevEndPos() {
  1007         return prevEndPos;
  1010     /** Return the position where a lexical error occurred;
  1011      */
  1012     public int errPos() {
  1013         return errPos;
  1016     /** Set the position where a lexical error occurred;
  1017      */
  1018     public void errPos(int pos) {
  1019         errPos = pos;
  1022     /** Return the name of an identifier or token for the current token.
  1023      */
  1024     public Name name() {
  1025         return name;
  1028     /** Return the radix of a numeric literal token.
  1029      */
  1030     public int radix() {
  1031         return radix;
  1034     /** Has a @deprecated been encountered in last doc comment?
  1035      *  This needs to be reset by client with resetDeprecatedFlag.
  1036      */
  1037     public boolean deprecatedFlag() {
  1038         return deprecatedFlag;
  1041     public void resetDeprecatedFlag() {
  1042         deprecatedFlag = false;
  1045     /**
  1046      * Returns the documentation string of the current token.
  1047      */
  1048     public String docComment() {
  1049         return null;
  1052     /**
  1053      * Returns a copy of the input buffer, up to its inputLength.
  1054      * Unicode escape sequences are not translated.
  1055      */
  1056     public char[] getRawCharacters() {
  1057         char[] chars = new char[buflen];
  1058         System.arraycopy(buf, 0, chars, 0, buflen);
  1059         return chars;
  1062     /**
  1063      * Returns a copy of a character array subset of the input buffer.
  1064      * The returned array begins at the <code>beginIndex</code> and
  1065      * extends to the character at index <code>endIndex - 1</code>.
  1066      * Thus the length of the substring is <code>endIndex-beginIndex</code>.
  1067      * This behavior is like
  1068      * <code>String.substring(beginIndex, endIndex)</code>.
  1069      * Unicode escape sequences are not translated.
  1071      * @param beginIndex the beginning index, inclusive.
  1072      * @param endIndex the ending index, exclusive.
  1073      * @throws IndexOutOfBounds if either offset is outside of the
  1074      *         array bounds
  1075      */
  1076     public char[] getRawCharacters(int beginIndex, int endIndex) {
  1077         int length = endIndex - beginIndex;
  1078         char[] chars = new char[length];
  1079         System.arraycopy(buf, beginIndex, chars, 0, length);
  1080         return chars;
  1083     public enum CommentStyle {
  1084         LINE,
  1085         BLOCK,
  1086         JAVADOC,
  1089     /**
  1090      * Called when a complete comment has been scanned. pos and endPos
  1091      * will mark the comment boundary.
  1092      */
  1093     protected void processComment(CommentStyle style) {
  1094         if (scannerDebug)
  1095             System.out.println("processComment(" + pos
  1096                                + "," + endPos + "," + style + ")=|"
  1097                                + new String(getRawCharacters(pos, endPos))
  1098                                + "|");
  1101     /**
  1102      * Called when a complete whitespace run has been scanned. pos and endPos
  1103      * will mark the whitespace boundary.
  1104      */
  1105     protected void processWhiteSpace() {
  1106         if (scannerDebug)
  1107             System.out.println("processWhitespace(" + pos
  1108                                + "," + endPos + ")=|" +
  1109                                new String(getRawCharacters(pos, endPos))
  1110                                + "|");
  1113     /**
  1114      * Called when a line terminator has been processed.
  1115      */
  1116     protected void processLineTerminator() {
  1117         if (scannerDebug)
  1118             System.out.println("processTerminator(" + pos
  1119                                + "," + endPos + ")=|" +
  1120                                new String(getRawCharacters(pos, endPos))
  1121                                + "|");
  1124     /** Build a map for translating between line numbers and
  1125      * positions in the input.
  1127      * @return a LineMap */
  1128     public Position.LineMap getLineMap() {
  1129         return Position.makeLineMap(buf, buflen, false);

mercurial