jdk8-mips64-public/langtools: src/share/classes/com/sun/tools/javac/parser/Scanner.java@4868a36f6fd8

     1 /*

     2  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Oracle designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Oracle in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    22  * or visit www.oracle.com if you need additional information or have any

    23  * questions.

    24  */

    26 package com.sun.tools.javac.parser;

    28 import java.nio.*;

    30 import com.sun.tools.javac.code.Source;

    31 import com.sun.tools.javac.file.JavacFileManager;

    32 import com.sun.tools.javac.util.*;

    35 import static com.sun.tools.javac.parser.Token.*;

    36 import static com.sun.tools.javac.util.LayoutCharacters.*;

    38 /** The lexical analyzer maps an input stream consisting of

    39  *  ASCII characters and Unicode escapes into a token sequence.

    40  *

    41  *  <p><b>This is NOT part of any supported API.

    42  *  If you write code that depends on this, you do so at your own risk.

    43  *  This code and its internal interfaces are subject to change or

    44  *  deletion without notice.</b>

    45  */

    46 public class Scanner implements Lexer {

    48     private static boolean scannerDebug = false; // debugging switch; off by default

    50     /* Output variables; set by nextToken():

    51      */

    53     /** The token, set by nextToken().

    54      */

    55     private Token token;

    57     /** Allow hex floating-point literals.

    58      */

    59     private boolean allowHexFloats;

    61     /** Allow binary literals.

    62      */

    63     private boolean allowBinaryLiterals;

    65     /** Allow underscores in literals.

    66      */

    67     private boolean allowUnderscoresInLiterals;

    69     /** The source language setting.

    70      */

    71     private Source source;

    73     /** The token's position, 0-based offset from beginning of text.

    74      */

    75     private int pos;

    77     /** Character position just after the last character of the token.

    78      */

    79     private int endPos;

    81     /** The last character position of the previous token.

    82      */

    83     private int prevEndPos;

    85     /** The position of the most recent lexical error; NOPOS until lexError records one.

    86      */

    87     private int errPos = Position.NOPOS;

    89     /** The name of the current identifier or operator token.

    90      */

    91     private Name name;

    93     /** The radix of a numeric literal token.

    94      */

    95     private int radix;

    97     /** Has a @deprecated been encountered in last doc comment?

    98      *  This needs to be reset by the client.

    99      */

   100     protected boolean deprecatedFlag = false;

   102     /** A character buffer for literals.

   103      */

   104     private char[] sbuf = new char[128];

   105     private int sp; // number of characters currently stored in sbuf

   107     /** The input buffer, index of next character to be read,

   108      *  index of one past last character in buffer.

   109      */

   110     private char[] buf;

   111     private int bp;

   112     private int buflen;

   113     private int eofPos; // set by the array constructor to the caller-supplied input length

   115     /** The current character.

   116      */

   117     private char ch;

   119     /** The buffer index of the last converted unicode escape; -1 until one is converted.

   120      */

   121     private int unicodeConversionBp = -1;

   123     /** The log to be used for error reporting.

   124      */

   125     private final Log log;

   127     /** The name table. */

   128     private final Names names;

   130     /** The keyword table. */

   131     private final Keywords keywords;

   133     /** Common code for constructors. */

   134     private Scanner(ScannerFactory fac) {

   135         log = fac.log;

   136         names = fac.names;

   137         keywords = fac.keywords;

   138         source = fac.source;

   139         allowBinaryLiterals = source.allowBinaryLiterals();

   140         allowHexFloats = source.allowHexFloats();

   141         allowUnderscoresInLiterals = source.allowBinaryLiterals();

   142     }

   144     private static final boolean hexFloatsWork = hexFloatsWork();

   145     private static boolean hexFloatsWork() {

   146         try {

   147             Float.valueOf("0x1.0p1");

   148             return true;

   149         } catch (NumberFormatException ex) {

   150             return false;

   151         }

   152     }

   154     /** Create a scanner from the input buffer.  buffer must implement

   155      *  array() and compact(), and remaining() must be less than limit().

   156      */

   157     protected Scanner(ScannerFactory fac, CharBuffer buffer) {

   158         this(fac, JavacFileManager.toArray(buffer), buffer.limit());

   159     }

   161     /**

   162      * Create a scanner from the input array.  This method might

   163      * modify the array.  To avoid copying the input array, ensure

   164      * that {@code inputLength < input.length} or

   165      * {@code input[input.length -1]} is a white space character.

   166      *

   167      * @param fac the factory which created this Scanner

   168      * @param input the input, might be modified

   169      * @param inputLength the size of the input.

   170      * Must be positive and less than or equal to input.length.

   171      */

   172     protected Scanner(ScannerFactory fac, char[] input, int inputLength) {

   173         this(fac);

   174         eofPos = inputLength;

   175         if (inputLength == input.length) {

   176             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {

   177                 inputLength--;

   178             } else {

   179                 char[] newInput = new char[inputLength + 1];

   180                 System.arraycopy(input, 0, newInput, 0, input.length);

   181                 input = newInput;

   182             }

   183         }

   184         buf = input;

   185         buflen = inputLength;

   186         buf[buflen] = EOI;

   187         bp = -1;

   188         scanChar();

   189     }

   191     /** Report an error at the given position using the provided arguments.

   192      */

   193     private void lexError(int pos, String key, Object... args) {

   194         log.error(pos, key, args);

   195         token = ERROR;

   196         errPos = pos;

   197     }

   199     /** Report an error at the current token position using the provided

   200      *  arguments.

   201      */

   202     private void lexError(String key, Object... args) {

   203         lexError(pos, key, args);

   204     }

   206     /** Convert an ASCII digit from its base (8, 10, or 16)

   207      *  to its value.

   208      */

   209     private int digit(int base) {

   210         char c = ch;

   211         int result = Character.digit(c, base);

   212         if (result >= 0 && c > 0x7f) {

   213             lexError(pos+1, "illegal.nonascii.digit");

   214             ch = "0123456789abcdef".charAt(result);

   215         }

   216         return result;

   217     }

   219     /** Convert unicode escape; bp points to initial '\' character

   220      *  (Spec 3.3).

   221      */

   222     private void convertUnicode() {

   223         if (ch == '\\' && unicodeConversionBp != bp) {

   224             bp++; ch = buf[bp];

   225             if (ch == 'u') {

   226                 do {

   227                     bp++; ch = buf[bp];

   228                 } while (ch == 'u');

   229                 int limit = bp + 3;

   230                 if (limit < buflen) {

   231                     int d = digit(16);

   232                     int code = d;

   233                     while (bp < limit && d >= 0) {

   234                         bp++; ch = buf[bp];

   235                         d = digit(16);

   236                         code = (code << 4) + d;

   237                     }

   238                     if (d >= 0) {

   239                         ch = (char)code;

   240                         unicodeConversionBp = bp;

   241                         return;

   242                     }

   243                 }

   244                 lexError(bp, "illegal.unicode.esc");

   245             } else {

   246                 bp--;

   247                 ch = '\\';

   248             }

   249         }

   250     }

   252     /** Read next character.

   253      */

   254     private void scanChar() {

   255         ch = buf[++bp];

   256         if (ch == '\\') {

   257             convertUnicode();

   258         }

   259     }

   261     /** Read next character in comment, skipping over double '\' characters.

   262      */

   263     private void scanCommentChar() {

   264         scanChar();

   265         if (ch == '\\') {

   266             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {

   267                 bp++;

   268             } else {

   269                 convertUnicode();

   270             }

   271         }

   272     }

   274     /** Append a character to sbuf.

   275      */

   276     private void putChar(char ch) {

   277         if (sp == sbuf.length) {

   278             char[] newsbuf = new char[sbuf.length * 2];

   279             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);

   280             sbuf = newsbuf;

   281         }

   282         sbuf[sp++] = ch;

   283     }

   285     /** For debugging purposes: print character.

   286      */

   287     private void dch() {

   288         System.err.print(ch); System.out.flush();

   289     }

   291     /** Read next character in character or string literal and copy into sbuf.

   292      */

   293     private void scanLitChar() {

   294         if (ch == '\\') {

   295             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {

   296                 bp++;

   297                 putChar('\\');

   298                 scanChar();

   299             } else {

   300                 scanChar();

   301                 switch (ch) {

   302                 case '0': case '1': case '2': case '3':

   303                 case '4': case '5': case '6': case '7':

   304                     char leadch = ch;

   305                     int oct = digit(8);

   306                     scanChar();

   307                     if ('0' <= ch && ch <= '7') {

   308                         oct = oct * 8 + digit(8);

   309                         scanChar();

   310                         if (leadch <= '3' && '0' <= ch && ch <= '7') {

   311                             oct = oct * 8 + digit(8);

   312                             scanChar();

   313                         }

   314                     }

   315                     putChar((char)oct);

   316                     break;

   317                 case 'b':

   318                     putChar('\b'); scanChar(); break;

   319                 case 't':

   320                     putChar('\t'); scanChar(); break;

   321                 case 'n':

   322                     putChar('\n'); scanChar(); break;

   323                 case 'f':

   324                     putChar('\f'); scanChar(); break;

   325                 case 'r':

   326                     putChar('\r'); scanChar(); break;

   327                 case '\'':

   328                     putChar('\''); scanChar(); break;

   329                 case '\"':

   330                     putChar('\"'); scanChar(); break;

   331                 case '\\':

   332                     putChar('\\'); scanChar(); break;

   333                 default:

   334                     lexError(bp, "illegal.esc.char");

   335                 }

   336             }

   337         } else if (bp != buflen) {

   338             putChar(ch); scanChar();

   339         }

   340     }

   342     private void scanDigits(int digitRadix) {

   343         char saveCh;

   344         int savePos;

   345         do {

   346             if (ch != '_') {

   347                 putChar(ch);

   348             } else {

   349                 if (!allowUnderscoresInLiterals) {

   350                     lexError("unsupported.underscore.lit", source.name);

   351                     allowUnderscoresInLiterals = true;

   352                 }

   353             }

   354             saveCh = ch;

   355             savePos = bp;

   356             scanChar();

   357         } while (digit(digitRadix) >= 0 || ch == '_');

   358         if (saveCh == '_')

   359             lexError(savePos, "illegal.underscore");

   360     }

   362     /** Read fractional part of hexadecimal floating point number.

   363      */

   364     private void scanHexExponentAndSuffix() {

   365         if (ch == 'p' || ch == 'P') {

   366             putChar(ch);

   367             scanChar();

   368             skipIllegalUnderscores();

   369             if (ch == '+' || ch == '-') {

   370                 putChar(ch);

   371                 scanChar();

   372             }

   373             skipIllegalUnderscores();

   374             if ('0' <= ch && ch <= '9') {

   375                 scanDigits(10);

   376                 if (!allowHexFloats) {

   377                     lexError("unsupported.fp.lit", source.name);

   378                     allowHexFloats = true;

   379                 }

   380                 else if (!hexFloatsWork)

   381                     lexError("unsupported.cross.fp.lit");

   382             } else

   383                 lexError("malformed.fp.lit");

   384         } else {

   385             lexError("malformed.fp.lit");

   386         }

   387         if (ch == 'f' || ch == 'F') {

   388             putChar(ch);

   389             scanChar();

   390             token = FLOATLITERAL;

   391         } else {

   392             if (ch == 'd' || ch == 'D') {

   393                 putChar(ch);

   394                 scanChar();

   395             }

   396             token = DOUBLELITERAL;

   397         }

   398     }

   400     /** Read fractional part of floating point number.

   401      */

   402     private void scanFraction() {

   403         skipIllegalUnderscores();

   404         if ('0' <= ch && ch <= '9') {

   405             scanDigits(10);

   406         }

   407         int sp1 = sp;

   408         if (ch == 'e' || ch == 'E') {

   409             putChar(ch);

   410             scanChar();

   411             skipIllegalUnderscores();

   412             if (ch == '+' || ch == '-') {

   413                 putChar(ch);

   414                 scanChar();

   415             }

   416             skipIllegalUnderscores();

   417             if ('0' <= ch && ch <= '9') {

   418                 scanDigits(10);

   419                 return;

   420             }

   421             lexError("malformed.fp.lit");

   422             sp = sp1;

   423         }

   424     }

   426     /** Read fractional part and 'd' or 'f' suffix of floating point number.

   427      */

   428     private void scanFractionAndSuffix() {

   429         this.radix = 10;

   430         scanFraction();

   431         if (ch == 'f' || ch == 'F') {

   432             putChar(ch);

   433             scanChar();

   434             token = FLOATLITERAL;

   435         } else {

   436             if (ch == 'd' || ch == 'D') {

   437                 putChar(ch);

   438                 scanChar();

   439             }

   440             token = DOUBLELITERAL;

   441         }

   442     }

   444     /** Read fractional part and 'd' or 'f' suffix of floating point number.

   445      */

   446     private void scanHexFractionAndSuffix(boolean seendigit) {

   447         this.radix = 16;

   448         assert ch == '.';

   449         putChar(ch);

   450         scanChar();

   451         skipIllegalUnderscores();

   452         if (digit(16) >= 0) {

   453             seendigit = true;

   454             scanDigits(16);

   455         }

   456         if (!seendigit)

   457             lexError("invalid.hex.number");

   458         else

   459             scanHexExponentAndSuffix();

   460     }

   462     private void skipIllegalUnderscores() {

   463         if (ch == '_') {

   464             lexError(bp, "illegal.underscore");

   465             while (ch == '_')

   466                 scanChar();

   467         }

   468     }

   470     /** Read a number.

   471      *  @param radix  The radix of the number; one of 2, j8, 10, 16.

   472      */

   473     private void scanNumber(int radix) {

   474         this.radix = radix;

   475         // for octal, allow base-10 digit in case it's a float literal

   476         int digitRadix = (radix == 8 ? 10 : radix);

   477         boolean seendigit = false;

   478         if (digit(digitRadix) >= 0) {

   479             seendigit = true;

   480             scanDigits(digitRadix);

   481         }

   482         if (radix == 16 && ch == '.') {

   483             scanHexFractionAndSuffix(seendigit);

   484         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {

   485             scanHexExponentAndSuffix();

   486         } else if (digitRadix == 10 && ch == '.') {

   487             putChar(ch);

   488             scanChar();

   489             scanFractionAndSuffix();

   490         } else if (digitRadix == 10 &&

   491                    (ch == 'e' || ch == 'E' ||

   492                     ch == 'f' || ch == 'F' ||

   493                     ch == 'd' || ch == 'D')) {

   494             scanFractionAndSuffix();

   495         } else {

   496             if (ch == 'l' || ch == 'L') {

   497                 scanChar();

   498                 token = LONGLITERAL;

   499             } else {

   500                 token = INTLITERAL;

   501             }

   502         }

   503     }

   505     /** Read an identifier.

   506      */

   507     private void scanIdent() {

   508         boolean isJavaIdentifierPart;

   509         char high;

   510         do {

   511             if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;

   512             // optimization, was: putChar(ch);

   514             scanChar();

   515             switch (ch) {

   516             case 'A': case 'B': case 'C': case 'D': case 'E':

   517             case 'F': case 'G': case 'H': case 'I': case 'J':

   518             case 'K': case 'L': case 'M': case 'N': case 'O':

   519             case 'P': case 'Q': case 'R': case 'S': case 'T':

   520             case 'U': case 'V': case 'W': case 'X': case 'Y':

   521             case 'Z':

   522             case 'a': case 'b': case 'c': case 'd': case 'e':

   523             case 'f': case 'g': case 'h': case 'i': case 'j':

   524             case 'k': case 'l': case 'm': case 'n': case 'o':

   525             case 'p': case 'q': case 'r': case 's': case 't':

   526             case 'u': case 'v': case 'w': case 'x': case 'y':

   527             case 'z':

   528             case '$': case '_':

   529             case '0': case '1': case '2': case '3': case '4':

   530             case '5': case '6': case '7': case '8': case '9':

   531             case '\u0000': case '\u0001': case '\u0002': case '\u0003':

   532             case '\u0004': case '\u0005': case '\u0006': case '\u0007':

   533             case '\u0008': case '\u000E': case '\u000F': case '\u0010':

   534             case '\u0011': case '\u0012': case '\u0013': case '\u0014':

   535             case '\u0015': case '\u0016': case '\u0017':

   536             case '\u0018': case '\u0019': case '\u001B':

   537             case '\u007F':

   538                 break;

   539             case '\u001A': // EOI is also a legal identifier part

   540                 if (bp >= buflen) {

   541                     name = names.fromChars(sbuf, 0, sp);

   542                     token = keywords.key(name);

   543                     return;

   544                 }

   545                 break;

   546             default:

   547                 if (ch < '\u0080') {

   548                     // all ASCII range chars already handled, above

   549                     isJavaIdentifierPart = false;

   550                 } else {

   551                     high = scanSurrogates();

   552                     if (high != 0) {

   553                         if (sp == sbuf.length) {

   554                             putChar(high);

   555                         } else {

   556                             sbuf[sp++] = high;

   557                         }

   558                         isJavaIdentifierPart = Character.isJavaIdentifierPart(

   559                             Character.toCodePoint(high, ch));

   560                     } else {

   561                         isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);

   562                     }

   563                 }

   564                 if (!isJavaIdentifierPart) {

   565                     name = names.fromChars(sbuf, 0, sp);

   566                     token = keywords.key(name);

   567                     return;

   568                 }

   569             }

   570         } while (true);

   571     }

   573     /** Are surrogates supported?

   574      */

   575     final static boolean surrogatesSupported = surrogatesSupported();

   576     private static boolean surrogatesSupported() {

   577         try {

   578             Character.isHighSurrogate('a');

   579             return true;

   580         } catch (NoSuchMethodError ex) {

   581             return false;

   582         }

   583     }

   585     /** Scan surrogate pairs.  If 'ch' is a high surrogate and

   586      *  the next character is a low surrogate, then put the low

   587      *  surrogate in 'ch', and return the high surrogate.

   588      *  otherwise, just return 0.

   589      */

   590     private char scanSurrogates() {

   591         if (surrogatesSupported && Character.isHighSurrogate(ch)) {

   592             char high = ch;

   594             scanChar();

   596             if (Character.isLowSurrogate(ch)) {

   597                 return high;

   598             }

   600             ch = high;

   601         }

   603         return 0;

   604     }

   606     /** Return true if ch can be part of an operator.

   607      */

   608     private boolean isSpecial(char ch) {

   609         switch (ch) {

   610         case '!': case '%': case '&': case '*': case '?':

   611         case '+': case '-': case ':': case '<': case '=':

   612         case '>': case '^': case '|': case '~':

   613         case '@':

   614             return true;

   615         default:

   616             return false;

   617         }

   618     }

   620     /** Read longest possible sequence of special characters and convert

   621      *  to token.

   622      */

   623     private void scanOperator() {

   624         while (true) {

   625             putChar(ch);

   626             Name newname = names.fromChars(sbuf, 0, sp);

   627             if (keywords.key(newname) == IDENTIFIER) {

   628                 sp--;

   629                 break;

   630             }

   631             name = newname;

   632             token = keywords.key(newname);

   633             scanChar();

   634             if (!isSpecial(ch)) break;

   635         }

   636     }

   638     /**

   639      * Scan a documention comment; determine if a deprecated tag is present.

   640      * Called once the initial /, * have been skipped, positioned at the second *

   641      * (which is treated as the beginning of the first line).

   642      * Stops positioned at the closing '/'.

   643      */

   644     @SuppressWarnings("fallthrough")

   645     private void scanDocComment() {

   646         boolean deprecatedPrefix = false;

   648         forEachLine:

   649         while (bp < buflen) {

   651             // Skip optional WhiteSpace at beginning of line

   652             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {

   653                 scanCommentChar();

   654             }

   656             // Skip optional consecutive Stars

   657             while (bp < buflen && ch == '*') {

   658                 scanCommentChar();

   659                 if (ch == '/') {

   660                     return;

   661                 }

   662             }

   664             // Skip optional WhiteSpace after Stars

   665             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {

   666                 scanCommentChar();

   667             }

   669             deprecatedPrefix = false;

   670             // At beginning of line in the JavaDoc sense.

   671             if (bp < buflen && ch == '@' && !deprecatedFlag) {

   672                 scanCommentChar();

   673                 if (bp < buflen && ch == 'd') {

   674                     scanCommentChar();

   675                     if (bp < buflen && ch == 'e') {

   676                         scanCommentChar();

   677                         if (bp < buflen && ch == 'p') {

   678                             scanCommentChar();

   679                             if (bp < buflen && ch == 'r') {

   680                                 scanCommentChar();

   681                                 if (bp < buflen && ch == 'e') {

   682                                     scanCommentChar();

   683                                     if (bp < buflen && ch == 'c') {

   684                                         scanCommentChar();

   685                                         if (bp < buflen && ch == 'a') {

   686                                             scanCommentChar();

   687                                             if (bp < buflen && ch == 't') {

   688                                                 scanCommentChar();

   689                                                 if (bp < buflen && ch == 'e') {

   690                                                     scanCommentChar();

   691                                                     if (bp < buflen && ch == 'd') {

   692                                                         deprecatedPrefix = true;

   693                                                         scanCommentChar();

   694                                                     }}}}}}}}}}}

   695             if (deprecatedPrefix && bp < buflen) {

   696                 if (Character.isWhitespace(ch)) {

   697                     deprecatedFlag = true;

   698                 } else if (ch == '*') {

   699                     scanCommentChar();

   700                     if (ch == '/') {

   701                         deprecatedFlag = true;

   702                         return;

   703                     }

   704                 }

   705             }

   707             // Skip rest of line

   708             while (bp < buflen) {

   709                 switch (ch) {

   710                 case '*':

   711                     scanCommentChar();

   712                     if (ch == '/') {

   713                         return;

   714                     }

   715                     break;

   716                 case CR: // (Spec 3.4)

   717                     scanCommentChar();

   718                     if (ch != LF) {

   719                         continue forEachLine;

   720                     }

   721                     /* fall through to LF case */

   722                 case LF: // (Spec 3.4)

   723                     scanCommentChar();

   724                     continue forEachLine;

   725                 default:

   726                     scanCommentChar();

   727                 }

   728             } // rest of line

   729         } // forEachLine

   730         return;

   731     }

   733     /** The value of a literal token, recorded as a string.

   734      *  For integers, leading 0x and 'l' suffixes are suppressed.

   735      */

   736     public String stringVal() {

   737         return new String(sbuf, 0, sp);

   738     }

   740     /** Read token.

   741      */

   742     public void nextToken() {

   744         try {

   745             prevEndPos = endPos;

   746             sp = 0;

   748             while (true) {

   749                 pos = bp;

   750                 switch (ch) {

   751                 case ' ': // (Spec 3.6)

   752                 case '\t': // (Spec 3.6)

   753                 case FF: // (Spec 3.6)

   754                     do {

   755                         scanChar();

   756                     } while (ch == ' ' || ch == '\t' || ch == FF);

   757                     endPos = bp;

   758                     processWhiteSpace();

   759                     break;

   760                 case LF: // (Spec 3.4)

   761                     scanChar();

   762                     endPos = bp;

   763                     processLineTerminator();

   764                     break;

   765                 case CR: // (Spec 3.4)

   766                     scanChar();

   767                     if (ch == LF) {

   768                         scanChar();

   769                     }

   770                     endPos = bp;

   771                     processLineTerminator();

   772                     break;

   773                 case 'A': case 'B': case 'C': case 'D': case 'E':

   774                 case 'F': case 'G': case 'H': case 'I': case 'J':

   775                 case 'K': case 'L': case 'M': case 'N': case 'O':

   776                 case 'P': case 'Q': case 'R': case 'S': case 'T':

   777                 case 'U': case 'V': case 'W': case 'X': case 'Y':

   778                 case 'Z':

   779                 case 'a': case 'b': case 'c': case 'd': case 'e':

   780                 case 'f': case 'g': case 'h': case 'i': case 'j':

   781                 case 'k': case 'l': case 'm': case 'n': case 'o':

   782                 case 'p': case 'q': case 'r': case 's': case 't':

   783                 case 'u': case 'v': case 'w': case 'x': case 'y':

   784                 case 'z':

   785                 case '$': case '_':

   786                     scanIdent();

   787                     return;

   788                 case '0':

   789                     scanChar();

   790                     if (ch == 'x' || ch == 'X') {

   791                         scanChar();

   792                         skipIllegalUnderscores();

   793                         if (ch == '.') {

   794                             scanHexFractionAndSuffix(false);

   795                         } else if (digit(16) < 0) {

   796                             lexError("invalid.hex.number");

   797                         } else {

   798                             scanNumber(16);

   799                         }

   800                     } else if (ch == 'b' || ch == 'B') {

   801                         if (!allowBinaryLiterals) {

   802                             lexError("unsupported.binary.lit", source.name);

   803                             allowBinaryLiterals = true;

   804                         }

   805                         scanChar();

   806                         skipIllegalUnderscores();

   807                         if (digit(2) < 0) {

   808                             lexError("invalid.binary.number");

   809                         } else {

   810                             scanNumber(2);

   811                         }

   812                     } else {

   813                         putChar('0');

   814                         if (ch == '_') {

   815                             int savePos = bp;

   816                             do {

   817                                 scanChar();

   818                             } while (ch == '_');

   819                             if (digit(10) < 0) {

   820                                 lexError(savePos, "illegal.underscore");

   821                             }

   822                         }

   823                         scanNumber(8);

   824                     }

   825                     return;

   826                 case '1': case '2': case '3': case '4':

   827                 case '5': case '6': case '7': case '8': case '9':

   828                     scanNumber(10);

   829                     return;

   830                 case '.':

   831                     scanChar();

   832                     if ('0' <= ch && ch <= '9') {

   833                         putChar('.');

   834                         scanFractionAndSuffix();

   835                     } else if (ch == '.') {

   836                         putChar('.'); putChar('.');

   837                         scanChar();

   838                         if (ch == '.') {

   839                             scanChar();

   840                             putChar('.');

   841                             token = ELLIPSIS;

   842                         } else {

   843                             lexError("malformed.fp.lit");

   844                         }

   845                     } else {

   846                         token = DOT;

   847                     }

   848                     return;

   849                 case ',':

   850                     scanChar(); token = COMMA; return;

   851                 case ';':

   852                     scanChar(); token = SEMI; return;

   853                 case '(':

   854                     scanChar(); token = LPAREN; return;

   855                 case ')':

   856                     scanChar(); token = RPAREN; return;

   857                 case '[':

   858                     scanChar(); token = LBRACKET; return;

   859                 case ']':

   860                     scanChar(); token = RBRACKET; return;

   861                 case '{':

   862                     scanChar(); token = LBRACE; return;

   863                 case '}':

   864                     scanChar(); token = RBRACE; return;

   865                 case '/':

   866                     scanChar();

   867                     if (ch == '/') {

   868                         do {

   869                             scanCommentChar();

   870                         } while (ch != CR && ch != LF && bp < buflen);

   871                         if (bp < buflen) {

   872                             endPos = bp;

   873                             processComment(CommentStyle.LINE);

   874                         }

   875                         break;

   876                     } else if (ch == '*') {

   877                         scanChar();

   878                         CommentStyle style;

   879                         if (ch == '*') {

   880                             style = CommentStyle.JAVADOC;

   881                             scanDocComment();

   882                         } else {

   883                             style = CommentStyle.BLOCK;

   884                             while (bp < buflen) {

   885                                 if (ch == '*') {

   886                                     scanChar();

   887                                     if (ch == '/') break;

   888                                 } else {

   889                                     scanCommentChar();

   890                                 }

   891                             }

   892                         }

   893                         if (ch == '/') {

   894                             scanChar();

   895                             endPos = bp;

   896                             processComment(style);

   897                             break;

   898                         } else {

   899                             lexError("unclosed.comment");

   900                             return;

   901                         }

   902                     } else if (ch == '=') {

   903                         name = names.slashequals;

   904                         token = SLASHEQ;

   905                         scanChar();

   906                     } else {

   907                         name = names.slash;

   908                         token = SLASH;

   909                     }

   910                     return;

   911                 case '\'':

   912                     scanChar();

   913                     if (ch == '\'') {

   914                         lexError("empty.char.lit");

   915                     } else {

   916                         if (ch == CR || ch == LF)

   917                             lexError(pos, "illegal.line.end.in.char.lit");

   918                         scanLitChar();

   919                         if (ch == '\'') {

   920                             scanChar();

   921                             token = CHARLITERAL;

   922                         } else {

   923                             lexError(pos, "unclosed.char.lit");

   924                         }

   925                     }

   926                     return;

   927                 case '\"':

   928                     scanChar();

   929                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)

   930                         scanLitChar();

   931                     if (ch == '\"') {

   932                         token = STRINGLITERAL;

   933                         scanChar();

   934                     } else {

   935                         lexError(pos, "unclosed.str.lit");

   936                     }

   937                     return;

   938                 default:

   939                     if (isSpecial(ch)) {

   940                         scanOperator();

   941                     } else {

   942                         boolean isJavaIdentifierStart;

   943                         if (ch < '\u0080') {

   944                             // all ASCII range chars already handled, above

   945                             isJavaIdentifierStart = false;

   946                         } else {

   947                             char high = scanSurrogates();

   948                             if (high != 0) {

   949                                 if (sp == sbuf.length) {

   950                                     putChar(high);

   951                                 } else {

   952                                     sbuf[sp++] = high;

   953                                 }

   955                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(

   956                                     Character.toCodePoint(high, ch));

   957                             } else {

   958                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);

   959                             }

   960                         }

   961                         if (isJavaIdentifierStart) {

   962                             scanIdent();

   963                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5

   964                             token = EOF;

   965                             pos = bp = eofPos;

   966                         } else {

   967                             lexError("illegal.char", String.valueOf((int)ch));

   968                             scanChar();

   969                         }

   970                     }

   971                     return;

   972                 }

   973             }

   974         } finally {

   975             endPos = bp;

   976             if (scannerDebug)

   977                 System.out.println("nextToken(" + pos

   978                                    + "," + endPos + ")=|" +

   979                                    new String(getRawCharacters(pos, endPos))

   980                                    + "|");

   981         }

   982     }

   984     /** Return the current token, set by nextToken().

   985      */

   986     public Token token() {

   987         return token;

   988     }

   990     /** Sets the current token.

   991      */

   992     public void token(Token token) {

   993         this.token = token;

   994     }

   996     /** Return the current token's position: a 0-based

   997      *  offset from beginning of the raw input stream

   998      *  (before unicode translation)

   999      */

  1000     public int pos() {

  1001         return pos;

  1002     }

  1004     /** Return the last character position of the current token.

  1005      */

  1006     public int endPos() {

  1007         return endPos;

  1008     }

  1010     /** Return the last character position of the previous token.

  1011      */

  1012     public int prevEndPos() {

  1013         return prevEndPos;

  1014     }

  1016     /** Return the position where a lexical error occurred;

  1017      */

  1018     public int errPos() {

  1019         return errPos;

  1020     }

  1022     /** Set the position where a lexical error occurred;

  1023      */

  1024     public void errPos(int pos) {

  1025         errPos = pos;

  1026     }

  1028     /** Return the name of an identifier or token for the current token.

  1029      */

  1030     public Name name() {

  1031         return name;

  1032     }

  1034     /** Return the radix of a numeric literal token.

  1035      */

  1036     public int radix() {

  1037         return radix;

  1038     }

  1040     /** Has a @deprecated been encountered in last doc comment?

  1041      *  This needs to be reset by client with resetDeprecatedFlag.

  1042      */

  1043     public boolean deprecatedFlag() {

  1044         return deprecatedFlag;

  1045     }

  1047     public void resetDeprecatedFlag() {

  1048         deprecatedFlag = false;

  1049     }

  1051     /**

  1052      * Returns the documentation string of the current token.

  1053      */

  1054     public String docComment() {

  1055         return null;

  1056     }

  1058     /**

  1059      * Returns a copy of the input buffer, up to its inputLength.

  1060      * Unicode escape sequences are not translated.

  1061      */

  1062     public char[] getRawCharacters() {

  1063         char[] chars = new char[buflen];

  1064         System.arraycopy(buf, 0, chars, 0, buflen);

  1065         return chars;

  1066     }

  1068     /**

  1069      * Returns a copy of a character array subset of the input buffer.

  1070      * The returned array begins at the <code>beginIndex</code> and

  1071      * extends to the character at index <code>endIndex - 1</code>.

  1072      * Thus the length of the substring is <code>endIndex-beginIndex</code>.

  1073      * This behavior is like

  1074      * <code>String.substring(beginIndex, endIndex)</code>.

  1075      * Unicode escape sequences are not translated.

  1076      *

  1077      * @param beginIndex the beginning index, inclusive.

  1078      * @param endIndex the ending index, exclusive.

  1079      * @throws IndexOutOfBounds if either offset is outside of the

  1080      *         array bounds

  1081      */

  1082     public char[] getRawCharacters(int beginIndex, int endIndex) {

  1083         int length = endIndex - beginIndex;

  1084         char[] chars = new char[length];

  1085         System.arraycopy(buf, beginIndex, chars, 0, length);

  1086         return chars;

  1087     }

  1089     public enum CommentStyle {

  1090         LINE,

  1091         BLOCK,

  1092         JAVADOC,

  1093     }

  1095     /**

  1096      * Called when a complete comment has been scanned. pos and endPos

  1097      * will mark the comment boundary.

  1098      */

  1099     protected void processComment(CommentStyle style) {

  1100         if (scannerDebug)

  1101             System.out.println("processComment(" + pos

  1102                                + "," + endPos + "," + style + ")=|"

  1103                                + new String(getRawCharacters(pos, endPos))

  1104                                + "|");

  1105     }

  1107     /**

  1108      * Called when a complete whitespace run has been scanned. pos and endPos

  1109      * will mark the whitespace boundary.

  1110      */

  1111     protected void processWhiteSpace() {

  1112         if (scannerDebug)

  1113             System.out.println("processWhitespace(" + pos

  1114                                + "," + endPos + ")=|" +

  1115                                new String(getRawCharacters(pos, endPos))

  1116                                + "|");

  1117     }

  1119     /**

  1120      * Called when a line terminator has been processed.

  1121      */

  1122     protected void processLineTerminator() {

  1123         if (scannerDebug)

  1124             System.out.println("processTerminator(" + pos

  1125                                + "," + endPos + ")=|" +

  1126                                new String(getRawCharacters(pos, endPos))

  1127                                + "|");

  1128     }

  1130     /** Build a map for translating between line numbers and

  1131      * positions in the input.

  1132      *

  1133      * @return a LineMap */

  1134     public Position.LineMap getLineMap() {

  1135         return Position.makeLineMap(buf, buflen, false);

  1136     }

  1138 }

Mercurial > jdk8-mips64-public > langtools / file revision

src/share/classes/com/sun/tools/javac/parser/Scanner.java@4868a36f6fd8

src/share/classes/com/sun/tools/javac/parser/Scanner.java