jdk8-mips64-public/langtools: src/share/classes/com/sun/tools/javac/parser/Scanner.java@eff38cc97183

6574134: Allow for alternative implementation of Name Table with garbage collection of name bytes
Reviewed-by: darcy, mcimadamore

     1 /*

     2  * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.  Sun designates this

     8  * particular file as subject to the "Classpath" exception as provided

     9  * by Sun in the LICENSE file that accompanied this code.

    10  *

    11  * This code is distributed in the hope that it will be useful, but WITHOUT

    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    14  * version 2 for more details (a copy is included in the LICENSE file that

    15  * accompanied this code).

    16  *

    17  * You should have received a copy of the GNU General Public License version

    18  * 2 along with this work; if not, write to the Free Software Foundation,

    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    20  *

    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,

    22  * CA 95054 USA or visit www.sun.com if you need additional information or

    23  * have any questions.

    24  */

    26 package com.sun.tools.javac.parser;

    28 import java.nio.*;

    30 import com.sun.tools.javac.code.Source;

    31 import com.sun.tools.javac.file.JavacFileManager;

    32 import com.sun.tools.javac.util.*;

    35 import static com.sun.tools.javac.parser.Token.*;

    36 import static com.sun.tools.javac.util.LayoutCharacters.*;

    38 /** The lexical analyzer maps an input stream consisting of

    39  *  ASCII characters and Unicode escapes into a token sequence.

    40  *

    41  *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If

    42  *  you write code that depends on this, you do so at your own risk.

    43  *  This code and its internal interfaces are subject to change or

    44  *  deletion without notice.</b>

    45  */

    46 public class Scanner implements Lexer {

    48     private static boolean scannerDebug = false;

    50     /** A factory for creating scanners. */

    51     public static class Factory {

    52         /** The context key for the scanner factory. */

    53         public static final Context.Key<Scanner.Factory> scannerFactoryKey =

    54             new Context.Key<Scanner.Factory>();

    56         /** Get the Factory instance for this context. */

    57         public static Factory instance(Context context) {

    58             Factory instance = context.get(scannerFactoryKey);

    59             if (instance == null)

    60                 instance = new Factory(context);

    61             return instance;

    62         }

    64         final Log log;

    65         final Names names;

    66         final Source source;

    67         final Keywords keywords;

    69         /** Create a new scanner factory. */

    70         protected Factory(Context context) {

    71             context.put(scannerFactoryKey, this);

    72             this.log = Log.instance(context);

    73             this.names = Names.instance(context);

    74             this.source = Source.instance(context);

    75             this.keywords = Keywords.instance(context);

    76         }

    78         public Scanner newScanner(CharSequence input) {

    79             if (input instanceof CharBuffer) {

    80                 return new Scanner(this, (CharBuffer)input);

    81             } else {

    82                 char[] array = input.toString().toCharArray();

    83                 return newScanner(array, array.length);

    84             }

    85         }

    87         public Scanner newScanner(char[] input, int inputLength) {

    88             return new Scanner(this, input, inputLength);

    89         }

    90     }

    92     /* Output variables; set by nextToken():

    93      */

    95     /** The token, set by nextToken().

    96      */

    97     private Token token;

    99     /** Allow hex floating-point literals.

   100      */

   101     private boolean allowHexFloats;

   103     /** The token's position, 0-based offset from beginning of text.

   104      */

   105     private int pos;

   107     /** Character position just after the last character of the token.

   108      */

   109     private int endPos;

   111     /** The last character position of the previous token.

   112      */

   113     private int prevEndPos;

   115     /** The position where a lexical error occurred;

   116      */

   117     private int errPos = Position.NOPOS;

   119     /** The name of an identifier or token:

   120      */

   121     private Name name;

   123     /** The radix of a numeric literal token.

   124      */

   125     private int radix;

   127     /** Has a @deprecated been encountered in last doc comment?

   128      *  this needs to be reset by client.

   129      */

   130     protected boolean deprecatedFlag = false;

   132     /** A character buffer for literals.

   133      */

   134     private char[] sbuf = new char[128];

   135     private int sp;

   137     /** The input buffer, index of next character to be read,

   138      *  index of one past last character in buffer.

   139      */

   140     private char[] buf;

   141     private int bp;

   142     private int buflen;

   143     private int eofPos;

   145     /** The current character.

   146      */

   147     private char ch;

   149     /** The buffer index of the last converted unicode character

   150      */

   151     private int unicodeConversionBp = -1;

   153     /** The log to be used for error reporting.

   154      */

   155     private final Log log;

   157     /** The name table. */

   158     private final Names names;

   160     /** The keyword table. */

   161     private final Keywords keywords;

   163     /** Common code for constructors. */

   164     private Scanner(Factory fac) {

   165         this.log = fac.log;

   166         this.names = fac.names;

   167         this.keywords = fac.keywords;

   168         this.allowHexFloats = fac.source.allowHexFloats();

   169     }

   171     private static final boolean hexFloatsWork = hexFloatsWork();

   172     private static boolean hexFloatsWork() {

   173         try {

   174             Float.valueOf("0x1.0p1");

   175             return true;

   176         } catch (NumberFormatException ex) {

   177             return false;

   178         }

   179     }

   181     /** Create a scanner from the input buffer.  buffer must implement

   182      *  array() and compact(), and remaining() must be less than limit().

   183      */

   184     protected Scanner(Factory fac, CharBuffer buffer) {

   185         this(fac, JavacFileManager.toArray(buffer), buffer.limit());

   186     }

   188     /**

   189      * Create a scanner from the input array.  This method might

   190      * modify the array.  To avoid copying the input array, ensure

   191      * that {@code inputLength < input.length} or

   192      * {@code input[input.length -1]} is a white space character.

   193      *

   194      * @param fac the factory which created this Scanner

   195      * @param input the input, might be modified

   196      * @param inputLength the size of the input.

   197      * Must be positive and less than or equal to input.length.

   198      */

   199     protected Scanner(Factory fac, char[] input, int inputLength) {

   200         this(fac);

   201         eofPos = inputLength;

   202         if (inputLength == input.length) {

   203             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {

   204                 inputLength--;

   205             } else {

   206                 char[] newInput = new char[inputLength + 1];

   207                 System.arraycopy(input, 0, newInput, 0, input.length);

   208                 input = newInput;

   209             }

   210         }

   211         buf = input;

   212         buflen = inputLength;

   213         buf[buflen] = EOI;

   214         bp = -1;

   215         scanChar();

   216     }

   218     /** Report an error at the given position using the provided arguments.

   219      */

   220     private void lexError(int pos, String key, Object... args) {

   221         log.error(pos, key, args);

   222         token = ERROR;

   223         errPos = pos;

   224     }

   226     /** Report an error at the current token position using the provided

   227      *  arguments.

   228      */

   229     private void lexError(String key, Object... args) {

   230         lexError(pos, key, args);

   231     }

   233     /** Convert an ASCII digit from its base (8, 10, or 16)

   234      *  to its value.

   235      */

   236     private int digit(int base) {

   237         char c = ch;

   238         int result = Character.digit(c, base);

   239         if (result >= 0 && c > 0x7f) {

   240             lexError(pos+1, "illegal.nonascii.digit");

   241             ch = "0123456789abcdef".charAt(result);

   242         }

   243         return result;

   244     }

   246     /** Convert unicode escape; bp points to initial '\' character

   247      *  (Spec 3.3).

   248      */

   249     private void convertUnicode() {

   250         if (ch == '\\' && unicodeConversionBp != bp) {

   251             bp++; ch = buf[bp];

   252             if (ch == 'u') {

   253                 do {

   254                     bp++; ch = buf[bp];

   255                 } while (ch == 'u');

   256                 int limit = bp + 3;

   257                 if (limit < buflen) {

   258                     int d = digit(16);

   259                     int code = d;

   260                     while (bp < limit && d >= 0) {

   261                         bp++; ch = buf[bp];

   262                         d = digit(16);

   263                         code = (code << 4) + d;

   264                     }

   265                     if (d >= 0) {

   266                         ch = (char)code;

   267                         unicodeConversionBp = bp;

   268                         return;

   269                     }

   270                 }

   271                 lexError(bp, "illegal.unicode.esc");

   272             } else {

   273                 bp--;

   274                 ch = '\\';

   275             }

   276         }

   277     }

   279     /** Read next character.

   280      */

   281     private void scanChar() {

   282         ch = buf[++bp];

   283         if (ch == '\\') {

   284             convertUnicode();

   285         }

   286     }

   288     /** Read next character in comment, skipping over double '\' characters.

   289      */

   290     private void scanCommentChar() {

   291         scanChar();

   292         if (ch == '\\') {

   293             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {

   294                 bp++;

   295             } else {

   296                 convertUnicode();

   297             }

   298         }

   299     }

   301     /** Append a character to sbuf.

   302      */

   303     private void putChar(char ch) {

   304         if (sp == sbuf.length) {

   305             char[] newsbuf = new char[sbuf.length * 2];

   306             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);

   307             sbuf = newsbuf;

   308         }

   309         sbuf[sp++] = ch;

   310     }

   312     /** For debugging purposes: print character.

   313      */

   314     private void dch() {

   315         System.err.print(ch); System.out.flush();

   316     }

   318     /** Read next character in character or string literal and copy into sbuf.

   319      */

   320     private void scanLitChar() {

   321         if (ch == '\\') {

   322             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {

   323                 bp++;

   324                 putChar('\\');

   325                 scanChar();

   326             } else {

   327                 scanChar();

   328                 switch (ch) {

   329                 case '0': case '1': case '2': case '3':

   330                 case '4': case '5': case '6': case '7':

   331                     char leadch = ch;

   332                     int oct = digit(8);

   333                     scanChar();

   334                     if ('0' <= ch && ch <= '7') {

   335                         oct = oct * 8 + digit(8);

   336                         scanChar();

   337                         if (leadch <= '3' && '0' <= ch && ch <= '7') {

   338                             oct = oct * 8 + digit(8);

   339                             scanChar();

   340                         }

   341                     }

   342                     putChar((char)oct);

   343                     break;

   344                 case 'b':

   345                     putChar('\b'); scanChar(); break;

   346                 case 't':

   347                     putChar('\t'); scanChar(); break;

   348                 case 'n':

   349                     putChar('\n'); scanChar(); break;

   350                 case 'f':

   351                     putChar('\f'); scanChar(); break;

   352                 case 'r':

   353                     putChar('\r'); scanChar(); break;

   354                 case '\'':

   355                     putChar('\''); scanChar(); break;

   356                 case '\"':

   357                     putChar('\"'); scanChar(); break;

   358                 case '\\':

   359                     putChar('\\'); scanChar(); break;

   360                 default:

   361                     lexError(bp, "illegal.esc.char");

   362                 }

   363             }

   364         } else if (bp != buflen) {

   365             putChar(ch); scanChar();

   366         }

   367     }

   369     /** Read fractional part of hexadecimal floating point number.

   370      */

   371     private void scanHexExponentAndSuffix() {

   372         if (ch == 'p' || ch == 'P') {

   373             putChar(ch);

   374             scanChar();

   375             if (ch == '+' || ch == '-') {

   376                 putChar(ch);

   377                 scanChar();

   378             }

   379             if ('0' <= ch && ch <= '9') {

   380                 do {

   381                     putChar(ch);

   382                     scanChar();

   383                 } while ('0' <= ch && ch <= '9');

   384                 if (!allowHexFloats) {

   385                     lexError("unsupported.fp.lit");

   386                     allowHexFloats = true;

   387                 }

   388                 else if (!hexFloatsWork)

   389                     lexError("unsupported.cross.fp.lit");

   390             } else

   391                 lexError("malformed.fp.lit");

   392         } else {

   393             lexError("malformed.fp.lit");

   394         }

   395         if (ch == 'f' || ch == 'F') {

   396             putChar(ch);

   397             scanChar();

   398             token = FLOATLITERAL;

   399         } else {

   400             if (ch == 'd' || ch == 'D') {

   401                 putChar(ch);

   402                 scanChar();

   403             }

   404             token = DOUBLELITERAL;

   405         }

   406     }

   408     /** Read fractional part of floating point number.

   409      */

   410     private void scanFraction() {

   411         while (digit(10) >= 0) {

   412             putChar(ch);

   413             scanChar();

   414         }

   415         int sp1 = sp;

   416         if (ch == 'e' || ch == 'E') {

   417             putChar(ch);

   418             scanChar();

   419             if (ch == '+' || ch == '-') {

   420                 putChar(ch);

   421                 scanChar();

   422             }

   423             if ('0' <= ch && ch <= '9') {

   424                 do {

   425                     putChar(ch);

   426                     scanChar();

   427                 } while ('0' <= ch && ch <= '9');

   428                 return;

   429             }

   430             lexError("malformed.fp.lit");

   431             sp = sp1;

   432         }

   433     }

   435     /** Read fractional part and 'd' or 'f' suffix of floating point number.

   436      */

   437     private void scanFractionAndSuffix() {

   438         this.radix = 10;

   439         scanFraction();

   440         if (ch == 'f' || ch == 'F') {

   441             putChar(ch);

   442             scanChar();

   443             token = FLOATLITERAL;

   444         } else {

   445             if (ch == 'd' || ch == 'D') {

   446                 putChar(ch);

   447                 scanChar();

   448             }

   449             token = DOUBLELITERAL;

   450         }

   451     }

   453     /** Read fractional part and 'd' or 'f' suffix of floating point number.

   454      */

   455     private void scanHexFractionAndSuffix(boolean seendigit) {

   456         this.radix = 16;

   457         assert ch == '.';

   458         putChar(ch);

   459         scanChar();

   460         while (digit(16) >= 0) {

   461             seendigit = true;

   462             putChar(ch);

   463             scanChar();

   464         }

   465         if (!seendigit)

   466             lexError("invalid.hex.number");

   467         else

   468             scanHexExponentAndSuffix();

   469     }

   471     /** Read a number.

   472      *  @param radix  The radix of the number; one of 8, 10, 16.

   473      */

   474     private void scanNumber(int radix) {

   475         this.radix = radix;

   476         // for octal, allow base-10 digit in case it's a float literal

   477         int digitRadix = (radix <= 10) ? 10 : 16;

   478         boolean seendigit = false;

   479         while (digit(digitRadix) >= 0) {

   480             seendigit = true;

   481             putChar(ch);

   482             scanChar();

   483         }

   484         if (radix == 16 && ch == '.') {

   485             scanHexFractionAndSuffix(seendigit);

   486         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {

   487             scanHexExponentAndSuffix();

   488         } else if (radix <= 10 && ch == '.') {

   489             putChar(ch);

   490             scanChar();

   491             scanFractionAndSuffix();

   492         } else if (radix <= 10 &&

   493                    (ch == 'e' || ch == 'E' ||

   494                     ch == 'f' || ch == 'F' ||

   495                     ch == 'd' || ch == 'D')) {

   496             scanFractionAndSuffix();

   497         } else {

   498             if (ch == 'l' || ch == 'L') {

   499                 scanChar();

   500                 token = LONGLITERAL;

   501             } else {

   502                 token = INTLITERAL;

   503             }

   504         }

   505     }

   507     /** Read an identifier.

   508      */

   509     private void scanIdent() {

   510         boolean isJavaIdentifierPart;

   511         char high;

   512         do {

   513             if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;

   514             // optimization, was: putChar(ch);

   516             scanChar();

   517             switch (ch) {

   518             case 'A': case 'B': case 'C': case 'D': case 'E':

   519             case 'F': case 'G': case 'H': case 'I': case 'J':

   520             case 'K': case 'L': case 'M': case 'N': case 'O':

   521             case 'P': case 'Q': case 'R': case 'S': case 'T':

   522             case 'U': case 'V': case 'W': case 'X': case 'Y':

   523             case 'Z':

   524             case 'a': case 'b': case 'c': case 'd': case 'e':

   525             case 'f': case 'g': case 'h': case 'i': case 'j':

   526             case 'k': case 'l': case 'm': case 'n': case 'o':

   527             case 'p': case 'q': case 'r': case 's': case 't':

   528             case 'u': case 'v': case 'w': case 'x': case 'y':

   529             case 'z':

   530             case '$': case '_':

   531             case '0': case '1': case '2': case '3': case '4':

   532             case '5': case '6': case '7': case '8': case '9':

   533             case '\u0000': case '\u0001': case '\u0002': case '\u0003':

   534             case '\u0004': case '\u0005': case '\u0006': case '\u0007':

   535             case '\u0008': case '\u000E': case '\u000F': case '\u0010':

   536             case '\u0011': case '\u0012': case '\u0013': case '\u0014':

   537             case '\u0015': case '\u0016': case '\u0017':

   538             case '\u0018': case '\u0019': case '\u001B':

   539             case '\u007F':

   540                 break;

   541             case '\u001A': // EOI is also a legal identifier part

   542                 if (bp >= buflen) {

   543                     name = names.fromChars(sbuf, 0, sp);

   544                     token = keywords.key(name);

   545                     return;

   546                 }

   547                 break;

   548             default:

   549                 if (ch < '\u0080') {

   550                     // all ASCII range chars already handled, above

   551                     isJavaIdentifierPart = false;

   552                 } else {

   553                     high = scanSurrogates();

   554                     if (high != 0) {

   555                         if (sp == sbuf.length) {

   556                             putChar(high);

   557                         } else {

   558                             sbuf[sp++] = high;

   559                         }

   560                         isJavaIdentifierPart = Character.isJavaIdentifierPart(

   561                             Character.toCodePoint(high, ch));

   562                     } else {

   563                         isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);

   564                     }

   565                 }

   566                 if (!isJavaIdentifierPart) {

   567                     name = names.fromChars(sbuf, 0, sp);

   568                     token = keywords.key(name);

   569                     return;

   570                 }

   571             }

   572         } while (true);

   573     }

   575     /** Are surrogates supported?

   576      */

   577     final static boolean surrogatesSupported = surrogatesSupported();

   578     private static boolean surrogatesSupported() {

   579         try {

   580             Character.isHighSurrogate('a');

   581             return true;

   582         } catch (NoSuchMethodError ex) {

   583             return false;

   584         }

   585     }

   587     /** Scan surrogate pairs.  If 'ch' is a high surrogate and

   588      *  the next character is a low surrogate, then put the low

   589      *  surrogate in 'ch', and return the high surrogate.

   590      *  otherwise, just return 0.

   591      */

   592     private char scanSurrogates() {

   593         if (surrogatesSupported && Character.isHighSurrogate(ch)) {

   594             char high = ch;

   596             scanChar();

   598             if (Character.isLowSurrogate(ch)) {

   599                 return high;

   600             }

   602             ch = high;

   603         }

   605         return 0;

   606     }

   608     /** Return true if ch can be part of an operator.

   609      */

   610     private boolean isSpecial(char ch) {

   611         switch (ch) {

   612         case '!': case '%': case '&': case '*': case '?':

   613         case '+': case '-': case ':': case '<': case '=':

   614         case '>': case '^': case '|': case '~':

   615         case '@':

   616             return true;

   617         default:

   618             return false;

   619         }

   620     }

   622     /** Read longest possible sequence of special characters and convert

   623      *  to token.

   624      */

   625     private void scanOperator() {

   626         while (true) {

   627             putChar(ch);

   628             Name newname = names.fromChars(sbuf, 0, sp);

   629             if (keywords.key(newname) == IDENTIFIER) {

   630                 sp--;

   631                 break;

   632             }

   633             name = newname;

   634             token = keywords.key(newname);

   635             scanChar();

   636             if (!isSpecial(ch)) break;

   637         }

   638     }

   640     /**

   641      * Scan a documention comment; determine if a deprecated tag is present.

   642      * Called once the initial /, * have been skipped, positioned at the second *

   643      * (which is treated as the beginning of the first line).

   644      * Stops positioned at the closing '/'.

   645      */

   646     @SuppressWarnings("fallthrough")

   647     private void scanDocComment() {

   648         boolean deprecatedPrefix = false;

   650         forEachLine:

   651         while (bp < buflen) {

   653             // Skip optional WhiteSpace at beginning of line

   654             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {

   655                 scanCommentChar();

   656             }

   658             // Skip optional consecutive Stars

   659             while (bp < buflen && ch == '*') {

   660                 scanCommentChar();

   661                 if (ch == '/') {

   662                     return;

   663                 }

   664             }

   666             // Skip optional WhiteSpace after Stars

   667             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {

   668                 scanCommentChar();

   669             }

   671             deprecatedPrefix = false;

   672             // At beginning of line in the JavaDoc sense.

   673             if (bp < buflen && ch == '@' && !deprecatedFlag) {

   674                 scanCommentChar();

   675                 if (bp < buflen && ch == 'd') {

   676                     scanCommentChar();

   677                     if (bp < buflen && ch == 'e') {

   678                         scanCommentChar();

   679                         if (bp < buflen && ch == 'p') {

   680                             scanCommentChar();

   681                             if (bp < buflen && ch == 'r') {

   682                                 scanCommentChar();

   683                                 if (bp < buflen && ch == 'e') {

   684                                     scanCommentChar();

   685                                     if (bp < buflen && ch == 'c') {

   686                                         scanCommentChar();

   687                                         if (bp < buflen && ch == 'a') {

   688                                             scanCommentChar();

   689                                             if (bp < buflen && ch == 't') {

   690                                                 scanCommentChar();

   691                                                 if (bp < buflen && ch == 'e') {

   692                                                     scanCommentChar();

   693                                                     if (bp < buflen && ch == 'd') {

   694                                                         deprecatedPrefix = true;

   695                                                         scanCommentChar();

   696                                                     }}}}}}}}}}}

   697             if (deprecatedPrefix && bp < buflen) {

   698                 if (Character.isWhitespace(ch)) {

   699                     deprecatedFlag = true;

   700                 } else if (ch == '*') {

   701                     scanCommentChar();

   702                     if (ch == '/') {

   703                         deprecatedFlag = true;

   704                         return;

   705                     }

   706                 }

   707             }

   709             // Skip rest of line

   710             while (bp < buflen) {

   711                 switch (ch) {

   712                 case '*':

   713                     scanCommentChar();

   714                     if (ch == '/') {

   715                         return;

   716                     }

   717                     break;

   718                 case CR: // (Spec 3.4)

   719                     scanCommentChar();

   720                     if (ch != LF) {

   721                         continue forEachLine;

   722                     }

   723                     /* fall through to LF case */

   724                 case LF: // (Spec 3.4)

   725                     scanCommentChar();

   726                     continue forEachLine;

   727                 default:

   728                     scanCommentChar();

   729                 }

   730             } // rest of line

   731         } // forEachLine

   732         return;

   733     }

   735     /** The value of a literal token, recorded as a string.

   736      *  For integers, leading 0x and 'l' suffixes are suppressed.

   737      */

   738     public String stringVal() {

   739         return new String(sbuf, 0, sp);

   740     }

   742     /** Read token.

   743      */

   744     public void nextToken() {

   746         try {

   747             prevEndPos = endPos;

   748             sp = 0;

   750             while (true) {

   751                 pos = bp;

   752                 switch (ch) {

   753                 case ' ': // (Spec 3.6)

   754                 case '\t': // (Spec 3.6)

   755                 case FF: // (Spec 3.6)

   756                     do {

   757                         scanChar();

   758                     } while (ch == ' ' || ch == '\t' || ch == FF);

   759                     endPos = bp;

   760                     processWhiteSpace();

   761                     break;

   762                 case LF: // (Spec 3.4)

   763                     scanChar();

   764                     endPos = bp;

   765                     processLineTerminator();

   766                     break;

   767                 case CR: // (Spec 3.4)

   768                     scanChar();

   769                     if (ch == LF) {

   770                         scanChar();

   771                     }

   772                     endPos = bp;

   773                     processLineTerminator();

   774                     break;

   775                 case 'A': case 'B': case 'C': case 'D': case 'E':

   776                 case 'F': case 'G': case 'H': case 'I': case 'J':

   777                 case 'K': case 'L': case 'M': case 'N': case 'O':

   778                 case 'P': case 'Q': case 'R': case 'S': case 'T':

   779                 case 'U': case 'V': case 'W': case 'X': case 'Y':

   780                 case 'Z':

   781                 case 'a': case 'b': case 'c': case 'd': case 'e':

   782                 case 'f': case 'g': case 'h': case 'i': case 'j':

   783                 case 'k': case 'l': case 'm': case 'n': case 'o':

   784                 case 'p': case 'q': case 'r': case 's': case 't':

   785                 case 'u': case 'v': case 'w': case 'x': case 'y':

   786                 case 'z':

   787                 case '$': case '_':

   788                     scanIdent();

   789                     return;

   790                 case '0':

   791                     scanChar();

   792                     if (ch == 'x' || ch == 'X') {

   793                         scanChar();

   794                         if (ch == '.') {

   795                             scanHexFractionAndSuffix(false);

   796                         } else if (digit(16) < 0) {

   797                             lexError("invalid.hex.number");

   798                         } else {

   799                             scanNumber(16);

   800                         }

   801                     } else {

   802                         putChar('0');

   803                         scanNumber(8);

   804                     }

   805                     return;

   806                 case '1': case '2': case '3': case '4':

   807                 case '5': case '6': case '7': case '8': case '9':

   808                     scanNumber(10);

   809                     return;

   810                 case '.':

   811                     scanChar();

   812                     if ('0' <= ch && ch <= '9') {

   813                         putChar('.');

   814                         scanFractionAndSuffix();

   815                     } else if (ch == '.') {

   816                         putChar('.'); putChar('.');

   817                         scanChar();

   818                         if (ch == '.') {

   819                             scanChar();

   820                             putChar('.');

   821                             token = ELLIPSIS;

   822                         } else {

   823                             lexError("malformed.fp.lit");

   824                         }

   825                     } else {

   826                         token = DOT;

   827                     }

   828                     return;

   829                 case ',':

   830                     scanChar(); token = COMMA; return;

   831                 case ';':

   832                     scanChar(); token = SEMI; return;

   833                 case '(':

   834                     scanChar(); token = LPAREN; return;

   835                 case ')':

   836                     scanChar(); token = RPAREN; return;

   837                 case '[':

   838                     scanChar(); token = LBRACKET; return;

   839                 case ']':

   840                     scanChar(); token = RBRACKET; return;

   841                 case '{':

   842                     scanChar(); token = LBRACE; return;

   843                 case '}':

   844                     scanChar(); token = RBRACE; return;

   845                 case '/':

   846                     scanChar();

   847                     if (ch == '/') {

   848                         do {

   849                             scanCommentChar();

   850                         } while (ch != CR && ch != LF && bp < buflen);

   851                         if (bp < buflen) {

   852                             endPos = bp;

   853                             processComment(CommentStyle.LINE);

   854                         }

   855                         break;

   856                     } else if (ch == '*') {

   857                         scanChar();

   858                         CommentStyle style;

   859                         if (ch == '*') {

   860                             style = CommentStyle.JAVADOC;

   861                             scanDocComment();

   862                         } else {

   863                             style = CommentStyle.BLOCK;

   864                             while (bp < buflen) {

   865                                 if (ch == '*') {

   866                                     scanChar();

   867                                     if (ch == '/') break;

   868                                 } else {

   869                                     scanCommentChar();

   870                                 }

   871                             }

   872                         }

   873                         if (ch == '/') {

   874                             scanChar();

   875                             endPos = bp;

   876                             processComment(style);

   877                             break;

   878                         } else {

   879                             lexError("unclosed.comment");

   880                             return;

   881                         }

   882                     } else if (ch == '=') {

   883                         name = names.slashequals;

   884                         token = SLASHEQ;

   885                         scanChar();

   886                     } else {

   887                         name = names.slash;

   888                         token = SLASH;

   889                     }

   890                     return;

   891                 case '\'':

   892                     scanChar();

   893                     if (ch == '\'') {

   894                         lexError("empty.char.lit");

   895                     } else {

   896                         if (ch == CR || ch == LF)

   897                             lexError(pos, "illegal.line.end.in.char.lit");

   898                         scanLitChar();

   899                         if (ch == '\'') {

   900                             scanChar();

   901                             token = CHARLITERAL;

   902                         } else {

   903                             lexError(pos, "unclosed.char.lit");

   904                         }

   905                     }

   906                     return;

   907                 case '\"':

   908                     scanChar();

   909                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)

   910                         scanLitChar();

   911                     if (ch == '\"') {

   912                         token = STRINGLITERAL;

   913                         scanChar();

   914                     } else {

   915                         lexError(pos, "unclosed.str.lit");

   916                     }

   917                     return;

   918                 default:

   919                     if (isSpecial(ch)) {

   920                         scanOperator();

   921                     } else {

   922                         boolean isJavaIdentifierStart;

   923                         if (ch < '\u0080') {

   924                             // all ASCII range chars already handled, above

   925                             isJavaIdentifierStart = false;

   926                         } else {

   927                             char high = scanSurrogates();

   928                             if (high != 0) {

   929                                 if (sp == sbuf.length) {

   930                                     putChar(high);

   931                                 } else {

   932                                     sbuf[sp++] = high;

   933                                 }

   935                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(

   936                                     Character.toCodePoint(high, ch));

   937                             } else {

   938                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);

   939                             }

   940                         }

   941                         if (isJavaIdentifierStart) {

   942                             scanIdent();

   943                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5

   944                             token = EOF;

   945                             pos = bp = eofPos;

   946                         } else {

   947                             lexError("illegal.char", String.valueOf((int)ch));

   948                             scanChar();

   949                         }

   950                     }

   951                     return;

   952                 }

   953             }

   954         } finally {

   955             endPos = bp;

   956             if (scannerDebug)

   957                 System.out.println("nextToken(" + pos

   958                                    + "," + endPos + ")=|" +

   959                                    new String(getRawCharacters(pos, endPos))

   960                                    + "|");

   961         }

   962     }

   964     /** Return the current token, set by nextToken().

   965      */

   966     public Token token() {

   967         return token;

   968     }

   970     /** Sets the current token.

   971      */

   972     public void token(Token token) {

   973         this.token = token;

   974     }

   976     /** Return the current token's position: a 0-based

   977      *  offset from beginning of the raw input stream

   978      *  (before unicode translation)

   979      */

   980     public int pos() {

   981         return pos;

   982     }

   984     /** Return the last character position of the current token.

   985      */

   986     public int endPos() {

   987         return endPos;

   988     }

   990     /** Return the last character position of the previous token.

   991      */

   992     public int prevEndPos() {

   993         return prevEndPos;

   994     }

   996     /** Return the position where a lexical error occurred;

   997      */

   998     public int errPos() {

   999         return errPos;

  1000     }

  1002     /** Set the position where a lexical error occurred;

  1003      */

  1004     public void errPos(int pos) {

  1005         errPos = pos;

  1006     }

  1008     /** Return the name of an identifier or token for the current token.

  1009      */

  1010     public Name name() {

  1011         return name;

  1012     }

  1014     /** Return the radix of a numeric literal token.

  1015      */

  1016     public int radix() {

  1017         return radix;

  1018     }

  1020     /** Has a @deprecated been encountered in last doc comment?

  1021      *  This needs to be reset by client with resetDeprecatedFlag.

  1022      */

  1023     public boolean deprecatedFlag() {

  1024         return deprecatedFlag;

  1025     }

  1027     public void resetDeprecatedFlag() {

  1028         deprecatedFlag = false;

  1029     }

  1031     /**

  1032      * Returns the documentation string of the current token.

  1033      */

  1034     public String docComment() {

  1035         return null;

  1036     }

  1038     /**

  1039      * Returns a copy of the input buffer, up to its inputLength.

  1040      * Unicode escape sequences are not translated.

  1041      */

  1042     public char[] getRawCharacters() {

  1043         char[] chars = new char[buflen];

  1044         System.arraycopy(buf, 0, chars, 0, buflen);

  1045         return chars;

  1046     }

  1048     /**

  1049      * Returns a copy of a character array subset of the input buffer.

  1050      * The returned array begins at the <code>beginIndex</code> and

  1051      * extends to the character at index <code>endIndex - 1</code>.

  1052      * Thus the length of the substring is <code>endIndex-beginIndex</code>.

  1053      * This behavior is like

  1054      * <code>String.substring(beginIndex, endIndex)</code>.

  1055      * Unicode escape sequences are not translated.

  1056      *

  1057      * @param beginIndex the beginning index, inclusive.

  1058      * @param endIndex the ending index, exclusive.

  1059      * @throws IndexOutOfBounds if either offset is outside of the

  1060      *         array bounds

  1061      */

  1062     public char[] getRawCharacters(int beginIndex, int endIndex) {

  1063         int length = endIndex - beginIndex;

  1064         char[] chars = new char[length];

  1065         System.arraycopy(buf, beginIndex, chars, 0, length);

  1066         return chars;

  1067     }

  /**
   * The kinds of comment that are reported to processComment.
   */
  public enum CommentStyle {
      /** A comment opened by two slashes. */
      LINE,
      /** A comment opened by a slash and a single star. */
      BLOCK,
      /** A comment opened by a slash and two stars. */
      JAVADOC
  }

  1075     /**

  1076      * Called when a complete comment has been scanned. pos and endPos

  1077      * will mark the comment boundary.

  1078      */

  1079     protected void processComment(CommentStyle style) {

  1080         if (scannerDebug)

  1081             System.out.println("processComment(" + pos

  1082                                + "," + endPos + "," + style + ")=|"

  1083                                + new String(getRawCharacters(pos, endPos))

  1084                                + "|");

  1085     }

  1087     /**

  1088      * Called when a complete whitespace run has been scanned. pos and endPos

  1089      * will mark the whitespace boundary.

  1090      */

  1091     protected void processWhiteSpace() {

  1092         if (scannerDebug)

  1093             System.out.println("processWhitespace(" + pos

  1094                                + "," + endPos + ")=|" +

  1095                                new String(getRawCharacters(pos, endPos))

  1096                                + "|");

  1097     }

  1099     /**

  1100      * Called when a line terminator has been processed.

  1101      */

  1102     protected void processLineTerminator() {

  1103         if (scannerDebug)

  1104             System.out.println("processTerminator(" + pos

  1105                                + "," + endPos + ")=|" +

  1106                                new String(getRawCharacters(pos, endPos))

  1107                                + "|");

  1108     }

  1110     /** Build a map for translating between line numbers and

  1111      * positions in the input.

  1112      *

  1113      * @return a LineMap */

  1114     public Position.LineMap getLineMap() {

  1115         return Position.makeLineMap(buf, buflen, false);

  1116     }

  1118 }

Mercurial > jdk8-mips64-public > langtools / file revision

src/share/classes/com/sun/tools/javac/parser/Scanner.java@eff38cc97183

src/share/classes/com/sun/tools/javac/parser/Scanner.java