src/share/classes/com/sun/tools/javac/parser/Scanner.java

Tue, 16 Sep 2008 18:35:18 -0700

author
jjg
date
Tue, 16 Sep 2008 18:35:18 -0700
changeset 113
eff38cc97183
parent 54
eaf608c64fec
child 267
e2722bd43f3a
permissions
-rw-r--r--

6574134: Allow for alternative implementation of Name Table with garbage collection of name bytes
Reviewed-by: darcy, mcimadamore

     1 /*
     2  * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Sun designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Sun in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
    23  * have any questions.
    24  */
    26 package com.sun.tools.javac.parser;
    28 import java.nio.*;
    30 import com.sun.tools.javac.code.Source;
    31 import com.sun.tools.javac.file.JavacFileManager;
    32 import com.sun.tools.javac.util.*;
    35 import static com.sun.tools.javac.parser.Token.*;
    36 import static com.sun.tools.javac.util.LayoutCharacters.*;
    38 /** The lexical analyzer maps an input stream consisting of
    39  *  ASCII characters and Unicode escapes into a token sequence.
    40  *
    41  *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
    42  *  you write code that depends on this, you do so at your own risk.
    43  *  This code and its internal interfaces are subject to change or
    44  *  deletion without notice.</b>
    45  */
    46 public class Scanner implements Lexer {
    48     private static boolean scannerDebug = false;
    50     /** A factory for creating scanners. */
    51     public static class Factory {
    52         /** The context key for the scanner factory. */
    53         public static final Context.Key<Scanner.Factory> scannerFactoryKey =
    54             new Context.Key<Scanner.Factory>();
    56         /** Get the Factory instance for this context. */
    57         public static Factory instance(Context context) {
    58             Factory instance = context.get(scannerFactoryKey);
    59             if (instance == null)
    60                 instance = new Factory(context);
    61             return instance;
    62         }
    64         final Log log;
    65         final Names names;
    66         final Source source;
    67         final Keywords keywords;
    69         /** Create a new scanner factory. */
    70         protected Factory(Context context) {
    71             context.put(scannerFactoryKey, this);
    72             this.log = Log.instance(context);
    73             this.names = Names.instance(context);
    74             this.source = Source.instance(context);
    75             this.keywords = Keywords.instance(context);
    76         }
    78         public Scanner newScanner(CharSequence input) {
    79             if (input instanceof CharBuffer) {
    80                 return new Scanner(this, (CharBuffer)input);
    81             } else {
    82                 char[] array = input.toString().toCharArray();
    83                 return newScanner(array, array.length);
    84             }
    85         }
    87         public Scanner newScanner(char[] input, int inputLength) {
    88             return new Scanner(this, input, inputLength);
    89         }
    90     }
    92     /* Output variables; set by nextToken():
    93      */
    95     /** The token, set by nextToken().
    96      */
    97     private Token token;
    99     /** Allow hex floating-point literals.
   100      */
   101     private boolean allowHexFloats;
   103     /** The token's position, 0-based offset from beginning of text.
   104      */
   105     private int pos;
   107     /** Character position just after the last character of the token.
   108      */
   109     private int endPos;
   111     /** The last character position of the previous token.
   112      */
   113     private int prevEndPos;
   115     /** The position where a lexical error occurred;
   116      */
   117     private int errPos = Position.NOPOS;
   119     /** The name of an identifier or token:
   120      */
   121     private Name name;
   123     /** The radix of a numeric literal token.
   124      */
   125     private int radix;
   127     /** Has a @deprecated been encountered in last doc comment?
   128      *  this needs to be reset by client.
   129      */
   130     protected boolean deprecatedFlag = false;
   132     /** A character buffer for literals.
   133      */
   134     private char[] sbuf = new char[128];
   135     private int sp;
    137     /** The input buffer, index of next character to be read,
   138      *  index of one past last character in buffer.
   139      */
   140     private char[] buf;
   141     private int bp;
   142     private int buflen;
   143     private int eofPos;
   145     /** The current character.
   146      */
   147     private char ch;
   149     /** The buffer index of the last converted unicode character
   150      */
   151     private int unicodeConversionBp = -1;
   153     /** The log to be used for error reporting.
   154      */
   155     private final Log log;
   157     /** The name table. */
   158     private final Names names;
   160     /** The keyword table. */
   161     private final Keywords keywords;
   163     /** Common code for constructors. */
   164     private Scanner(Factory fac) {
   165         this.log = fac.log;
   166         this.names = fac.names;
   167         this.keywords = fac.keywords;
   168         this.allowHexFloats = fac.source.allowHexFloats();
   169     }
   171     private static final boolean hexFloatsWork = hexFloatsWork();
   172     private static boolean hexFloatsWork() {
   173         try {
   174             Float.valueOf("0x1.0p1");
   175             return true;
   176         } catch (NumberFormatException ex) {
   177             return false;
   178         }
   179     }
   181     /** Create a scanner from the input buffer.  buffer must implement
   182      *  array() and compact(), and remaining() must be less than limit().
   183      */
   184     protected Scanner(Factory fac, CharBuffer buffer) {
   185         this(fac, JavacFileManager.toArray(buffer), buffer.limit());
   186     }
   188     /**
   189      * Create a scanner from the input array.  This method might
   190      * modify the array.  To avoid copying the input array, ensure
   191      * that {@code inputLength < input.length} or
   192      * {@code input[input.length -1]} is a white space character.
   193      *
   194      * @param fac the factory which created this Scanner
   195      * @param input the input, might be modified
   196      * @param inputLength the size of the input.
   197      * Must be positive and less than or equal to input.length.
   198      */
   199     protected Scanner(Factory fac, char[] input, int inputLength) {
   200         this(fac);
   201         eofPos = inputLength;
   202         if (inputLength == input.length) {
   203             if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
   204                 inputLength--;
   205             } else {
   206                 char[] newInput = new char[inputLength + 1];
   207                 System.arraycopy(input, 0, newInput, 0, input.length);
   208                 input = newInput;
   209             }
   210         }
   211         buf = input;
   212         buflen = inputLength;
   213         buf[buflen] = EOI;
   214         bp = -1;
   215         scanChar();
   216     }
   218     /** Report an error at the given position using the provided arguments.
   219      */
   220     private void lexError(int pos, String key, Object... args) {
   221         log.error(pos, key, args);
   222         token = ERROR;
   223         errPos = pos;
   224     }
   226     /** Report an error at the current token position using the provided
   227      *  arguments.
   228      */
   229     private void lexError(String key, Object... args) {
   230         lexError(pos, key, args);
   231     }
   233     /** Convert an ASCII digit from its base (8, 10, or 16)
   234      *  to its value.
   235      */
   236     private int digit(int base) {
   237         char c = ch;
   238         int result = Character.digit(c, base);
   239         if (result >= 0 && c > 0x7f) {
   240             lexError(pos+1, "illegal.nonascii.digit");
   241             ch = "0123456789abcdef".charAt(result);
   242         }
   243         return result;
   244     }
   246     /** Convert unicode escape; bp points to initial '\' character
   247      *  (Spec 3.3).
   248      */
   249     private void convertUnicode() {
   250         if (ch == '\\' && unicodeConversionBp != bp) {
   251             bp++; ch = buf[bp];
   252             if (ch == 'u') {
   253                 do {
   254                     bp++; ch = buf[bp];
   255                 } while (ch == 'u');
   256                 int limit = bp + 3;
   257                 if (limit < buflen) {
   258                     int d = digit(16);
   259                     int code = d;
   260                     while (bp < limit && d >= 0) {
   261                         bp++; ch = buf[bp];
   262                         d = digit(16);
   263                         code = (code << 4) + d;
   264                     }
   265                     if (d >= 0) {
   266                         ch = (char)code;
   267                         unicodeConversionBp = bp;
   268                         return;
   269                     }
   270                 }
   271                 lexError(bp, "illegal.unicode.esc");
   272             } else {
   273                 bp--;
   274                 ch = '\\';
   275             }
   276         }
   277     }
   279     /** Read next character.
   280      */
   281     private void scanChar() {
   282         ch = buf[++bp];
   283         if (ch == '\\') {
   284             convertUnicode();
   285         }
   286     }
   288     /** Read next character in comment, skipping over double '\' characters.
   289      */
   290     private void scanCommentChar() {
   291         scanChar();
   292         if (ch == '\\') {
   293             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   294                 bp++;
   295             } else {
   296                 convertUnicode();
   297             }
   298         }
   299     }
   301     /** Append a character to sbuf.
   302      */
   303     private void putChar(char ch) {
   304         if (sp == sbuf.length) {
   305             char[] newsbuf = new char[sbuf.length * 2];
   306             System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
   307             sbuf = newsbuf;
   308         }
   309         sbuf[sp++] = ch;
   310     }
   312     /** For debugging purposes: print character.
   313      */
   314     private void dch() {
   315         System.err.print(ch); System.out.flush();
   316     }
   318     /** Read next character in character or string literal and copy into sbuf.
   319      */
   320     private void scanLitChar() {
   321         if (ch == '\\') {
   322             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
   323                 bp++;
   324                 putChar('\\');
   325                 scanChar();
   326             } else {
   327                 scanChar();
   328                 switch (ch) {
   329                 case '0': case '1': case '2': case '3':
   330                 case '4': case '5': case '6': case '7':
   331                     char leadch = ch;
   332                     int oct = digit(8);
   333                     scanChar();
   334                     if ('0' <= ch && ch <= '7') {
   335                         oct = oct * 8 + digit(8);
   336                         scanChar();
   337                         if (leadch <= '3' && '0' <= ch && ch <= '7') {
   338                             oct = oct * 8 + digit(8);
   339                             scanChar();
   340                         }
   341                     }
   342                     putChar((char)oct);
   343                     break;
   344                 case 'b':
   345                     putChar('\b'); scanChar(); break;
   346                 case 't':
   347                     putChar('\t'); scanChar(); break;
   348                 case 'n':
   349                     putChar('\n'); scanChar(); break;
   350                 case 'f':
   351                     putChar('\f'); scanChar(); break;
   352                 case 'r':
   353                     putChar('\r'); scanChar(); break;
   354                 case '\'':
   355                     putChar('\''); scanChar(); break;
   356                 case '\"':
   357                     putChar('\"'); scanChar(); break;
   358                 case '\\':
   359                     putChar('\\'); scanChar(); break;
   360                 default:
   361                     lexError(bp, "illegal.esc.char");
   362                 }
   363             }
   364         } else if (bp != buflen) {
   365             putChar(ch); scanChar();
   366         }
   367     }
   369     /** Read fractional part of hexadecimal floating point number.
   370      */
   371     private void scanHexExponentAndSuffix() {
   372         if (ch == 'p' || ch == 'P') {
   373             putChar(ch);
   374             scanChar();
   375             if (ch == '+' || ch == '-') {
   376                 putChar(ch);
   377                 scanChar();
   378             }
   379             if ('0' <= ch && ch <= '9') {
   380                 do {
   381                     putChar(ch);
   382                     scanChar();
   383                 } while ('0' <= ch && ch <= '9');
   384                 if (!allowHexFloats) {
   385                     lexError("unsupported.fp.lit");
   386                     allowHexFloats = true;
   387                 }
   388                 else if (!hexFloatsWork)
   389                     lexError("unsupported.cross.fp.lit");
   390             } else
   391                 lexError("malformed.fp.lit");
   392         } else {
   393             lexError("malformed.fp.lit");
   394         }
   395         if (ch == 'f' || ch == 'F') {
   396             putChar(ch);
   397             scanChar();
   398             token = FLOATLITERAL;
   399         } else {
   400             if (ch == 'd' || ch == 'D') {
   401                 putChar(ch);
   402                 scanChar();
   403             }
   404             token = DOUBLELITERAL;
   405         }
   406     }
   408     /** Read fractional part of floating point number.
   409      */
   410     private void scanFraction() {
   411         while (digit(10) >= 0) {
   412             putChar(ch);
   413             scanChar();
   414         }
   415         int sp1 = sp;
   416         if (ch == 'e' || ch == 'E') {
   417             putChar(ch);
   418             scanChar();
   419             if (ch == '+' || ch == '-') {
   420                 putChar(ch);
   421                 scanChar();
   422             }
   423             if ('0' <= ch && ch <= '9') {
   424                 do {
   425                     putChar(ch);
   426                     scanChar();
   427                 } while ('0' <= ch && ch <= '9');
   428                 return;
   429             }
   430             lexError("malformed.fp.lit");
   431             sp = sp1;
   432         }
   433     }
   435     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   436      */
   437     private void scanFractionAndSuffix() {
   438         this.radix = 10;
   439         scanFraction();
   440         if (ch == 'f' || ch == 'F') {
   441             putChar(ch);
   442             scanChar();
   443             token = FLOATLITERAL;
   444         } else {
   445             if (ch == 'd' || ch == 'D') {
   446                 putChar(ch);
   447                 scanChar();
   448             }
   449             token = DOUBLELITERAL;
   450         }
   451     }
   453     /** Read fractional part and 'd' or 'f' suffix of floating point number.
   454      */
   455     private void scanHexFractionAndSuffix(boolean seendigit) {
   456         this.radix = 16;
   457         assert ch == '.';
   458         putChar(ch);
   459         scanChar();
   460         while (digit(16) >= 0) {
   461             seendigit = true;
   462             putChar(ch);
   463             scanChar();
   464         }
   465         if (!seendigit)
   466             lexError("invalid.hex.number");
   467         else
   468             scanHexExponentAndSuffix();
   469     }
   471     /** Read a number.
   472      *  @param radix  The radix of the number; one of 8, 10, 16.
   473      */
   474     private void scanNumber(int radix) {
   475         this.radix = radix;
   476         // for octal, allow base-10 digit in case it's a float literal
   477         int digitRadix = (radix <= 10) ? 10 : 16;
   478         boolean seendigit = false;
   479         while (digit(digitRadix) >= 0) {
   480             seendigit = true;
   481             putChar(ch);
   482             scanChar();
   483         }
   484         if (radix == 16 && ch == '.') {
   485             scanHexFractionAndSuffix(seendigit);
   486         } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
   487             scanHexExponentAndSuffix();
   488         } else if (radix <= 10 && ch == '.') {
   489             putChar(ch);
   490             scanChar();
   491             scanFractionAndSuffix();
   492         } else if (radix <= 10 &&
   493                    (ch == 'e' || ch == 'E' ||
   494                     ch == 'f' || ch == 'F' ||
   495                     ch == 'd' || ch == 'D')) {
   496             scanFractionAndSuffix();
   497         } else {
   498             if (ch == 'l' || ch == 'L') {
   499                 scanChar();
   500                 token = LONGLITERAL;
   501             } else {
   502                 token = INTLITERAL;
   503             }
   504         }
   505     }
   507     /** Read an identifier.
   508      */
   509     private void scanIdent() {
   510         boolean isJavaIdentifierPart;
   511         char high;
   512         do {
   513             if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
   514             // optimization, was: putChar(ch);
   516             scanChar();
   517             switch (ch) {
   518             case 'A': case 'B': case 'C': case 'D': case 'E':
   519             case 'F': case 'G': case 'H': case 'I': case 'J':
   520             case 'K': case 'L': case 'M': case 'N': case 'O':
   521             case 'P': case 'Q': case 'R': case 'S': case 'T':
   522             case 'U': case 'V': case 'W': case 'X': case 'Y':
   523             case 'Z':
   524             case 'a': case 'b': case 'c': case 'd': case 'e':
   525             case 'f': case 'g': case 'h': case 'i': case 'j':
   526             case 'k': case 'l': case 'm': case 'n': case 'o':
   527             case 'p': case 'q': case 'r': case 's': case 't':
   528             case 'u': case 'v': case 'w': case 'x': case 'y':
   529             case 'z':
   530             case '$': case '_':
   531             case '0': case '1': case '2': case '3': case '4':
   532             case '5': case '6': case '7': case '8': case '9':
   533             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
   534             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
   535             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
   536             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
   537             case '\u0015': case '\u0016': case '\u0017':
   538             case '\u0018': case '\u0019': case '\u001B':
   539             case '\u007F':
   540                 break;
   541             case '\u001A': // EOI is also a legal identifier part
   542                 if (bp >= buflen) {
   543                     name = names.fromChars(sbuf, 0, sp);
   544                     token = keywords.key(name);
   545                     return;
   546                 }
   547                 break;
   548             default:
   549                 if (ch < '\u0080') {
   550                     // all ASCII range chars already handled, above
   551                     isJavaIdentifierPart = false;
   552                 } else {
   553                     high = scanSurrogates();
   554                     if (high != 0) {
   555                         if (sp == sbuf.length) {
   556                             putChar(high);
   557                         } else {
   558                             sbuf[sp++] = high;
   559                         }
   560                         isJavaIdentifierPart = Character.isJavaIdentifierPart(
   561                             Character.toCodePoint(high, ch));
   562                     } else {
   563                         isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
   564                     }
   565                 }
   566                 if (!isJavaIdentifierPart) {
   567                     name = names.fromChars(sbuf, 0, sp);
   568                     token = keywords.key(name);
   569                     return;
   570                 }
   571             }
   572         } while (true);
   573     }
   575     /** Are surrogates supported?
   576      */
   577     final static boolean surrogatesSupported = surrogatesSupported();
   578     private static boolean surrogatesSupported() {
   579         try {
   580             Character.isHighSurrogate('a');
   581             return true;
   582         } catch (NoSuchMethodError ex) {
   583             return false;
   584         }
   585     }
   587     /** Scan surrogate pairs.  If 'ch' is a high surrogate and
   588      *  the next character is a low surrogate, then put the low
   589      *  surrogate in 'ch', and return the high surrogate.
   590      *  otherwise, just return 0.
   591      */
   592     private char scanSurrogates() {
   593         if (surrogatesSupported && Character.isHighSurrogate(ch)) {
   594             char high = ch;
   596             scanChar();
   598             if (Character.isLowSurrogate(ch)) {
   599                 return high;
   600             }
   602             ch = high;
   603         }
   605         return 0;
   606     }
   608     /** Return true if ch can be part of an operator.
   609      */
   610     private boolean isSpecial(char ch) {
   611         switch (ch) {
   612         case '!': case '%': case '&': case '*': case '?':
   613         case '+': case '-': case ':': case '<': case '=':
   614         case '>': case '^': case '|': case '~':
   615         case '@':
   616             return true;
   617         default:
   618             return false;
   619         }
   620     }
   622     /** Read longest possible sequence of special characters and convert
   623      *  to token.
   624      */
   625     private void scanOperator() {
   626         while (true) {
   627             putChar(ch);
   628             Name newname = names.fromChars(sbuf, 0, sp);
   629             if (keywords.key(newname) == IDENTIFIER) {
   630                 sp--;
   631                 break;
   632             }
   633             name = newname;
   634             token = keywords.key(newname);
   635             scanChar();
   636             if (!isSpecial(ch)) break;
   637         }
   638     }
   640     /**
   641      * Scan a documention comment; determine if a deprecated tag is present.
   642      * Called once the initial /, * have been skipped, positioned at the second *
   643      * (which is treated as the beginning of the first line).
   644      * Stops positioned at the closing '/'.
   645      */
   646     @SuppressWarnings("fallthrough")
   647     private void scanDocComment() {
   648         boolean deprecatedPrefix = false;
   650         forEachLine:
   651         while (bp < buflen) {
   653             // Skip optional WhiteSpace at beginning of line
   654             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   655                 scanCommentChar();
   656             }
   658             // Skip optional consecutive Stars
   659             while (bp < buflen && ch == '*') {
   660                 scanCommentChar();
   661                 if (ch == '/') {
   662                     return;
   663                 }
   664             }
   666             // Skip optional WhiteSpace after Stars
   667             while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
   668                 scanCommentChar();
   669             }
   671             deprecatedPrefix = false;
   672             // At beginning of line in the JavaDoc sense.
   673             if (bp < buflen && ch == '@' && !deprecatedFlag) {
   674                 scanCommentChar();
   675                 if (bp < buflen && ch == 'd') {
   676                     scanCommentChar();
   677                     if (bp < buflen && ch == 'e') {
   678                         scanCommentChar();
   679                         if (bp < buflen && ch == 'p') {
   680                             scanCommentChar();
   681                             if (bp < buflen && ch == 'r') {
   682                                 scanCommentChar();
   683                                 if (bp < buflen && ch == 'e') {
   684                                     scanCommentChar();
   685                                     if (bp < buflen && ch == 'c') {
   686                                         scanCommentChar();
   687                                         if (bp < buflen && ch == 'a') {
   688                                             scanCommentChar();
   689                                             if (bp < buflen && ch == 't') {
   690                                                 scanCommentChar();
   691                                                 if (bp < buflen && ch == 'e') {
   692                                                     scanCommentChar();
   693                                                     if (bp < buflen && ch == 'd') {
   694                                                         deprecatedPrefix = true;
   695                                                         scanCommentChar();
   696                                                     }}}}}}}}}}}
   697             if (deprecatedPrefix && bp < buflen) {
   698                 if (Character.isWhitespace(ch)) {
   699                     deprecatedFlag = true;
   700                 } else if (ch == '*') {
   701                     scanCommentChar();
   702                     if (ch == '/') {
   703                         deprecatedFlag = true;
   704                         return;
   705                     }
   706                 }
   707             }
   709             // Skip rest of line
   710             while (bp < buflen) {
   711                 switch (ch) {
   712                 case '*':
   713                     scanCommentChar();
   714                     if (ch == '/') {
   715                         return;
   716                     }
   717                     break;
   718                 case CR: // (Spec 3.4)
   719                     scanCommentChar();
   720                     if (ch != LF) {
   721                         continue forEachLine;
   722                     }
   723                     /* fall through to LF case */
   724                 case LF: // (Spec 3.4)
   725                     scanCommentChar();
   726                     continue forEachLine;
   727                 default:
   728                     scanCommentChar();
   729                 }
   730             } // rest of line
   731         } // forEachLine
   732         return;
   733     }
   735     /** The value of a literal token, recorded as a string.
   736      *  For integers, leading 0x and 'l' suffixes are suppressed.
   737      */
   738     public String stringVal() {
   739         return new String(sbuf, 0, sp);
   740     }
   742     /** Read token.
   743      */
   744     public void nextToken() {
   746         try {
   747             prevEndPos = endPos;
   748             sp = 0;
   750             while (true) {
   751                 pos = bp;
   752                 switch (ch) {
   753                 case ' ': // (Spec 3.6)
   754                 case '\t': // (Spec 3.6)
   755                 case FF: // (Spec 3.6)
   756                     do {
   757                         scanChar();
   758                     } while (ch == ' ' || ch == '\t' || ch == FF);
   759                     endPos = bp;
   760                     processWhiteSpace();
   761                     break;
   762                 case LF: // (Spec 3.4)
   763                     scanChar();
   764                     endPos = bp;
   765                     processLineTerminator();
   766                     break;
   767                 case CR: // (Spec 3.4)
   768                     scanChar();
   769                     if (ch == LF) {
   770                         scanChar();
   771                     }
   772                     endPos = bp;
   773                     processLineTerminator();
   774                     break;
   775                 case 'A': case 'B': case 'C': case 'D': case 'E':
   776                 case 'F': case 'G': case 'H': case 'I': case 'J':
   777                 case 'K': case 'L': case 'M': case 'N': case 'O':
   778                 case 'P': case 'Q': case 'R': case 'S': case 'T':
   779                 case 'U': case 'V': case 'W': case 'X': case 'Y':
   780                 case 'Z':
   781                 case 'a': case 'b': case 'c': case 'd': case 'e':
   782                 case 'f': case 'g': case 'h': case 'i': case 'j':
   783                 case 'k': case 'l': case 'm': case 'n': case 'o':
   784                 case 'p': case 'q': case 'r': case 's': case 't':
   785                 case 'u': case 'v': case 'w': case 'x': case 'y':
   786                 case 'z':
   787                 case '$': case '_':
   788                     scanIdent();
   789                     return;
   790                 case '0':
   791                     scanChar();
   792                     if (ch == 'x' || ch == 'X') {
   793                         scanChar();
   794                         if (ch == '.') {
   795                             scanHexFractionAndSuffix(false);
   796                         } else if (digit(16) < 0) {
   797                             lexError("invalid.hex.number");
   798                         } else {
   799                             scanNumber(16);
   800                         }
   801                     } else {
   802                         putChar('0');
   803                         scanNumber(8);
   804                     }
   805                     return;
   806                 case '1': case '2': case '3': case '4':
   807                 case '5': case '6': case '7': case '8': case '9':
   808                     scanNumber(10);
   809                     return;
   810                 case '.':
   811                     scanChar();
   812                     if ('0' <= ch && ch <= '9') {
   813                         putChar('.');
   814                         scanFractionAndSuffix();
   815                     } else if (ch == '.') {
   816                         putChar('.'); putChar('.');
   817                         scanChar();
   818                         if (ch == '.') {
   819                             scanChar();
   820                             putChar('.');
   821                             token = ELLIPSIS;
   822                         } else {
   823                             lexError("malformed.fp.lit");
   824                         }
   825                     } else {
   826                         token = DOT;
   827                     }
   828                     return;
   829                 case ',':
   830                     scanChar(); token = COMMA; return;
   831                 case ';':
   832                     scanChar(); token = SEMI; return;
   833                 case '(':
   834                     scanChar(); token = LPAREN; return;
   835                 case ')':
   836                     scanChar(); token = RPAREN; return;
   837                 case '[':
   838                     scanChar(); token = LBRACKET; return;
   839                 case ']':
   840                     scanChar(); token = RBRACKET; return;
   841                 case '{':
   842                     scanChar(); token = LBRACE; return;
   843                 case '}':
   844                     scanChar(); token = RBRACE; return;
   845                 case '/':
   846                     scanChar();
   847                     if (ch == '/') {
   848                         do {
   849                             scanCommentChar();
   850                         } while (ch != CR && ch != LF && bp < buflen);
   851                         if (bp < buflen) {
   852                             endPos = bp;
   853                             processComment(CommentStyle.LINE);
   854                         }
   855                         break;
   856                     } else if (ch == '*') {
   857                         scanChar();
   858                         CommentStyle style;
   859                         if (ch == '*') {
   860                             style = CommentStyle.JAVADOC;
   861                             scanDocComment();
   862                         } else {
   863                             style = CommentStyle.BLOCK;
   864                             while (bp < buflen) {
   865                                 if (ch == '*') {
   866                                     scanChar();
   867                                     if (ch == '/') break;
   868                                 } else {
   869                                     scanCommentChar();
   870                                 }
   871                             }
   872                         }
   873                         if (ch == '/') {
   874                             scanChar();
   875                             endPos = bp;
   876                             processComment(style);
   877                             break;
   878                         } else {
   879                             lexError("unclosed.comment");
   880                             return;
   881                         }
   882                     } else if (ch == '=') {
   883                         name = names.slashequals;
   884                         token = SLASHEQ;
   885                         scanChar();
   886                     } else {
   887                         name = names.slash;
   888                         token = SLASH;
   889                     }
   890                     return;
   891                 case '\'':
   892                     scanChar();
   893                     if (ch == '\'') {
   894                         lexError("empty.char.lit");
   895                     } else {
   896                         if (ch == CR || ch == LF)
   897                             lexError(pos, "illegal.line.end.in.char.lit");
   898                         scanLitChar();
   899                         if (ch == '\'') {
   900                             scanChar();
   901                             token = CHARLITERAL;
   902                         } else {
   903                             lexError(pos, "unclosed.char.lit");
   904                         }
   905                     }
   906                     return;
   907                 case '\"':
   908                     scanChar();
   909                     while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
   910                         scanLitChar();
   911                     if (ch == '\"') {
   912                         token = STRINGLITERAL;
   913                         scanChar();
   914                     } else {
   915                         lexError(pos, "unclosed.str.lit");
   916                     }
   917                     return;
   918                 default:
   919                     if (isSpecial(ch)) {
   920                         scanOperator();
   921                     } else {
   922                         boolean isJavaIdentifierStart;
   923                         if (ch < '\u0080') {
   924                             // all ASCII range chars already handled, above
   925                             isJavaIdentifierStart = false;
   926                         } else {
   927                             char high = scanSurrogates();
   928                             if (high != 0) {
   929                                 if (sp == sbuf.length) {
   930                                     putChar(high);
   931                                 } else {
   932                                     sbuf[sp++] = high;
   933                                 }
   935                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(
   936                                     Character.toCodePoint(high, ch));
   937                             } else {
   938                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
   939                             }
   940                         }
   941                         if (isJavaIdentifierStart) {
   942                             scanIdent();
   943                         } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
   944                             token = EOF;
   945                             pos = bp = eofPos;
   946                         } else {
   947                             lexError("illegal.char", String.valueOf((int)ch));
   948                             scanChar();
   949                         }
   950                     }
   951                     return;
   952                 }
   953             }
   954         } finally {
   955             endPos = bp;
   956             if (scannerDebug)
   957                 System.out.println("nextToken(" + pos
   958                                    + "," + endPos + ")=|" +
   959                                    new String(getRawCharacters(pos, endPos))
   960                                    + "|");
   961         }
   962     }
   964     /** Return the current token, set by nextToken().
   965      */
   966     public Token token() {
   967         return token;
   968     }
   970     /** Sets the current token.
   971      */
   972     public void token(Token token) {
   973         this.token = token;
   974     }
   976     /** Return the current token's position: a 0-based
   977      *  offset from beginning of the raw input stream
   978      *  (before unicode translation)
   979      */
   980     public int pos() {
   981         return pos;
   982     }
   984     /** Return the last character position of the current token.
   985      */
   986     public int endPos() {
   987         return endPos;
   988     }
   990     /** Return the last character position of the previous token.
   991      */
   992     public int prevEndPos() {
   993         return prevEndPos;
   994     }
   996     /** Return the position where a lexical error occurred;
   997      */
   998     public int errPos() {
   999         return errPos;
  1002     /** Set the position where a lexical error occurred;
  1003      */
  1004     public void errPos(int pos) {
  1005         errPos = pos;
  1008     /** Return the name of an identifier or token for the current token.
  1009      */
  1010     public Name name() {
  1011         return name;
  1014     /** Return the radix of a numeric literal token.
  1015      */
  1016     public int radix() {
  1017         return radix;
  1020     /** Has a @deprecated been encountered in last doc comment?
  1021      *  This needs to be reset by client with resetDeprecatedFlag.
  1022      */
  1023     public boolean deprecatedFlag() {
  1024         return deprecatedFlag;
  1027     public void resetDeprecatedFlag() {
  1028         deprecatedFlag = false;
  1031     /**
  1032      * Returns the documentation string of the current token.
  1033      */
  1034     public String docComment() {
  1035         return null;
  1038     /**
  1039      * Returns a copy of the input buffer, up to its inputLength.
  1040      * Unicode escape sequences are not translated.
  1041      */
  1042     public char[] getRawCharacters() {
  1043         char[] chars = new char[buflen];
  1044         System.arraycopy(buf, 0, chars, 0, buflen);
  1045         return chars;
  1048     /**
  1049      * Returns a copy of a character array subset of the input buffer.
  1050      * The returned array begins at the <code>beginIndex</code> and
  1051      * extends to the character at index <code>endIndex - 1</code>.
  1052      * Thus the length of the substring is <code>endIndex-beginIndex</code>.
  1053      * This behavior is like
  1054      * <code>String.substring(beginIndex, endIndex)</code>.
  1055      * Unicode escape sequences are not translated.
  1057      * @param beginIndex the beginning index, inclusive.
  1058      * @param endIndex the ending index, exclusive.
  1059      * @throws IndexOutOfBounds if either offset is outside of the
  1060      *         array bounds
  1061      */
  1062     public char[] getRawCharacters(int beginIndex, int endIndex) {
  1063         int length = endIndex - beginIndex;
  1064         char[] chars = new char[length];
  1065         System.arraycopy(buf, beginIndex, chars, 0, length);
  1066         return chars;
  1069     public enum CommentStyle {
  1070         LINE,
  1071         BLOCK,
  1072         JAVADOC,
  1075     /**
  1076      * Called when a complete comment has been scanned. pos and endPos
  1077      * will mark the comment boundary.
  1078      */
  1079     protected void processComment(CommentStyle style) {
  1080         if (scannerDebug)
  1081             System.out.println("processComment(" + pos
  1082                                + "," + endPos + "," + style + ")=|"
  1083                                + new String(getRawCharacters(pos, endPos))
  1084                                + "|");
  1087     /**
  1088      * Called when a complete whitespace run has been scanned. pos and endPos
  1089      * will mark the whitespace boundary.
  1090      */
  1091     protected void processWhiteSpace() {
  1092         if (scannerDebug)
  1093             System.out.println("processWhitespace(" + pos
  1094                                + "," + endPos + ")=|" +
  1095                                new String(getRawCharacters(pos, endPos))
  1096                                + "|");
  1099     /**
  1100      * Called when a line terminator has been processed.
  1101      */
  1102     protected void processLineTerminator() {
  1103         if (scannerDebug)
  1104             System.out.println("processTerminator(" + pos
  1105                                + "," + endPos + ")=|" +
  1106                                new String(getRawCharacters(pos, endPos))
  1107                                + "|");
  1110     /** Build a map for translating between line numbers and
  1111      * positions in the input.
  1113      * @return a LineMap */
  1114     public Position.LineMap getLineMap() {
  1115         return Position.makeLineMap(buf, buflen, false);

mercurial