Fri, 16 Jul 2010 19:35:24 -0700
6911256: Project Coin: Support Automatic Resource Management (ARM) blocks in the compiler
6964740: Project Coin: More tests for ARM compiler changes
6965277: Project Coin: Correctness issues in ARM implementation
6967065: add -Xlint warning category for Automatic Resource Management (ARM)
Reviewed-by: jjb, darcy, mcimadamore, jjg, briangoetz
Contributed-by: tball@google.com
duke@1 | 1 | /* |
ohair@554 | 2 | * Copyright (c) 1999, 2008, Oracle and/or its affiliates. All rights reserved. |
duke@1 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@1 | 4 | * |
duke@1 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@1 | 6 | * under the terms of the GNU General Public License version 2 only, as |
ohair@554 | 7 | * published by the Free Software Foundation. Oracle designates this |
duke@1 | 8 | * particular file as subject to the "Classpath" exception as provided |
ohair@554 | 9 | * by Oracle in the LICENSE file that accompanied this code. |
duke@1 | 10 | * |
duke@1 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@1 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@1 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@1 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@1 | 15 | * accompanied this code). |
duke@1 | 16 | * |
duke@1 | 17 | * You should have received a copy of the GNU General Public License version |
duke@1 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@1 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@1 | 20 | * |
ohair@554 | 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
ohair@554 | 22 | * or visit www.oracle.com if you need additional information or have any |
ohair@554 | 23 | * questions. |
duke@1 | 24 | */ |
duke@1 | 25 | |
duke@1 | 26 | package com.sun.tools.javac.parser; |
duke@1 | 27 | |
duke@1 | 28 | import java.nio.*; |
duke@1 | 29 | |
jjg@50 | 30 | import com.sun.tools.javac.code.Source; |
jjg@50 | 31 | import com.sun.tools.javac.file.JavacFileManager; |
duke@1 | 32 | import com.sun.tools.javac.util.*; |
duke@1 | 33 | |
duke@1 | 34 | |
duke@1 | 35 | import static com.sun.tools.javac.parser.Token.*; |
duke@1 | 36 | import static com.sun.tools.javac.util.LayoutCharacters.*; |
duke@1 | 37 | |
duke@1 | 38 | /** The lexical analyzer maps an input stream consisting of |
duke@1 | 39 | * ASCII characters and Unicode escapes into a token sequence. |
duke@1 | 40 | * |
jjg@581 | 41 | * <p><b>This is NOT part of any supported API. |
jjg@581 | 42 | * If you write code that depends on this, you do so at your own risk. |
duke@1 | 43 | * This code and its internal interfaces are subject to change or |
duke@1 | 44 | * deletion without notice.</b> |
duke@1 | 45 | */ |
duke@1 | 46 | public class Scanner implements Lexer { |
duke@1 | 47 | |
duke@1 | 48 | private static boolean scannerDebug = false; |
duke@1 | 49 | |
duke@1 | 50 | /** A factory for creating scanners. */ |
duke@1 | 51 | public static class Factory { |
duke@1 | 52 | /** The context key for the scanner factory. */ |
duke@1 | 53 | public static final Context.Key<Scanner.Factory> scannerFactoryKey = |
duke@1 | 54 | new Context.Key<Scanner.Factory>(); |
duke@1 | 55 | |
duke@1 | 56 | /** Get the Factory instance for this context. */ |
duke@1 | 57 | public static Factory instance(Context context) { |
duke@1 | 58 | Factory instance = context.get(scannerFactoryKey); |
duke@1 | 59 | if (instance == null) |
duke@1 | 60 | instance = new Factory(context); |
duke@1 | 61 | return instance; |
duke@1 | 62 | } |
duke@1 | 63 | |
duke@1 | 64 | final Log log; |
jjg@113 | 65 | final Names names; |
duke@1 | 66 | final Source source; |
duke@1 | 67 | final Keywords keywords; |
duke@1 | 68 | |
duke@1 | 69 | /** Create a new scanner factory. */ |
duke@1 | 70 | protected Factory(Context context) { |
duke@1 | 71 | context.put(scannerFactoryKey, this); |
duke@1 | 72 | this.log = Log.instance(context); |
jjg@113 | 73 | this.names = Names.instance(context); |
duke@1 | 74 | this.source = Source.instance(context); |
duke@1 | 75 | this.keywords = Keywords.instance(context); |
duke@1 | 76 | } |
duke@1 | 77 | |
duke@1 | 78 | public Scanner newScanner(CharSequence input) { |
duke@1 | 79 | if (input instanceof CharBuffer) { |
duke@1 | 80 | return new Scanner(this, (CharBuffer)input); |
duke@1 | 81 | } else { |
duke@1 | 82 | char[] array = input.toString().toCharArray(); |
duke@1 | 83 | return newScanner(array, array.length); |
duke@1 | 84 | } |
duke@1 | 85 | } |
duke@1 | 86 | |
duke@1 | 87 | public Scanner newScanner(char[] input, int inputLength) { |
duke@1 | 88 | return new Scanner(this, input, inputLength); |
duke@1 | 89 | } |
duke@1 | 90 | } |
duke@1 | 91 | |
duke@1 | 92 | /* Output variables; set by nextToken(): |
duke@1 | 93 | */ |
duke@1 | 94 | |
duke@1 | 95 | /** The token, set by nextToken(). |
duke@1 | 96 | */ |
duke@1 | 97 | private Token token; |
duke@1 | 98 | |
duke@1 | 99 | /** Allow hex floating-point literals. |
duke@1 | 100 | */ |
duke@1 | 101 | private boolean allowHexFloats; |
duke@1 | 102 | |
jjg@409 | 103 | /** Allow binary literals. |
jjg@409 | 104 | */ |
jjg@409 | 105 | private boolean allowBinaryLiterals; |
jjg@409 | 106 | |
jjg@409 | 107 | /** Allow underscores in literals. |
jjg@409 | 108 | */ |
jjg@409 | 109 | private boolean allowUnderscoresInLiterals; |
jjg@409 | 110 | |
jjg@409 | 111 | /** The source language setting. |
jjg@409 | 112 | */ |
jjg@409 | 113 | private Source source; |
jjg@409 | 114 | |
duke@1 | 115 | /** The token's position, 0-based offset from beginning of text. |
duke@1 | 116 | */ |
duke@1 | 117 | private int pos; |
duke@1 | 118 | |
duke@1 | 119 | /** Character position just after the last character of the token. |
duke@1 | 120 | */ |
duke@1 | 121 | private int endPos; |
duke@1 | 122 | |
duke@1 | 123 | /** The last character position of the previous token. |
duke@1 | 124 | */ |
duke@1 | 125 | private int prevEndPos; |
duke@1 | 126 | |
duke@1 | 127 | /** The position where a lexical error occurred; |
duke@1 | 128 | */ |
duke@1 | 129 | private int errPos = Position.NOPOS; |
duke@1 | 130 | |
duke@1 | 131 | /** The name of an identifier or token: |
duke@1 | 132 | */ |
duke@1 | 133 | private Name name; |
duke@1 | 134 | |
duke@1 | 135 | /** The radix of a numeric literal token. |
duke@1 | 136 | */ |
duke@1 | 137 | private int radix; |
duke@1 | 138 | |
duke@1 | 139 | /** Has a @deprecated been encountered in last doc comment? |
duke@1 | 140 | * this needs to be reset by client. |
duke@1 | 141 | */ |
duke@1 | 142 | protected boolean deprecatedFlag = false; |
duke@1 | 143 | |
duke@1 | 144 | /** A character buffer for literals. |
duke@1 | 145 | */ |
duke@1 | 146 | private char[] sbuf = new char[128]; |
duke@1 | 147 | private int sp; |
duke@1 | 148 | |
duke@1 | 149 | /** The input buffer, index of next character to be read, |
duke@1 | 150 | * index of one past last character in buffer. |
duke@1 | 151 | */ |
duke@1 | 152 | private char[] buf; |
duke@1 | 153 | private int bp; |
duke@1 | 154 | private int buflen; |
duke@1 | 155 | private int eofPos; |
duke@1 | 156 | |
duke@1 | 157 | /** The current character. |
duke@1 | 158 | */ |
duke@1 | 159 | private char ch; |
duke@1 | 160 | |
duke@1 | 161 | /** The buffer index of the last converted unicode character |
duke@1 | 162 | */ |
duke@1 | 163 | private int unicodeConversionBp = -1; |
duke@1 | 164 | |
duke@1 | 165 | /** The log to be used for error reporting. |
duke@1 | 166 | */ |
duke@1 | 167 | private final Log log; |
duke@1 | 168 | |
duke@1 | 169 | /** The name table. */ |
jjg@113 | 170 | private final Names names; |
duke@1 | 171 | |
duke@1 | 172 | /** The keyword table. */ |
duke@1 | 173 | private final Keywords keywords; |
duke@1 | 174 | |
duke@1 | 175 | /** Common code for constructors: copies log, names, keywords and source from the factory and caches the source-level literal feature flags. */ |
duke@1 | 176 | private Scanner(Factory fac) { |
jjg@409 | 177 | log = fac.log; |
jjg@409 | 178 | names = fac.names; |
jjg@409 | 179 | keywords = fac.keywords; |
jjg@409 | 180 | source = fac.source; |
jjg@409 | 181 | allowBinaryLiterals = source.allowBinaryLiterals(); |
jjg@409 | 182 | allowHexFloats = source.allowHexFloats(); |
jjg@409 | 183 | allowUnderscoresInLiterals = source.allowUnderscoresInLiterals(); |
duke@1 | 184 | } |
duke@1 | 185 | |
duke@1 | 186 | private static final boolean hexFloatsWork = hexFloatsWork(); |
duke@1 | 187 | private static boolean hexFloatsWork() { |
duke@1 | 188 | try { |
duke@1 | 189 | Float.valueOf("0x1.0p1"); |
duke@1 | 190 | return true; |
duke@1 | 191 | } catch (NumberFormatException ex) { |
duke@1 | 192 | return false; |
duke@1 | 193 | } |
duke@1 | 194 | } |
duke@1 | 195 | |
duke@1 | 196 | /** Create a scanner from the input buffer. buffer must implement |
duke@1 | 197 | * array() and compact(), and remaining() must be less than limit(). |
duke@1 | 198 | */ |
duke@1 | 199 | protected Scanner(Factory fac, CharBuffer buffer) { |
duke@1 | 200 | this(fac, JavacFileManager.toArray(buffer), buffer.limit()); |
duke@1 | 201 | } |
duke@1 | 202 | |
duke@1 | 203 | /** |
duke@1 | 204 | * Create a scanner from the input array. This method might |
duke@1 | 205 | * modify the array. To avoid copying the input array, ensure |
duke@1 | 206 | * that {@code inputLength < input.length} or |
duke@1 | 207 | * {@code input[input.length -1]} is a white space character. |
duke@1 | 208 | * |
duke@1 | 209 | * @param fac the factory which created this Scanner |
duke@1 | 210 | * @param input the input, might be modified |
duke@1 | 211 | * @param inputLength the size of the input. |
duke@1 | 212 | * Must be positive and less than or equal to input.length. |
duke@1 | 213 | */ |
duke@1 | 214 | protected Scanner(Factory fac, char[] input, int inputLength) { |
duke@1 | 215 | this(fac); |
duke@1 | 216 | eofPos = inputLength; |
duke@1 | 217 | if (inputLength == input.length) { |
duke@1 | 218 | if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) { |
duke@1 | 219 | inputLength--; |
duke@1 | 220 | } else { |
duke@1 | 221 | char[] newInput = new char[inputLength + 1]; |
duke@1 | 222 | System.arraycopy(input, 0, newInput, 0, input.length); |
duke@1 | 223 | input = newInput; |
duke@1 | 224 | } |
duke@1 | 225 | } |
duke@1 | 226 | buf = input; |
duke@1 | 227 | buflen = inputLength; |
duke@1 | 228 | buf[buflen] = EOI; |
duke@1 | 229 | bp = -1; |
duke@1 | 230 | scanChar(); |
duke@1 | 231 | } |
duke@1 | 232 | |
duke@1 | 233 | /** Report an error at the given position using the provided arguments. |
duke@1 | 234 | */ |
duke@1 | 235 | private void lexError(int pos, String key, Object... args) { |
duke@1 | 236 | log.error(pos, key, args); |
duke@1 | 237 | token = ERROR; |
duke@1 | 238 | errPos = pos; |
duke@1 | 239 | } |
duke@1 | 240 | |
duke@1 | 241 | /** Report an error at the current token position using the provided |
duke@1 | 242 | * arguments. |
duke@1 | 243 | */ |
duke@1 | 244 | private void lexError(String key, Object... args) { |
duke@1 | 245 | lexError(pos, key, args); |
duke@1 | 246 | } |
duke@1 | 247 | |
duke@1 | 248 | /** Convert an ASCII digit from its base (2, 8, 10, or 16) |
duke@1 | 249 | * to its value. |
duke@1 | 250 | */ |
duke@1 | 251 | private int digit(int base) { |
duke@1 | 252 | char c = ch; |
duke@1 | 253 | int result = Character.digit(c, base); |
duke@1 | 254 | if (result >= 0 && c > 0x7f) { |
duke@1 | 255 | lexError(pos+1, "illegal.nonascii.digit"); |
duke@1 | 256 | ch = "0123456789abcdef".charAt(result); |
duke@1 | 257 | } |
duke@1 | 258 | return result; |
duke@1 | 259 | } |
duke@1 | 260 | |
duke@1 | 261 | /** Convert unicode escape; bp points to initial '\' character |
duke@1 | 262 | * (Spec 3.3). |
duke@1 | 263 | */ |
duke@1 | 264 | private void convertUnicode() { |
duke@1 | 265 | if (ch == '\\' && unicodeConversionBp != bp) { |
duke@1 | 266 | bp++; ch = buf[bp]; |
duke@1 | 267 | if (ch == 'u') { |
duke@1 | 268 | do { |
duke@1 | 269 | bp++; ch = buf[bp]; |
duke@1 | 270 | } while (ch == 'u'); |
duke@1 | 271 | int limit = bp + 3; |
duke@1 | 272 | if (limit < buflen) { |
duke@1 | 273 | int d = digit(16); |
duke@1 | 274 | int code = d; |
duke@1 | 275 | while (bp < limit && d >= 0) { |
duke@1 | 276 | bp++; ch = buf[bp]; |
duke@1 | 277 | d = digit(16); |
duke@1 | 278 | code = (code << 4) + d; |
duke@1 | 279 | } |
duke@1 | 280 | if (d >= 0) { |
duke@1 | 281 | ch = (char)code; |
duke@1 | 282 | unicodeConversionBp = bp; |
duke@1 | 283 | return; |
duke@1 | 284 | } |
duke@1 | 285 | } |
duke@1 | 286 | lexError(bp, "illegal.unicode.esc"); |
duke@1 | 287 | } else { |
duke@1 | 288 | bp--; |
duke@1 | 289 | ch = '\\'; |
duke@1 | 290 | } |
duke@1 | 291 | } |
duke@1 | 292 | } |
duke@1 | 293 | |
duke@1 | 294 | /** Read next character. |
duke@1 | 295 | */ |
duke@1 | 296 | private void scanChar() { |
duke@1 | 297 | ch = buf[++bp]; |
duke@1 | 298 | if (ch == '\\') { |
duke@1 | 299 | convertUnicode(); |
duke@1 | 300 | } |
duke@1 | 301 | } |
duke@1 | 302 | |
duke@1 | 303 | /** Read next character in comment, skipping over double '\' characters. |
duke@1 | 304 | */ |
duke@1 | 305 | private void scanCommentChar() { |
duke@1 | 306 | scanChar(); |
duke@1 | 307 | if (ch == '\\') { |
duke@1 | 308 | if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { |
duke@1 | 309 | bp++; |
duke@1 | 310 | } else { |
duke@1 | 311 | convertUnicode(); |
duke@1 | 312 | } |
duke@1 | 313 | } |
duke@1 | 314 | } |
duke@1 | 315 | |
duke@1 | 316 | /** Append a character to sbuf. |
duke@1 | 317 | */ |
duke@1 | 318 | private void putChar(char ch) { |
duke@1 | 319 | if (sp == sbuf.length) { |
duke@1 | 320 | char[] newsbuf = new char[sbuf.length * 2]; |
duke@1 | 321 | System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length); |
duke@1 | 322 | sbuf = newsbuf; |
duke@1 | 323 | } |
duke@1 | 324 | sbuf[sp++] = ch; |
duke@1 | 325 | } |
duke@1 | 326 | |
duke@1 | 327 | /** For debugging purposes: print the current character to System.err and flush that stream. |
duke@1 | 328 | */ |
duke@1 | 329 | private void dch() { |
duke@1 | 330 | System.err.print(ch); System.err.flush(); |
duke@1 | 331 | } |
duke@1 | 332 | |
duke@1 | 333 | /** Read next character in character or string literal and copy into sbuf. |
duke@1 | 334 | */ |
jrose@267 | 335 | private void scanLitChar(boolean forBytecodeName) { |
duke@1 | 336 | if (ch == '\\') { |
duke@1 | 337 | if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { |
duke@1 | 338 | bp++; |
duke@1 | 339 | putChar('\\'); |
duke@1 | 340 | scanChar(); |
duke@1 | 341 | } else { |
duke@1 | 342 | scanChar(); |
duke@1 | 343 | switch (ch) { |
duke@1 | 344 | case '0': case '1': case '2': case '3': |
duke@1 | 345 | case '4': case '5': case '6': case '7': |
duke@1 | 346 | char leadch = ch; |
duke@1 | 347 | int oct = digit(8); |
duke@1 | 348 | scanChar(); |
duke@1 | 349 | if ('0' <= ch && ch <= '7') { |
duke@1 | 350 | oct = oct * 8 + digit(8); |
duke@1 | 351 | scanChar(); |
duke@1 | 352 | if (leadch <= '3' && '0' <= ch && ch <= '7') { |
duke@1 | 353 | oct = oct * 8 + digit(8); |
duke@1 | 354 | scanChar(); |
duke@1 | 355 | } |
duke@1 | 356 | } |
duke@1 | 357 | putChar((char)oct); |
duke@1 | 358 | break; |
duke@1 | 359 | case 'b': |
duke@1 | 360 | putChar('\b'); scanChar(); break; |
duke@1 | 361 | case 't': |
duke@1 | 362 | putChar('\t'); scanChar(); break; |
duke@1 | 363 | case 'n': |
duke@1 | 364 | putChar('\n'); scanChar(); break; |
duke@1 | 365 | case 'f': |
duke@1 | 366 | putChar('\f'); scanChar(); break; |
duke@1 | 367 | case 'r': |
duke@1 | 368 | putChar('\r'); scanChar(); break; |
duke@1 | 369 | case '\'': |
duke@1 | 370 | putChar('\''); scanChar(); break; |
duke@1 | 371 | case '\"': |
duke@1 | 372 | putChar('\"'); scanChar(); break; |
duke@1 | 373 | case '\\': |
duke@1 | 374 | putChar('\\'); scanChar(); break; |
jrose@267 | 375 | case '|': case ',': case '?': case '%': |
jrose@267 | 376 | case '^': case '_': case '{': case '}': |
jrose@267 | 377 | case '!': case '-': case '=': |
jrose@267 | 378 | if (forBytecodeName) { |
jrose@267 | 379 | // Accept escape sequences for dangerous bytecode chars. |
jrose@267 | 380 | // This is illegal in normal Java string or character literals. |
jrose@267 | 381 | // Note that the escape sequence itself is passed through. |
jrose@267 | 382 | putChar('\\'); putChar(ch); scanChar(); |
jrose@267 | 383 | } else { |
jrose@267 | 384 | lexError(bp, "illegal.esc.char"); |
jrose@267 | 385 | } |
jrose@267 | 386 | break; |
duke@1 | 387 | default: |
duke@1 | 388 | lexError(bp, "illegal.esc.char"); |
duke@1 | 389 | } |
duke@1 | 390 | } |
duke@1 | 391 | } else if (bp != buflen) { |
duke@1 | 392 | putChar(ch); scanChar(); |
duke@1 | 393 | } |
duke@1 | 394 | } |
jrose@267 | 395 | private void scanLitChar() { |
jrose@267 | 396 | scanLitChar(false); |
jrose@267 | 397 | } |
jrose@267 | 398 | |
jrose@267 | 399 | /** Read next character in an exotic name #"foo" |
jrose@267 | 400 | */ |
jrose@267 | 401 | private void scanBytecodeNameChar() { |
jrose@267 | 402 | switch (ch) { |
jrose@267 | 403 | // reject any "dangerous" char which is illegal somewhere in the JVM spec |
jrose@267 | 404 | // cf. http://blogs.sun.com/jrose/entry/symbolic_freedom_in_the_vm |
jrose@267 | 405 | case '/': case '.': case ';': // illegal everywhere |
jrose@267 | 406 | case '<': case '>': // illegal in methods, dangerous in classes |
jrose@267 | 407 | case '[': // illegal in classes |
jrose@267 | 408 | lexError(bp, "illegal.bytecode.ident.char", String.valueOf((int)ch)); |
jrose@267 | 409 | break; |
jrose@267 | 410 | } |
jrose@267 | 411 | scanLitChar(true); |
jrose@267 | 412 | } |
duke@1 | 413 | |
jjg@409 | 414 | private void scanDigits(int digitRadix) { |
jjg@409 | 415 | char saveCh; |
jjg@409 | 416 | int savePos; |
jjg@409 | 417 | do { |
jjg@409 | 418 | if (ch != '_') { |
jjg@409 | 419 | putChar(ch); |
jjg@409 | 420 | } else { |
jjg@409 | 421 | if (!allowUnderscoresInLiterals) { |
jjg@597 | 422 | lexError("unsupported.underscore.lit", source.name); |
jjg@409 | 423 | allowUnderscoresInLiterals = true; |
jjg@409 | 424 | } |
jjg@409 | 425 | } |
jjg@409 | 426 | saveCh = ch; |
jjg@409 | 427 | savePos = bp; |
jjg@409 | 428 | scanChar(); |
jjg@409 | 429 | } while (digit(digitRadix) >= 0 || ch == '_'); |
jjg@409 | 430 | if (saveCh == '_') |
jjg@409 | 431 | lexError(savePos, "illegal.underscore"); |
jjg@409 | 432 | } |
jjg@409 | 433 | |
duke@1 | 434 | /** Read exponent and 'd' or 'f' suffix of hexadecimal floating point number. |
duke@1 | 435 | */ |
duke@1 | 436 | private void scanHexExponentAndSuffix() { |
duke@1 | 437 | if (ch == 'p' || ch == 'P') { |
duke@1 | 438 | putChar(ch); |
duke@1 | 439 | scanChar(); |
jjg@409 | 440 | skipIllegalUnderscores(); |
duke@1 | 441 | if (ch == '+' || ch == '-') { |
duke@1 | 442 | putChar(ch); |
duke@1 | 443 | scanChar(); |
duke@1 | 444 | } |
jjg@409 | 445 | skipIllegalUnderscores(); |
duke@1 | 446 | if ('0' <= ch && ch <= '9') { |
jjg@409 | 447 | scanDigits(10); |
duke@1 | 448 | if (!allowHexFloats) { |
jjg@409 | 449 | lexError("unsupported.fp.lit", source.name); |
duke@1 | 450 | allowHexFloats = true; |
duke@1 | 451 | } |
duke@1 | 452 | else if (!hexFloatsWork) |
duke@1 | 453 | lexError("unsupported.cross.fp.lit"); |
duke@1 | 454 | } else |
duke@1 | 455 | lexError("malformed.fp.lit"); |
duke@1 | 456 | } else { |
duke@1 | 457 | lexError("malformed.fp.lit"); |
duke@1 | 458 | } |
duke@1 | 459 | if (ch == 'f' || ch == 'F') { |
duke@1 | 460 | putChar(ch); |
duke@1 | 461 | scanChar(); |
duke@1 | 462 | token = FLOATLITERAL; |
duke@1 | 463 | } else { |
duke@1 | 464 | if (ch == 'd' || ch == 'D') { |
duke@1 | 465 | putChar(ch); |
duke@1 | 466 | scanChar(); |
duke@1 | 467 | } |
duke@1 | 468 | token = DOUBLELITERAL; |
duke@1 | 469 | } |
duke@1 | 470 | } |
duke@1 | 471 | |
duke@1 | 472 | /** Read fractional part of floating point number. |
duke@1 | 473 | */ |
duke@1 | 474 | private void scanFraction() { |
jjg@409 | 475 | skipIllegalUnderscores(); |
jjg@409 | 476 | if ('0' <= ch && ch <= '9') { |
jjg@409 | 477 | scanDigits(10); |
duke@1 | 478 | } |
duke@1 | 479 | int sp1 = sp; |
duke@1 | 480 | if (ch == 'e' || ch == 'E') { |
duke@1 | 481 | putChar(ch); |
duke@1 | 482 | scanChar(); |
jjg@409 | 483 | skipIllegalUnderscores(); |
duke@1 | 484 | if (ch == '+' || ch == '-') { |
duke@1 | 485 | putChar(ch); |
duke@1 | 486 | scanChar(); |
duke@1 | 487 | } |
jjg@409 | 488 | skipIllegalUnderscores(); |
duke@1 | 489 | if ('0' <= ch && ch <= '9') { |
jjg@409 | 490 | scanDigits(10); |
duke@1 | 491 | return; |
duke@1 | 492 | } |
duke@1 | 493 | lexError("malformed.fp.lit"); |
duke@1 | 494 | sp = sp1; |
duke@1 | 495 | } |
duke@1 | 496 | } |
duke@1 | 497 | |
duke@1 | 498 | /** Read fractional part and 'd' or 'f' suffix of floating point number. |
duke@1 | 499 | */ |
duke@1 | 500 | private void scanFractionAndSuffix() { |
duke@1 | 501 | this.radix = 10; |
duke@1 | 502 | scanFraction(); |
duke@1 | 503 | if (ch == 'f' || ch == 'F') { |
duke@1 | 504 | putChar(ch); |
duke@1 | 505 | scanChar(); |
duke@1 | 506 | token = FLOATLITERAL; |
duke@1 | 507 | } else { |
duke@1 | 508 | if (ch == 'd' || ch == 'D') { |
duke@1 | 509 | putChar(ch); |
duke@1 | 510 | scanChar(); |
duke@1 | 511 | } |
duke@1 | 512 | token = DOUBLELITERAL; |
duke@1 | 513 | } |
duke@1 | 514 | } |
duke@1 | 515 | |
duke@1 | 516 | /** Read fractional part, exponent and 'd' or 'f' suffix of hexadecimal floating point number. |
duke@1 | 517 | */ |
duke@1 | 518 | private void scanHexFractionAndSuffix(boolean seendigit) { |
duke@1 | 519 | this.radix = 16; |
duke@1 | 520 | assert ch == '.'; |
duke@1 | 521 | putChar(ch); |
duke@1 | 522 | scanChar(); |
jjg@409 | 523 | skipIllegalUnderscores(); |
jjg@409 | 524 | if (digit(16) >= 0) { |
duke@1 | 525 | seendigit = true; |
jjg@409 | 526 | scanDigits(16); |
duke@1 | 527 | } |
duke@1 | 528 | if (!seendigit) |
duke@1 | 529 | lexError("invalid.hex.number"); |
duke@1 | 530 | else |
duke@1 | 531 | scanHexExponentAndSuffix(); |
duke@1 | 532 | } |
duke@1 | 533 | |
jjg@409 | 534 | private void skipIllegalUnderscores() { |
jjg@409 | 535 | if (ch == '_') { |
jjg@409 | 536 | lexError(bp, "illegal.underscore"); |
jjg@409 | 537 | while (ch == '_') |
jjg@409 | 538 | scanChar(); |
jjg@409 | 539 | } |
jjg@409 | 540 | } |
jjg@409 | 541 | |
duke@1 | 542 | /** Read a number. |
jjg@409 | 543 | * @param radix The radix of the number; one of 2, 8, 10, 16. |
duke@1 | 544 | */ |
duke@1 | 545 | private void scanNumber(int radix) { |
duke@1 | 546 | this.radix = radix; |
duke@1 | 547 | // for octal, allow base-10 digit in case it's a float literal |
jjg@409 | 548 | int digitRadix = (radix == 8 ? 10 : radix); |
duke@1 | 549 | boolean seendigit = false; |
jjg@409 | 550 | if (digit(digitRadix) >= 0) { |
duke@1 | 551 | seendigit = true; |
jjg@409 | 552 | scanDigits(digitRadix); |
duke@1 | 553 | } |
duke@1 | 554 | if (radix == 16 && ch == '.') { |
duke@1 | 555 | scanHexFractionAndSuffix(seendigit); |
duke@1 | 556 | } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) { |
duke@1 | 557 | scanHexExponentAndSuffix(); |
jjg@409 | 558 | } else if (digitRadix == 10 && ch == '.') { |
duke@1 | 559 | putChar(ch); |
duke@1 | 560 | scanChar(); |
duke@1 | 561 | scanFractionAndSuffix(); |
jjg@409 | 562 | } else if (digitRadix == 10 && |
duke@1 | 563 | (ch == 'e' || ch == 'E' || |
duke@1 | 564 | ch == 'f' || ch == 'F' || |
duke@1 | 565 | ch == 'd' || ch == 'D')) { |
duke@1 | 566 | scanFractionAndSuffix(); |
duke@1 | 567 | } else { |
duke@1 | 568 | if (ch == 'l' || ch == 'L') { |
duke@1 | 569 | scanChar(); |
duke@1 | 570 | token = LONGLITERAL; |
duke@1 | 571 | } else { |
duke@1 | 572 | token = INTLITERAL; |
duke@1 | 573 | } |
duke@1 | 574 | } |
duke@1 | 575 | } |
duke@1 | 576 | |
duke@1 | 577 | /** Read an identifier. |
duke@1 | 578 | */ |
duke@1 | 579 | private void scanIdent() { |
duke@1 | 580 | boolean isJavaIdentifierPart; |
duke@1 | 581 | char high; |
duke@1 | 582 | do { |
duke@1 | 583 | if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch; |
duke@1 | 584 | // optimization, was: putChar(ch); |
duke@1 | 585 | |
duke@1 | 586 | scanChar(); |
duke@1 | 587 | switch (ch) { |
duke@1 | 588 | case 'A': case 'B': case 'C': case 'D': case 'E': |
duke@1 | 589 | case 'F': case 'G': case 'H': case 'I': case 'J': |
duke@1 | 590 | case 'K': case 'L': case 'M': case 'N': case 'O': |
duke@1 | 591 | case 'P': case 'Q': case 'R': case 'S': case 'T': |
duke@1 | 592 | case 'U': case 'V': case 'W': case 'X': case 'Y': |
duke@1 | 593 | case 'Z': |
duke@1 | 594 | case 'a': case 'b': case 'c': case 'd': case 'e': |
duke@1 | 595 | case 'f': case 'g': case 'h': case 'i': case 'j': |
duke@1 | 596 | case 'k': case 'l': case 'm': case 'n': case 'o': |
duke@1 | 597 | case 'p': case 'q': case 'r': case 's': case 't': |
duke@1 | 598 | case 'u': case 'v': case 'w': case 'x': case 'y': |
duke@1 | 599 | case 'z': |
duke@1 | 600 | case '$': case '_': |
duke@1 | 601 | case '0': case '1': case '2': case '3': case '4': |
duke@1 | 602 | case '5': case '6': case '7': case '8': case '9': |
duke@1 | 603 | case '\u0000': case '\u0001': case '\u0002': case '\u0003': |
duke@1 | 604 | case '\u0004': case '\u0005': case '\u0006': case '\u0007': |
duke@1 | 605 | case '\u0008': case '\u000E': case '\u000F': case '\u0010': |
duke@1 | 606 | case '\u0011': case '\u0012': case '\u0013': case '\u0014': |
duke@1 | 607 | case '\u0015': case '\u0016': case '\u0017': |
duke@1 | 608 | case '\u0018': case '\u0019': case '\u001B': |
duke@1 | 609 | case '\u007F': |
duke@1 | 610 | break; |
duke@1 | 611 | case '\u001A': // EOI is also a legal identifier part |
duke@1 | 612 | if (bp >= buflen) { |
duke@1 | 613 | name = names.fromChars(sbuf, 0, sp); |
duke@1 | 614 | token = keywords.key(name); |
duke@1 | 615 | return; |
duke@1 | 616 | } |
duke@1 | 617 | break; |
duke@1 | 618 | default: |
duke@1 | 619 | if (ch < '\u0080') { |
duke@1 | 620 | // all ASCII range chars already handled, above |
duke@1 | 621 | isJavaIdentifierPart = false; |
duke@1 | 622 | } else { |
duke@1 | 623 | high = scanSurrogates(); |
duke@1 | 624 | if (high != 0) { |
duke@1 | 625 | if (sp == sbuf.length) { |
duke@1 | 626 | putChar(high); |
duke@1 | 627 | } else { |
duke@1 | 628 | sbuf[sp++] = high; |
duke@1 | 629 | } |
duke@1 | 630 | isJavaIdentifierPart = Character.isJavaIdentifierPart( |
duke@1 | 631 | Character.toCodePoint(high, ch)); |
duke@1 | 632 | } else { |
duke@1 | 633 | isJavaIdentifierPart = Character.isJavaIdentifierPart(ch); |
duke@1 | 634 | } |
duke@1 | 635 | } |
duke@1 | 636 | if (!isJavaIdentifierPart) { |
duke@1 | 637 | name = names.fromChars(sbuf, 0, sp); |
duke@1 | 638 | token = keywords.key(name); |
duke@1 | 639 | return; |
duke@1 | 640 | } |
duke@1 | 641 | } |
duke@1 | 642 | } while (true); |
duke@1 | 643 | } |
duke@1 | 644 | |
duke@1 | 645 | /** Are surrogates supported? |
duke@1 | 646 | */ |
duke@1 | 647 | final static boolean surrogatesSupported = surrogatesSupported(); |
duke@1 | 648 | private static boolean surrogatesSupported() { |
duke@1 | 649 | try { |
duke@1 | 650 | Character.isHighSurrogate('a'); |
duke@1 | 651 | return true; |
duke@1 | 652 | } catch (NoSuchMethodError ex) { |
duke@1 | 653 | return false; |
duke@1 | 654 | } |
duke@1 | 655 | } |
duke@1 | 656 | |
duke@1 | 657 | /** Scan surrogate pairs. If 'ch' is a high surrogate and |
duke@1 | 658 | * the next character is a low surrogate, then put the low |
duke@1 | 659 | * surrogate in 'ch', and return the high surrogate. |
duke@1 | 660 | * otherwise, just return 0. |
duke@1 | 661 | */ |
duke@1 | 662 | private char scanSurrogates() { |
duke@1 | 663 | if (surrogatesSupported && Character.isHighSurrogate(ch)) { |
duke@1 | 664 | char high = ch; |
duke@1 | 665 | |
duke@1 | 666 | scanChar(); |
duke@1 | 667 | |
duke@1 | 668 | if (Character.isLowSurrogate(ch)) { |
duke@1 | 669 | return high; |
duke@1 | 670 | } |
duke@1 | 671 | |
duke@1 | 672 | ch = high; |
duke@1 | 673 | } |
duke@1 | 674 | |
duke@1 | 675 | return 0; |
duke@1 | 676 | } |
duke@1 | 677 | |
duke@1 | 678 | /** Return true if ch can be part of an operator. |
duke@1 | 679 | */ |
duke@1 | 680 | private boolean isSpecial(char ch) { |
duke@1 | 681 | switch (ch) { |
duke@1 | 682 | case '!': case '%': case '&': case '*': case '?': |
duke@1 | 683 | case '+': case '-': case ':': case '<': case '=': |
duke@1 | 684 | case '>': case '^': case '|': case '~': |
duke@1 | 685 | case '@': |
duke@1 | 686 | return true; |
duke@1 | 687 | default: |
duke@1 | 688 | return false; |
duke@1 | 689 | } |
duke@1 | 690 | } |
duke@1 | 691 | |
duke@1 | 692 | /** Read longest possible sequence of special characters and convert |
duke@1 | 693 | * to token. |
duke@1 | 694 | */ |
duke@1 | 695 | private void scanOperator() { |
duke@1 | 696 | while (true) { |
duke@1 | 697 | putChar(ch); |
duke@1 | 698 | Name newname = names.fromChars(sbuf, 0, sp); |
duke@1 | 699 | if (keywords.key(newname) == IDENTIFIER) { |
duke@1 | 700 | sp--; |
duke@1 | 701 | break; |
duke@1 | 702 | } |
duke@1 | 703 | name = newname; |
duke@1 | 704 | token = keywords.key(newname); |
duke@1 | 705 | scanChar(); |
duke@1 | 706 | if (!isSpecial(ch)) break; |
duke@1 | 707 | } |
duke@1 | 708 | } |
duke@1 | 709 | |
duke@1 | 710 | /** |
duke@1 | 711 | * Scan a documentation comment; determine if a deprecated tag is present. |
duke@1 | 712 | * Called once the initial /, * have been skipped, positioned at the second * |
duke@1 | 713 | * (which is treated as the beginning of the first line). |
duke@1 | 714 | * Stops positioned at the closing '/'. |
duke@1 | 715 | */ |
duke@1 | 716 | @SuppressWarnings("fallthrough") |
duke@1 | 717 | private void scanDocComment() { |
duke@1 | 718 | boolean deprecatedPrefix = false; |
duke@1 | 719 | |
duke@1 | 720 | forEachLine: |
duke@1 | 721 | while (bp < buflen) { |
duke@1 | 722 | |
duke@1 | 723 | // Skip optional WhiteSpace at beginning of line |
duke@1 | 724 | while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { |
duke@1 | 725 | scanCommentChar(); |
duke@1 | 726 | } |
duke@1 | 727 | |
duke@1 | 728 | // Skip optional consecutive Stars |
duke@1 | 729 | while (bp < buflen && ch == '*') { |
duke@1 | 730 | scanCommentChar(); |
duke@1 | 731 | if (ch == '/') { |
duke@1 | 732 | return; |
duke@1 | 733 | } |
duke@1 | 734 | } |
duke@1 | 735 | |
duke@1 | 736 | // Skip optional WhiteSpace after Stars |
duke@1 | 737 | while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { |
duke@1 | 738 | scanCommentChar(); |
duke@1 | 739 | } |
duke@1 | 740 | |
duke@1 | 741 | deprecatedPrefix = false; |
duke@1 | 742 | // At beginning of line in the JavaDoc sense. |
duke@1 | 743 | if (bp < buflen && ch == '@' && !deprecatedFlag) { |
duke@1 | 744 | scanCommentChar(); |
duke@1 | 745 | if (bp < buflen && ch == 'd') { |
duke@1 | 746 | scanCommentChar(); |
duke@1 | 747 | if (bp < buflen && ch == 'e') { |
duke@1 | 748 | scanCommentChar(); |
duke@1 | 749 | if (bp < buflen && ch == 'p') { |
duke@1 | 750 | scanCommentChar(); |
duke@1 | 751 | if (bp < buflen && ch == 'r') { |
duke@1 | 752 | scanCommentChar(); |
duke@1 | 753 | if (bp < buflen && ch == 'e') { |
duke@1 | 754 | scanCommentChar(); |
duke@1 | 755 | if (bp < buflen && ch == 'c') { |
duke@1 | 756 | scanCommentChar(); |
duke@1 | 757 | if (bp < buflen && ch == 'a') { |
duke@1 | 758 | scanCommentChar(); |
duke@1 | 759 | if (bp < buflen && ch == 't') { |
duke@1 | 760 | scanCommentChar(); |
duke@1 | 761 | if (bp < buflen && ch == 'e') { |
duke@1 | 762 | scanCommentChar(); |
duke@1 | 763 | if (bp < buflen && ch == 'd') { |
duke@1 | 764 | deprecatedPrefix = true; |
duke@1 | 765 | scanCommentChar(); |
duke@1 | 766 | }}}}}}}}}}} |
duke@1 | 767 | if (deprecatedPrefix && bp < buflen) { |
duke@1 | 768 | if (Character.isWhitespace(ch)) { |
duke@1 | 769 | deprecatedFlag = true; |
duke@1 | 770 | } else if (ch == '*') { |
duke@1 | 771 | scanCommentChar(); |
duke@1 | 772 | if (ch == '/') { |
duke@1 | 773 | deprecatedFlag = true; |
duke@1 | 774 | return; |
duke@1 | 775 | } |
duke@1 | 776 | } |
duke@1 | 777 | } |
duke@1 | 778 | |
duke@1 | 779 | // Skip rest of line |
duke@1 | 780 | while (bp < buflen) { |
duke@1 | 781 | switch (ch) { |
duke@1 | 782 | case '*': |
duke@1 | 783 | scanCommentChar(); |
duke@1 | 784 | if (ch == '/') { |
duke@1 | 785 | return; |
duke@1 | 786 | } |
duke@1 | 787 | break; |
duke@1 | 788 | case CR: // (Spec 3.4) |
duke@1 | 789 | scanCommentChar(); |
duke@1 | 790 | if (ch != LF) { |
duke@1 | 791 | continue forEachLine; |
duke@1 | 792 | } |
duke@1 | 793 | /* fall through to LF case */ |
duke@1 | 794 | case LF: // (Spec 3.4) |
duke@1 | 795 | scanCommentChar(); |
duke@1 | 796 | continue forEachLine; |
duke@1 | 797 | default: |
duke@1 | 798 | scanCommentChar(); |
duke@1 | 799 | } |
duke@1 | 800 | } // rest of line |
duke@1 | 801 | } // forEachLine |
duke@1 | 802 | return; |
duke@1 | 803 | } |
duke@1 | 804 | |
duke@1 | 805 | /** Returns the text recorded for the current literal token, as a string built from the first sp characters of sbuf. |
duke@1 | 806 | * For integer literals the leading 0x and any 'l' suffix are left out. |
duke@1 | 807 | */ |
duke@1 | 808 | public String stringVal() { |
duke@1 | 809 | return String.valueOf(sbuf, 0, sp); |
duke@1 | 810 | } |
duke@1 | 811 | |
duke@1 | 812 | /** Reads the next token. Whitespace, line terminators and comments are consumed first and normally handed to processWhiteSpace, processLineTerminator and processComment; one token is then scanned, with lexical problems reported through lexError. On every exit path endPos is set to bp. |
duke@1 | 813 | */ |
duke@1 | 814 | public void nextToken() { |
duke@1 | 815 | |
duke@1 | 816 | try { |
duke@1 | 817 | prevEndPos = endPos; /* value later returned by prevEndPos() */ |
duke@1 | 818 | sp = 0; /* restart at the front of sbuf, the buffer stringVal() reads */ |
duke@1 | 819 | |
duke@1 | 820 | while (true) { /* skipping cases end in break and come back here; token cases return */ |
duke@1 | 821 | pos = bp; /* start position of whatever is scanned in this iteration */ |
duke@1 | 822 | switch (ch) { |
duke@1 | 823 | case ' ': // (Spec 3.6) |
duke@1 | 824 | case '\t': // (Spec 3.6) |
duke@1 | 825 | case FF: // (Spec 3.6) |
duke@1 | 826 | do { |
duke@1 | 827 | scanChar(); |
duke@1 | 828 | } while (ch == ' ' || ch == '\t' || ch == FF); |
duke@1 | 829 | endPos = bp; |
duke@1 | 830 | processWhiteSpace(); |
duke@1 | 831 | break; |
duke@1 | 832 | case LF: // (Spec 3.4) |
duke@1 | 833 | scanChar(); |
duke@1 | 834 | endPos = bp; |
duke@1 | 835 | processLineTerminator(); |
duke@1 | 836 | break; |
duke@1 | 837 | case CR: // (Spec 3.4) |
duke@1 | 838 | scanChar(); |
duke@1 | 839 | if (ch == LF) { /* CR LF is consumed as a single line terminator */ |
duke@1 | 840 | scanChar(); |
duke@1 | 841 | } |
duke@1 | 842 | endPos = bp; |
duke@1 | 843 | processLineTerminator(); |
duke@1 | 844 | break; |
duke@1 | 845 | case 'A': case 'B': case 'C': case 'D': case 'E': |
duke@1 | 846 | case 'F': case 'G': case 'H': case 'I': case 'J': |
duke@1 | 847 | case 'K': case 'L': case 'M': case 'N': case 'O': |
duke@1 | 848 | case 'P': case 'Q': case 'R': case 'S': case 'T': |
duke@1 | 849 | case 'U': case 'V': case 'W': case 'X': case 'Y': |
duke@1 | 850 | case 'Z': |
duke@1 | 851 | case 'a': case 'b': case 'c': case 'd': case 'e': |
duke@1 | 852 | case 'f': case 'g': case 'h': case 'i': case 'j': |
duke@1 | 853 | case 'k': case 'l': case 'm': case 'n': case 'o': |
duke@1 | 854 | case 'p': case 'q': case 'r': case 's': case 't': |
duke@1 | 855 | case 'u': case 'v': case 'w': case 'x': case 'y': |
duke@1 | 856 | case 'z': |
duke@1 | 857 | case '$': case '_': |
duke@1 | 858 | scanIdent(); |
duke@1 | 859 | return; |
duke@1 | 860 | case '0': |
duke@1 | 861 | scanChar(); |
duke@1 | 862 | if (ch == 'x' || ch == 'X') { |
duke@1 | 863 | scanChar(); |
jjg@409 | 864 | skipIllegalUnderscores(); |
duke@1 | 865 | if (ch == '.') { |
duke@1 | 866 | scanHexFractionAndSuffix(false); |
duke@1 | 867 | } else if (digit(16) < 0) { |
duke@1 | 868 | lexError("invalid.hex.number"); |
duke@1 | 869 | } else { |
duke@1 | 870 | scanNumber(16); |
duke@1 | 871 | } |
jjg@409 | 872 | } else if (ch == 'b' || ch == 'B') { |
jjg@409 | 873 | if (!allowBinaryLiterals) { |
jjg@409 | 874 | lexError("unsupported.binary.lit", source.name); |
jjg@409 | 875 | allowBinaryLiterals = true; /* set after reporting so that this check does not fire again */ |
jjg@409 | 876 | } |
jjg@409 | 877 | scanChar(); |
jjg@409 | 878 | skipIllegalUnderscores(); |
jjg@423 | 879 | if (digit(2) < 0) { |
jjg@423 | 880 | lexError("invalid.binary.number"); |
jjg@423 | 881 | } else { |
jjg@423 | 882 | scanNumber(2); |
jjg@423 | 883 | } |
duke@1 | 884 | } else { |
duke@1 | 885 | putChar('0'); /* plain leading 0: what follows is scanned with radix 8 below */ |
jjg@409 | 886 | if (ch == '_') { |
jjg@409 | 887 | int savePos = bp; /* position of the first underscore, used if the run turns out to be illegal */ |
jjg@409 | 888 | do { |
jjg@409 | 889 | scanChar(); |
jjg@409 | 890 | } while (ch == '_'); |
jjg@409 | 891 | if (digit(10) < 0) { |
jjg@409 | 892 | lexError(savePos, "illegal.underscore"); |
jjg@409 | 893 | } |
jjg@409 | 894 | } |
duke@1 | 895 | scanNumber(8); |
duke@1 | 896 | } |
duke@1 | 897 | return; |
duke@1 | 898 | case '1': case '2': case '3': case '4': |
duke@1 | 899 | case '5': case '6': case '7': case '8': case '9': |
duke@1 | 900 | scanNumber(10); |
duke@1 | 901 | return; |
duke@1 | 902 | case '.': |
duke@1 | 903 | scanChar(); |
duke@1 | 904 | if ('0' <= ch && ch <= '9') { |
duke@1 | 905 | putChar('.'); |
duke@1 | 906 | scanFractionAndSuffix(); |
duke@1 | 907 | } else if (ch == '.') { |
duke@1 | 908 | putChar('.'); putChar('.'); |
duke@1 | 909 | scanChar(); |
duke@1 | 910 | if (ch == '.') { |
duke@1 | 911 | scanChar(); |
duke@1 | 912 | putChar('.'); |
duke@1 | 913 | token = ELLIPSIS; |
duke@1 | 914 | } else { |
duke@1 | 915 | lexError("malformed.fp.lit"); /* exactly two dots: reported with the floating-point literal message */ |
duke@1 | 916 | } |
duke@1 | 917 | } else { |
duke@1 | 918 | token = DOT; |
duke@1 | 919 | } |
duke@1 | 920 | return; |
duke@1 | 921 | case ',': |
duke@1 | 922 | scanChar(); token = COMMA; return; |
duke@1 | 923 | case ';': |
duke@1 | 924 | scanChar(); token = SEMI; return; |
duke@1 | 925 | case '(': |
duke@1 | 926 | scanChar(); token = LPAREN; return; |
duke@1 | 927 | case ')': |
duke@1 | 928 | scanChar(); token = RPAREN; return; |
duke@1 | 929 | case '[': |
duke@1 | 930 | scanChar(); token = LBRACKET; return; |
duke@1 | 931 | case ']': |
duke@1 | 932 | scanChar(); token = RBRACKET; return; |
duke@1 | 933 | case '{': |
duke@1 | 934 | scanChar(); token = LBRACE; return; |
duke@1 | 935 | case '}': |
duke@1 | 936 | scanChar(); token = RBRACE; return; |
duke@1 | 937 | case '/': |
duke@1 | 938 | scanChar(); |
duke@1 | 939 | if (ch == '/') { |
duke@1 | 940 | do { |
duke@1 | 941 | scanCommentChar(); |
duke@1 | 942 | } while (ch != CR && ch != LF && bp < buflen); |
duke@1 | 943 | if (bp < buflen) { /* a line comment that runs to the end of the buffer is not passed to processComment */ |
duke@1 | 944 | endPos = bp; |
duke@1 | 945 | processComment(CommentStyle.LINE); |
duke@1 | 946 | } |
duke@1 | 947 | break; |
duke@1 | 948 | } else if (ch == '*') { |
duke@1 | 949 | scanChar(); |
duke@1 | 950 | CommentStyle style; |
duke@1 | 951 | if (ch == '*') { |
duke@1 | 952 | style = CommentStyle.JAVADOC; |
duke@1 | 953 | scanDocComment(); |
duke@1 | 954 | } else { |
duke@1 | 955 | style = CommentStyle.BLOCK; |
duke@1 | 956 | while (bp < buflen) { |
duke@1 | 957 | if (ch == '*') { |
duke@1 | 958 | scanChar(); |
duke@1 | 959 | if (ch == '/') break; |
duke@1 | 960 | } else { |
duke@1 | 961 | scanCommentChar(); |
duke@1 | 962 | } |
duke@1 | 963 | } |
duke@1 | 964 | } |
duke@1 | 965 | if (ch == '/') { /* expected when the closing delimiter was found; its '/' is consumed next */ |
duke@1 | 966 | scanChar(); |
duke@1 | 967 | endPos = bp; |
duke@1 | 968 | processComment(style); |
duke@1 | 969 | break; |
duke@1 | 970 | } else { |
duke@1 | 971 | lexError("unclosed.comment"); |
duke@1 | 972 | return; |
duke@1 | 973 | } |
duke@1 | 974 | } else if (ch == '=') { |
duke@1 | 975 | name = names.slashequals; |
duke@1 | 976 | token = SLASHEQ; |
duke@1 | 977 | scanChar(); |
duke@1 | 978 | } else { |
duke@1 | 979 | name = names.slash; |
duke@1 | 980 | token = SLASH; |
duke@1 | 981 | } |
duke@1 | 982 | return; |
duke@1 | 983 | case '\'': |
duke@1 | 984 | scanChar(); |
duke@1 | 985 | if (ch == '\'') { |
duke@1 | 986 | lexError("empty.char.lit"); |
duke@1 | 987 | } else { |
duke@1 | 988 | if (ch == CR || ch == LF) |
duke@1 | 989 | lexError(pos, "illegal.line.end.in.char.lit"); |
duke@1 | 990 | scanLitChar(); |
duke@1 | 991 | if (ch == '\'') { |
duke@1 | 992 | scanChar(); |
duke@1 | 993 | token = CHARLITERAL; |
duke@1 | 994 | } else { |
duke@1 | 995 | lexError(pos, "unclosed.char.lit"); |
duke@1 | 996 | } |
duke@1 | 997 | } |
duke@1 | 998 | return; |
duke@1 | 999 | case '\"': |
duke@1 | 1000 | scanChar(); |
duke@1 | 1001 | while (ch != '\"' && ch != CR && ch != LF && bp < buflen) |
duke@1 | 1002 | scanLitChar(); |
duke@1 | 1003 | if (ch == '\"') { |
duke@1 | 1004 | token = STRINGLITERAL; |
duke@1 | 1005 | scanChar(); |
duke@1 | 1006 | } else { |
duke@1 | 1007 | lexError(pos, "unclosed.str.lit"); /* stopped at a line end or the end of the buffer before a closing quote */ |
duke@1 | 1008 | } |
duke@1 | 1009 | return; |
jrose@267 | 1010 | case '#': |
jrose@267 | 1011 | scanChar(); |
jrose@267 | 1012 | if (ch == '\"') { |
jrose@267 | 1013 | scanChar(); |
jrose@267 | 1014 | if (ch == '\"') |
jrose@267 | 1015 | lexError(pos, "empty.bytecode.ident"); |
jrose@267 | 1016 | while (ch != '\"' && ch != CR && ch != LF && bp < buflen) { |
jrose@267 | 1017 | scanBytecodeNameChar(); |
jrose@267 | 1018 | } |
jrose@267 | 1019 | if (ch == '\"') { |
jrose@267 | 1020 | name = names.fromChars(sbuf, 0, sp); |
jrose@267 | 1021 | token = IDENTIFIER; // even if #"int" or #"do" |
jrose@267 | 1022 | scanChar(); |
jrose@267 | 1023 | } else { |
jrose@267 | 1024 | lexError(pos, "unclosed.bytecode.ident"); |
jrose@267 | 1025 | } |
jrose@267 | 1026 | } else { |
jrose@267 | 1027 | lexError("illegal.char", String.valueOf((int)'#')); |
jrose@267 | 1028 | } |
jrose@267 | 1029 | return; |
duke@1 | 1030 | default: |
duke@1 | 1031 | if (isSpecial(ch)) { |
duke@1 | 1032 | scanOperator(); |
duke@1 | 1033 | } else { |
duke@1 | 1034 | boolean isJavaIdentifierStart; |
duke@1 | 1035 | if (ch < '\u0080') { |
duke@1 | 1036 | // all ASCII range chars already handled, above |
duke@1 | 1037 | isJavaIdentifierStart = false; |
duke@1 | 1038 | } else { |
duke@1 | 1039 | char high = scanSurrogates(); |
duke@1 | 1040 | if (high != 0) { /* nonzero result is used as the high half of a surrogate pair, with ch as the low half */ |
duke@1 | 1041 | if (sp == sbuf.length) { /* sbuf is full, so store through putChar (presumably it grows the buffer - confirm) */ |
duke@1 | 1042 | putChar(high); |
duke@1 | 1043 | } else { |
duke@1 | 1044 | sbuf[sp++] = high; |
duke@1 | 1045 | } |
duke@1 | 1046 | |
duke@1 | 1047 | isJavaIdentifierStart = Character.isJavaIdentifierStart( |
duke@1 | 1048 | Character.toCodePoint(high, ch)); |
duke@1 | 1049 | } else { |
duke@1 | 1050 | isJavaIdentifierStart = Character.isJavaIdentifierStart(ch); |
duke@1 | 1051 | } |
duke@1 | 1052 | } |
duke@1 | 1053 | if (isJavaIdentifierStart) { |
duke@1 | 1054 | scanIdent(); |
duke@1 | 1055 | } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5 |
duke@1 | 1056 | token = EOF; |
duke@1 | 1057 | pos = bp = eofPos; |
duke@1 | 1058 | } else { |
duke@1 | 1059 | lexError("illegal.char", String.valueOf((int)ch)); |
duke@1 | 1060 | scanChar(); |
duke@1 | 1061 | } |
duke@1 | 1062 | } |
duke@1 | 1063 | return; |
duke@1 | 1064 | } |
duke@1 | 1065 | } |
duke@1 | 1066 | } finally { |
duke@1 | 1067 | endPos = bp; /* applies to every exit path, including the returns inside the switch */ |
duke@1 | 1068 | if (scannerDebug) |
duke@1 | 1069 | System.out.println("nextToken(" + pos |
duke@1 | 1070 | + "," + endPos + ")=|" + |
duke@1 | 1071 | new String(getRawCharacters(pos, endPos)) |
duke@1 | 1072 | + "|"); |
duke@1 | 1073 | } |
duke@1 | 1074 | } |
duke@1 | 1075 | |
duke@1 | 1076 | /** Returns the current token, i.e. the one last stored by nextToken() or token(Token). |
duke@1 | 1077 | */ |
duke@1 | 1078 | public Token token() { |
duke@1 | 1079 | return this.token; |
duke@1 | 1080 | } |
duke@1 | 1081 | |
duke@1 | 1082 | /** Sets the current token; a following call to token() returns the value stored here. |
duke@1 | 1083 | * @param token the token to store as the current token */ |
duke@1 | 1084 | public void token(Token token) { |
duke@1 | 1085 | this.token = token; |
duke@1 | 1086 | } |
duke@1 | 1087 | |
duke@1 | 1088 | /** Returns the position of the current token, expressed as a 0-based |
duke@1 | 1089 | * offset from the start of the raw input stream, |
duke@1 | 1090 | * i.e. counted before unicode translation. |
duke@1 | 1091 | */ |
duke@1 | 1092 | public int pos() { |
duke@1 | 1093 | return this.pos; |
duke@1 | 1094 | } |
duke@1 | 1095 | |
duke@1 | 1096 | /** Returns the end position recorded for the current token. |
duke@1 | 1097 | */ |
duke@1 | 1098 | public int endPos() { |
duke@1 | 1099 | return this.endPos; |
duke@1 | 1100 | } |
duke@1 | 1101 | |
duke@1 | 1102 | /** Returns the end position that was recorded for the previous token. |
duke@1 | 1103 | */ |
duke@1 | 1104 | public int prevEndPos() { |
duke@1 | 1105 | return this.prevEndPos; |
duke@1 | 1106 | } |
duke@1 | 1107 | |
duke@1 | 1108 | /** Return the position where a lexical error occurred; |
duke@1 | 1109 | */ |
duke@1 | 1110 | public int errPos() { |
duke@1 | 1111 | return errPos; |
duke@1 | 1112 | } |
duke@1 | 1113 | |
duke@1 | 1114 | /** Set the position where a lexical error occurred; |
duke@1 | 1115 | */ |
duke@1 | 1116 | public void errPos(int pos) { |
duke@1 | 1117 | errPos = pos; |
duke@1 | 1118 | } |
duke@1 | 1119 | |
duke@1 | 1120 | /** Returns the Name recorded for the current token (an identifier's name or a token's name). |
duke@1 | 1121 | */ |
duke@1 | 1122 | public Name name() { |
duke@1 | 1123 | return this.name; |
duke@1 | 1124 | } |
duke@1 | 1125 | |
duke@1 | 1126 | /** Returns the radix recorded for a numeric literal token. |
duke@1 | 1127 | */ |
duke@1 | 1128 | public int radix() { |
duke@1 | 1129 | return this.radix; |
duke@1 | 1130 | } |
duke@1 | 1131 | |
duke@1 | 1132 | /** Reports whether a @deprecated tag was encountered in the last doc comment. |
duke@1 | 1133 | * The client is expected to clear the flag again through resetDeprecatedFlag. |
duke@1 | 1134 | */ |
duke@1 | 1135 | public boolean deprecatedFlag() { |
duke@1 | 1136 | return this.deprecatedFlag; |
duke@1 | 1137 | } |
duke@1 | 1138 | |
duke@1 | 1139 | /** Clears the flag reported by deprecatedFlag(). */ public void resetDeprecatedFlag() { |
duke@1 | 1140 | this.deprecatedFlag = false; |
duke@1 | 1141 | } |
duke@1 | 1142 | |
duke@1 | 1143 | /** |
duke@1 | 1144 | * Returns the documentation string of the current token. This implementation always returns null. NOTE(review): presumably a subclass that keeps doc comments overrides this - confirm. |
duke@1 | 1145 | */ |
duke@1 | 1146 | public String docComment() { |
duke@1 | 1147 | return null; |
duke@1 | 1148 | } |
duke@1 | 1149 | |
duke@1 | 1150 | /** |
duke@1 | 1151 | * Returns a newly allocated array holding the first buflen characters of the input buffer. |
duke@1 | 1152 | * Unicode escape sequences are copied as written; they are not translated. |
duke@1 | 1153 | */ |
duke@1 | 1154 | public char[] getRawCharacters() { |
duke@1 | 1155 | char[] copy = new char[buflen]; |
duke@1 | 1156 | System.arraycopy(buf, 0, copy, 0, copy.length); |
duke@1 | 1157 | return copy; |
duke@1 | 1158 | } |
duke@1 | 1159 | |
duke@1 | 1160 | /** |
duke@1 | 1161 | * Copies a slice of the input buffer into a newly allocated character array. |
duke@1 | 1162 | * The slice starts at <code>beginIndex</code> and stops just before |
duke@1 | 1163 | * <code>endIndex</code>, so the result holds |
duke@1 | 1164 | * <code>endIndex-beginIndex</code> characters. |
duke@1 | 1165 | * The index convention is the one used by |
duke@1 | 1166 | * <code>String.substring(beginIndex, endIndex)</code>. |
duke@1 | 1167 | * Unicode escape sequences are copied as written, not translated. |
duke@1 | 1168 | * |
duke@1 | 1169 | * @param beginIndex index of the first character copied (inclusive). |
duke@1 | 1170 | * @param endIndex index just past the last character copied (exclusive). |
duke@1 | 1171 | * @return a new array containing the requested characters. |
duke@1 | 1172 | * @throws IndexOutOfBoundsException if the range reaches outside the buffer array (an endIndex below beginIndex fails earlier, with NegativeArraySizeException) |
duke@1 | 1173 | */ |
duke@1 | 1174 | public char[] getRawCharacters(int beginIndex, int endIndex) { |
duke@1 | 1175 | int count = endIndex - beginIndex; |
duke@1 | 1176 | char[] slice = new char[count]; |
duke@1 | 1177 | System.arraycopy(buf, beginIndex, slice, 0, count); |
duke@1 | 1178 | return slice; |
duke@1 | 1179 | } |
duke@1 | 1180 | |
duke@1 | 1181 | /** Kind of comment handed to processComment(CommentStyle). */ public enum CommentStyle { |
duke@1 | 1182 | LINE, /* used by nextToken() for comments introduced by two slashes */ |
duke@1 | 1183 | BLOCK, /* used by nextToken() for slash-star comments whose third character is not a star */ |
duke@1 | 1184 | JAVADOC, /* used by nextToken() for slash-star comments whose third character is a star */ |
duke@1 | 1185 | } |
duke@1 | 1186 | |
duke@1 | 1187 | /** |
duke@1 | 1188 | * Hook invoked once a complete comment has been scanned; pos and endPos |
duke@1 | 1189 | * mark the comment boundary. This implementation only prints a trace line when scannerDebug is set. |
duke@1 | 1190 | */ |
duke@1 | 1191 | protected void processComment(CommentStyle style) { |
duke@1 | 1192 | if (!scannerDebug) { |
duke@1 | 1193 | return; |
duke@1 | 1194 | } |
duke@1 | 1195 | System.out.println("processComment(" + pos + "," + endPos + "," + style |
duke@1 | 1196 | + ")=|" + new String(getRawCharacters(pos, endPos)) + "|"); |
duke@1 | 1197 | } |
duke@1 | 1198 | |
duke@1 | 1199 | /** |
duke@1 | 1200 | * Hook invoked once a complete run of whitespace has been scanned; pos and endPos |
duke@1 | 1201 | * mark the whitespace boundary. This implementation only prints a trace line when scannerDebug is set. |
duke@1 | 1202 | */ |
duke@1 | 1203 | protected void processWhiteSpace() { |
duke@1 | 1204 | if (!scannerDebug) { |
duke@1 | 1205 | return; |
duke@1 | 1206 | } |
duke@1 | 1207 | System.out.println("processWhitespace(" + pos + "," + endPos |
duke@1 | 1208 | + ")=|" + new String(getRawCharacters(pos, endPos)) + "|"); |
duke@1 | 1209 | } |
duke@1 | 1210 | |
duke@1 | 1211 | /** |
duke@1 | 1212 | * Hook invoked after a line terminator has been processed. This implementation only prints a trace line when scannerDebug is set. |
duke@1 | 1213 | */ |
duke@1 | 1214 | protected void processLineTerminator() { |
duke@1 | 1215 | if (!scannerDebug) { |
duke@1 | 1216 | return; |
duke@1 | 1217 | } |
duke@1 | 1218 | System.out.println("processTerminator(" + pos + "," + endPos |
duke@1 | 1219 | + ")=|" + new String(getRawCharacters(pos, endPos)) + "|"); |
duke@1 | 1220 | } |
duke@1 | 1221 | |
duke@1 | 1222 | /** Builds a map for translating between line numbers and |
duke@1 | 1223 | * positions in the input, by passing buf and buflen to Position.makeLineMap. NOTE(review): the meaning of the final false argument is not visible here - confirm against Position. |
duke@1 | 1224 | * |
duke@1 | 1225 | * @return a LineMap */ |
duke@1 | 1226 | public Position.LineMap getLineMap() { |
duke@1 | 1227 | return Position.makeLineMap(buf, buflen, false); |
duke@1 | 1228 | } |
duke@1 | 1229 | |
duke@1 | 1230 | } |