src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java

Mon, 24 Oct 2011 13:00:20 +0100

author
mcimadamore
date
Mon, 24 Oct 2011 13:00:20 +0100
changeset 1113
d346ab55031b
child 1125
56830d5cb5bb
permissions
-rw-r--r--

7096014: Javac tokens should retain state
Summary: Refactor javac tokens from enum constants to stateful instances (to keep track of position, comments, etc.)
Reviewed-by: jjg

mcimadamore@1113 1 /*
mcimadamore@1113 2 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
mcimadamore@1113 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
mcimadamore@1113 4 *
mcimadamore@1113 5 * This code is free software; you can redistribute it and/or modify it
mcimadamore@1113 6 * under the terms of the GNU General Public License version 2 only, as
mcimadamore@1113 7 * published by the Free Software Foundation. Oracle designates this
mcimadamore@1113 8 * particular file as subject to the "Classpath" exception as provided
mcimadamore@1113 9 * by Oracle in the LICENSE file that accompanied this code.
mcimadamore@1113 10 *
mcimadamore@1113 11 * This code is distributed in the hope that it will be useful, but WITHOUT
mcimadamore@1113 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
mcimadamore@1113 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
mcimadamore@1113 14 * version 2 for more details (a copy is included in the LICENSE file that
mcimadamore@1113 15 * accompanied this code).
mcimadamore@1113 16 *
mcimadamore@1113 17 * You should have received a copy of the GNU General Public License version
mcimadamore@1113 18 * 2 along with this work; if not, write to the Free Software Foundation,
mcimadamore@1113 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
mcimadamore@1113 20 *
mcimadamore@1113 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
mcimadamore@1113 22 * or visit www.oracle.com if you need additional information or have any
mcimadamore@1113 23 * questions.
mcimadamore@1113 24 */
mcimadamore@1113 25
mcimadamore@1113 26 package com.sun.tools.javac.parser;
mcimadamore@1113 27
mcimadamore@1113 28 import com.sun.tools.javac.file.JavacFileManager;
mcimadamore@1113 29 import java.nio.CharBuffer;
mcimadamore@1113 30 import com.sun.tools.javac.util.Log;
mcimadamore@1113 31 import static com.sun.tools.javac.util.LayoutCharacters.*;
mcimadamore@1113 32
mcimadamore@1113 33 /** The char reader used by the javac lexer/tokenizer. Returns the sequence of
mcimadamore@1113 34 * characters contained in the input stream, handling unicode escape accordingly.
mcimadamore@1113 35 * Additionally, it provide features for saving chars into a buffer and to retrieve
mcimadamore@1113 36 * them at a later stage.
mcimadamore@1113 37 *
mcimadamore@1113 38 * <p><b>This is NOT part of any supported API.
mcimadamore@1113 39 * If you write code that depends on this, you do so at your own risk.
mcimadamore@1113 40 * This code and its internal interfaces are subject to change or
mcimadamore@1113 41 * deletion without notice.</b>
mcimadamore@1113 42 */
mcimadamore@1113 43 public class UnicodeReader {
mcimadamore@1113 44
mcimadamore@1113 45 /** The input buffer, index of next character to be read,
mcimadamore@1113 46 * index of one past last character in buffer.
mcimadamore@1113 47 */
mcimadamore@1113 48 protected char[] buf;
mcimadamore@1113 49 protected int bp;
mcimadamore@1113 50 protected final int buflen;
mcimadamore@1113 51
mcimadamore@1113 52 /** The current character.
mcimadamore@1113 53 */
mcimadamore@1113 54 protected char ch;
mcimadamore@1113 55
mcimadamore@1113 56 /** The buffer index of the last converted unicode character
mcimadamore@1113 57 */
mcimadamore@1113 58 protected int unicodeConversionBp = -1;
mcimadamore@1113 59
mcimadamore@1113 60 protected Log log;
mcimadamore@1113 61
mcimadamore@1113 62 /**
mcimadamore@1113 63 * Create a scanner from the input array. This method might
mcimadamore@1113 64 * modify the array. To avoid copying the input array, ensure
mcimadamore@1113 65 * that {@code inputLength < input.length} or
mcimadamore@1113 66 * {@code input[input.length -1]} is a white space character.
mcimadamore@1113 67 *
mcimadamore@1113 68 * @param fac the factory which created this Scanner
mcimadamore@1113 69 * @param input the input, might be modified
mcimadamore@1113 70 * @param inputLength the size of the input.
mcimadamore@1113 71 * Must be positive and less than or equal to input.length.
mcimadamore@1113 72 */
mcimadamore@1113 73 protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
mcimadamore@1113 74 this(sf, JavacFileManager.toArray(buffer), buffer.limit());
mcimadamore@1113 75 }
mcimadamore@1113 76
mcimadamore@1113 77 protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
mcimadamore@1113 78 log = sf.log;
mcimadamore@1113 79 if (inputLength == input.length) {
mcimadamore@1113 80 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
mcimadamore@1113 81 inputLength--;
mcimadamore@1113 82 } else {
mcimadamore@1113 83 char[] newInput = new char[inputLength + 1];
mcimadamore@1113 84 System.arraycopy(input, 0, newInput, 0, input.length);
mcimadamore@1113 85 input = newInput;
mcimadamore@1113 86 }
mcimadamore@1113 87 }
mcimadamore@1113 88 buf = input;
mcimadamore@1113 89 buflen = inputLength;
mcimadamore@1113 90 buf[buflen] = EOI;
mcimadamore@1113 91 bp = -1;
mcimadamore@1113 92 scanChar();
mcimadamore@1113 93 }
mcimadamore@1113 94
mcimadamore@1113 95 /** Read next character.
mcimadamore@1113 96 */
mcimadamore@1113 97 protected void scanChar() {
mcimadamore@1113 98 if (bp < buflen) {
mcimadamore@1113 99 ch = buf[++bp];
mcimadamore@1113 100 if (ch == '\\') {
mcimadamore@1113 101 convertUnicode();
mcimadamore@1113 102 }
mcimadamore@1113 103 }
mcimadamore@1113 104 }
mcimadamore@1113 105
mcimadamore@1113 106 /** Convert unicode escape; bp points to initial '\' character
mcimadamore@1113 107 * (Spec 3.3).
mcimadamore@1113 108 */
mcimadamore@1113 109 protected void convertUnicode() {
mcimadamore@1113 110 if (ch == '\\' && unicodeConversionBp != bp) {
mcimadamore@1113 111 bp++; ch = buf[bp];
mcimadamore@1113 112 if (ch == 'u') {
mcimadamore@1113 113 do {
mcimadamore@1113 114 bp++; ch = buf[bp];
mcimadamore@1113 115 } while (ch == 'u');
mcimadamore@1113 116 int limit = bp + 3;
mcimadamore@1113 117 if (limit < buflen) {
mcimadamore@1113 118 int d = digit(bp, 16);
mcimadamore@1113 119 int code = d;
mcimadamore@1113 120 while (bp < limit && d >= 0) {
mcimadamore@1113 121 bp++; ch = buf[bp];
mcimadamore@1113 122 d = digit(bp, 16);
mcimadamore@1113 123 code = (code << 4) + d;
mcimadamore@1113 124 }
mcimadamore@1113 125 if (d >= 0) {
mcimadamore@1113 126 ch = (char)code;
mcimadamore@1113 127 unicodeConversionBp = bp;
mcimadamore@1113 128 return;
mcimadamore@1113 129 }
mcimadamore@1113 130 }
mcimadamore@1113 131 log.error(bp, "illegal.unicode.esc");
mcimadamore@1113 132 } else {
mcimadamore@1113 133 bp--;
mcimadamore@1113 134 ch = '\\';
mcimadamore@1113 135 }
mcimadamore@1113 136 }
mcimadamore@1113 137 }
mcimadamore@1113 138
mcimadamore@1113 139 /** Are surrogates supported?
mcimadamore@1113 140 */
mcimadamore@1113 141 final static boolean surrogatesSupported = surrogatesSupported();
mcimadamore@1113 142 private static boolean surrogatesSupported() {
mcimadamore@1113 143 try {
mcimadamore@1113 144 Character.isHighSurrogate('a');
mcimadamore@1113 145 return true;
mcimadamore@1113 146 } catch (NoSuchMethodError ex) {
mcimadamore@1113 147 return false;
mcimadamore@1113 148 }
mcimadamore@1113 149 }
mcimadamore@1113 150
mcimadamore@1113 151 /** Scan surrogate pairs. If 'ch' is a high surrogate and
mcimadamore@1113 152 * the next character is a low surrogate, then put the low
mcimadamore@1113 153 * surrogate in 'ch', and return the high surrogate.
mcimadamore@1113 154 * otherwise, just return 0.
mcimadamore@1113 155 */
mcimadamore@1113 156 protected char scanSurrogates() {
mcimadamore@1113 157 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
mcimadamore@1113 158 char high = ch;
mcimadamore@1113 159
mcimadamore@1113 160 scanChar();
mcimadamore@1113 161
mcimadamore@1113 162 if (Character.isLowSurrogate(ch)) {
mcimadamore@1113 163 return high;
mcimadamore@1113 164 }
mcimadamore@1113 165
mcimadamore@1113 166 ch = high;
mcimadamore@1113 167 }
mcimadamore@1113 168
mcimadamore@1113 169 return 0;
mcimadamore@1113 170 }
mcimadamore@1113 171
mcimadamore@1113 172 /** Convert an ASCII digit from its base (8, 10, or 16)
mcimadamore@1113 173 * to its value.
mcimadamore@1113 174 */
mcimadamore@1113 175 protected int digit(int pos, int base) {
mcimadamore@1113 176 char c = ch;
mcimadamore@1113 177 int result = Character.digit(c, base);
mcimadamore@1113 178 if (result >= 0 && c > 0x7f) {
mcimadamore@1113 179 log.error(pos + 1, "illegal.nonascii.digit");
mcimadamore@1113 180 ch = "0123456789abcdef".charAt(result);
mcimadamore@1113 181 }
mcimadamore@1113 182 return result;
mcimadamore@1113 183 }
mcimadamore@1113 184
mcimadamore@1113 185 protected boolean isUnicode() {
mcimadamore@1113 186 return unicodeConversionBp == bp;
mcimadamore@1113 187 }
mcimadamore@1113 188
mcimadamore@1113 189 protected void skipChar() {
mcimadamore@1113 190 bp++;
mcimadamore@1113 191 }
mcimadamore@1113 192
mcimadamore@1113 193 protected char peekChar() {
mcimadamore@1113 194 return buf[bp + 1];
mcimadamore@1113 195 }
mcimadamore@1113 196
mcimadamore@1113 197 /**
mcimadamore@1113 198 * Returns a copy of the input buffer, up to its inputLength.
mcimadamore@1113 199 * Unicode escape sequences are not translated.
mcimadamore@1113 200 */
mcimadamore@1113 201 public char[] getRawCharacters() {
mcimadamore@1113 202 char[] chars = new char[buflen];
mcimadamore@1113 203 System.arraycopy(buf, 0, chars, 0, buflen);
mcimadamore@1113 204 return chars;
mcimadamore@1113 205 }
mcimadamore@1113 206
mcimadamore@1113 207 /**
mcimadamore@1113 208 * Returns a copy of a character array subset of the input buffer.
mcimadamore@1113 209 * The returned array begins at the <code>beginIndex</code> and
mcimadamore@1113 210 * extends to the character at index <code>endIndex - 1</code>.
mcimadamore@1113 211 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
mcimadamore@1113 212 * This behavior is like
mcimadamore@1113 213 * <code>String.substring(beginIndex, endIndex)</code>.
mcimadamore@1113 214 * Unicode escape sequences are not translated.
mcimadamore@1113 215 *
mcimadamore@1113 216 * @param beginIndex the beginning index, inclusive.
mcimadamore@1113 217 * @param endIndex the ending index, exclusive.
mcimadamore@1113 218 * @throws IndexOutOfBounds if either offset is outside of the
mcimadamore@1113 219 * array bounds
mcimadamore@1113 220 */
mcimadamore@1113 221 public char[] getRawCharacters(int beginIndex, int endIndex) {
mcimadamore@1113 222 int length = endIndex - beginIndex;
mcimadamore@1113 223 char[] chars = new char[length];
mcimadamore@1113 224 System.arraycopy(buf, beginIndex, chars, 0, length);
mcimadamore@1113 225 return chars;
mcimadamore@1113 226 }
mcimadamore@1113 227 }

mercurial