src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java

Sun, 17 Feb 2013 16:44:55 -0500

author
dholmes
date
Sun, 17 Feb 2013 16:44:55 -0500
changeset 1571
af8417e590f4
parent 1521
71f35e4b93a5
child 2525
2eb010b6cb22
permissions
-rw-r--r--

Merge

/*
 * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
mcimadamore@1113 25
mcimadamore@1113 26 package com.sun.tools.javac.parser;
mcimadamore@1113 27
jjg@1339 28 import java.nio.CharBuffer;
jjg@1339 29 import java.util.Arrays;
jjg@1339 30
mcimadamore@1113 31 import com.sun.tools.javac.file.JavacFileManager;
jjg@1339 32 import com.sun.tools.javac.util.ArrayUtils;
mcimadamore@1125 33 import com.sun.tools.javac.util.Log;
mcimadamore@1125 34 import com.sun.tools.javac.util.Name;
mcimadamore@1125 35 import com.sun.tools.javac.util.Names;
mcimadamore@1125 36
mcimadamore@1113 37 import static com.sun.tools.javac.util.LayoutCharacters.*;
mcimadamore@1113 38
mcimadamore@1113 39 /** The char reader used by the javac lexer/tokenizer. Returns the sequence of
mcimadamore@1113 40 * characters contained in the input stream, handling unicode escape accordingly.
jjg@1521 41 * Additionally, it provides features for saving chars into a buffer and to retrieve
mcimadamore@1113 42 * them at a later stage.
mcimadamore@1113 43 *
mcimadamore@1113 44 * <p><b>This is NOT part of any supported API.
mcimadamore@1113 45 * If you write code that depends on this, you do so at your own risk.
mcimadamore@1113 46 * This code and its internal interfaces are subject to change or
mcimadamore@1113 47 * deletion without notice.</b>
mcimadamore@1113 48 */
mcimadamore@1113 49 public class UnicodeReader {
mcimadamore@1113 50
mcimadamore@1113 51 /** The input buffer, index of next character to be read,
mcimadamore@1113 52 * index of one past last character in buffer.
mcimadamore@1113 53 */
mcimadamore@1113 54 protected char[] buf;
mcimadamore@1113 55 protected int bp;
mcimadamore@1113 56 protected final int buflen;
mcimadamore@1113 57
mcimadamore@1113 58 /** The current character.
mcimadamore@1113 59 */
mcimadamore@1113 60 protected char ch;
mcimadamore@1113 61
mcimadamore@1113 62 /** The buffer index of the last converted unicode character
mcimadamore@1113 63 */
mcimadamore@1113 64 protected int unicodeConversionBp = -1;
mcimadamore@1113 65
mcimadamore@1113 66 protected Log log;
mcimadamore@1125 67 protected Names names;
mcimadamore@1125 68
mcimadamore@1125 69 /** A character buffer for saved chars.
mcimadamore@1125 70 */
mcimadamore@1125 71 protected char[] sbuf = new char[128];
mcimadamore@1125 72 protected int sp;
mcimadamore@1113 73
mcimadamore@1113 74 /**
mcimadamore@1113 75 * Create a scanner from the input array. This method might
mcimadamore@1113 76 * modify the array. To avoid copying the input array, ensure
mcimadamore@1113 77 * that {@code inputLength < input.length} or
mcimadamore@1113 78 * {@code input[input.length -1]} is a white space character.
mcimadamore@1113 79 *
jjg@1358 80 * @param sf the factory which created this Scanner
jjg@1358 81 * @param buffer the input, might be modified
mcimadamore@1113 82 * Must be positive and less than or equal to input.length.
mcimadamore@1113 83 */
mcimadamore@1113 84 protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
mcimadamore@1113 85 this(sf, JavacFileManager.toArray(buffer), buffer.limit());
mcimadamore@1113 86 }
mcimadamore@1113 87
mcimadamore@1113 88 protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
mcimadamore@1113 89 log = sf.log;
mcimadamore@1125 90 names = sf.names;
mcimadamore@1113 91 if (inputLength == input.length) {
mcimadamore@1113 92 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
mcimadamore@1113 93 inputLength--;
mcimadamore@1113 94 } else {
jjg@1339 95 input = Arrays.copyOf(input, inputLength + 1);
mcimadamore@1113 96 }
mcimadamore@1113 97 }
mcimadamore@1113 98 buf = input;
mcimadamore@1113 99 buflen = inputLength;
mcimadamore@1113 100 buf[buflen] = EOI;
mcimadamore@1113 101 bp = -1;
mcimadamore@1113 102 scanChar();
mcimadamore@1113 103 }
mcimadamore@1113 104
mcimadamore@1113 105 /** Read next character.
mcimadamore@1113 106 */
mcimadamore@1113 107 protected void scanChar() {
mcimadamore@1113 108 if (bp < buflen) {
mcimadamore@1113 109 ch = buf[++bp];
mcimadamore@1113 110 if (ch == '\\') {
mcimadamore@1113 111 convertUnicode();
mcimadamore@1113 112 }
mcimadamore@1113 113 }
mcimadamore@1113 114 }
mcimadamore@1113 115
mcimadamore@1125 116 /** Read next character in comment, skipping over double '\' characters.
mcimadamore@1125 117 */
mcimadamore@1125 118 protected void scanCommentChar() {
mcimadamore@1125 119 scanChar();
mcimadamore@1125 120 if (ch == '\\') {
mcimadamore@1125 121 if (peekChar() == '\\' && !isUnicode()) {
mcimadamore@1125 122 skipChar();
mcimadamore@1125 123 } else {
mcimadamore@1125 124 convertUnicode();
mcimadamore@1125 125 }
mcimadamore@1125 126 }
mcimadamore@1125 127 }
mcimadamore@1125 128
mcimadamore@1125 129 /** Append a character to sbuf.
mcimadamore@1125 130 */
mcimadamore@1125 131 protected void putChar(char ch, boolean scan) {
jjg@1339 132 sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
mcimadamore@1125 133 sbuf[sp++] = ch;
mcimadamore@1125 134 if (scan)
mcimadamore@1125 135 scanChar();
mcimadamore@1125 136 }
mcimadamore@1125 137
mcimadamore@1125 138 protected void putChar(char ch) {
mcimadamore@1125 139 putChar(ch, false);
mcimadamore@1125 140 }
mcimadamore@1125 141
mcimadamore@1125 142 protected void putChar(boolean scan) {
mcimadamore@1125 143 putChar(ch, scan);
mcimadamore@1125 144 }
mcimadamore@1125 145
mcimadamore@1125 146 Name name() {
mcimadamore@1125 147 return names.fromChars(sbuf, 0, sp);
mcimadamore@1125 148 }
mcimadamore@1125 149
mcimadamore@1125 150 String chars() {
mcimadamore@1125 151 return new String(sbuf, 0, sp);
mcimadamore@1125 152 }
mcimadamore@1125 153
mcimadamore@1113 154 /** Convert unicode escape; bp points to initial '\' character
mcimadamore@1113 155 * (Spec 3.3).
mcimadamore@1113 156 */
mcimadamore@1113 157 protected void convertUnicode() {
mcimadamore@1113 158 if (ch == '\\' && unicodeConversionBp != bp) {
mcimadamore@1113 159 bp++; ch = buf[bp];
mcimadamore@1113 160 if (ch == 'u') {
mcimadamore@1113 161 do {
mcimadamore@1113 162 bp++; ch = buf[bp];
mcimadamore@1113 163 } while (ch == 'u');
mcimadamore@1113 164 int limit = bp + 3;
mcimadamore@1113 165 if (limit < buflen) {
mcimadamore@1113 166 int d = digit(bp, 16);
mcimadamore@1113 167 int code = d;
mcimadamore@1113 168 while (bp < limit && d >= 0) {
mcimadamore@1113 169 bp++; ch = buf[bp];
mcimadamore@1113 170 d = digit(bp, 16);
mcimadamore@1113 171 code = (code << 4) + d;
mcimadamore@1113 172 }
mcimadamore@1113 173 if (d >= 0) {
mcimadamore@1113 174 ch = (char)code;
mcimadamore@1113 175 unicodeConversionBp = bp;
mcimadamore@1113 176 return;
mcimadamore@1113 177 }
mcimadamore@1113 178 }
mcimadamore@1113 179 log.error(bp, "illegal.unicode.esc");
mcimadamore@1113 180 } else {
mcimadamore@1113 181 bp--;
mcimadamore@1113 182 ch = '\\';
mcimadamore@1113 183 }
mcimadamore@1113 184 }
mcimadamore@1113 185 }
mcimadamore@1113 186
mcimadamore@1113 187 /** Are surrogates supported?
mcimadamore@1113 188 */
mcimadamore@1113 189 final static boolean surrogatesSupported = surrogatesSupported();
mcimadamore@1113 190 private static boolean surrogatesSupported() {
mcimadamore@1113 191 try {
mcimadamore@1113 192 Character.isHighSurrogate('a');
mcimadamore@1113 193 return true;
mcimadamore@1113 194 } catch (NoSuchMethodError ex) {
mcimadamore@1113 195 return false;
mcimadamore@1113 196 }
mcimadamore@1113 197 }
mcimadamore@1113 198
mcimadamore@1113 199 /** Scan surrogate pairs. If 'ch' is a high surrogate and
mcimadamore@1113 200 * the next character is a low surrogate, then put the low
mcimadamore@1113 201 * surrogate in 'ch', and return the high surrogate.
mcimadamore@1113 202 * otherwise, just return 0.
mcimadamore@1113 203 */
mcimadamore@1113 204 protected char scanSurrogates() {
mcimadamore@1113 205 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
mcimadamore@1113 206 char high = ch;
mcimadamore@1113 207
mcimadamore@1113 208 scanChar();
mcimadamore@1113 209
mcimadamore@1113 210 if (Character.isLowSurrogate(ch)) {
mcimadamore@1113 211 return high;
mcimadamore@1113 212 }
mcimadamore@1113 213
mcimadamore@1113 214 ch = high;
mcimadamore@1113 215 }
mcimadamore@1113 216
mcimadamore@1113 217 return 0;
mcimadamore@1113 218 }
mcimadamore@1113 219
mcimadamore@1113 220 /** Convert an ASCII digit from its base (8, 10, or 16)
mcimadamore@1113 221 * to its value.
mcimadamore@1113 222 */
mcimadamore@1113 223 protected int digit(int pos, int base) {
mcimadamore@1113 224 char c = ch;
mcimadamore@1113 225 int result = Character.digit(c, base);
mcimadamore@1113 226 if (result >= 0 && c > 0x7f) {
mcimadamore@1113 227 log.error(pos + 1, "illegal.nonascii.digit");
mcimadamore@1113 228 ch = "0123456789abcdef".charAt(result);
mcimadamore@1113 229 }
mcimadamore@1113 230 return result;
mcimadamore@1113 231 }
mcimadamore@1113 232
mcimadamore@1113 233 protected boolean isUnicode() {
mcimadamore@1113 234 return unicodeConversionBp == bp;
mcimadamore@1113 235 }
mcimadamore@1113 236
mcimadamore@1113 237 protected void skipChar() {
mcimadamore@1113 238 bp++;
mcimadamore@1113 239 }
mcimadamore@1113 240
mcimadamore@1113 241 protected char peekChar() {
mcimadamore@1113 242 return buf[bp + 1];
mcimadamore@1113 243 }
mcimadamore@1113 244
mcimadamore@1113 245 /**
mcimadamore@1113 246 * Returns a copy of the input buffer, up to its inputLength.
mcimadamore@1113 247 * Unicode escape sequences are not translated.
mcimadamore@1113 248 */
mcimadamore@1113 249 public char[] getRawCharacters() {
mcimadamore@1113 250 char[] chars = new char[buflen];
mcimadamore@1113 251 System.arraycopy(buf, 0, chars, 0, buflen);
mcimadamore@1113 252 return chars;
mcimadamore@1113 253 }
mcimadamore@1113 254
mcimadamore@1113 255 /**
mcimadamore@1113 256 * Returns a copy of a character array subset of the input buffer.
jjg@1358 257 * The returned array begins at the {@code beginIndex} and
jjg@1358 258 * extends to the character at index {@code endIndex - 1}.
jjg@1358 259 * Thus the length of the substring is {@code endIndex-beginIndex}.
mcimadamore@1113 260 * This behavior is like
jjg@1358 261 * {@code String.substring(beginIndex, endIndex)}.
mcimadamore@1113 262 * Unicode escape sequences are not translated.
mcimadamore@1113 263 *
mcimadamore@1113 264 * @param beginIndex the beginning index, inclusive.
mcimadamore@1113 265 * @param endIndex the ending index, exclusive.
jjg@1358 266 * @throws ArrayIndexOutOfBoundsException if either offset is outside of the
mcimadamore@1113 267 * array bounds
mcimadamore@1113 268 */
mcimadamore@1113 269 public char[] getRawCharacters(int beginIndex, int endIndex) {
mcimadamore@1113 270 int length = endIndex - beginIndex;
mcimadamore@1113 271 char[] chars = new char[length];
mcimadamore@1113 272 System.arraycopy(buf, beginIndex, chars, 0, length);
mcimadamore@1113 273 return chars;
mcimadamore@1113 274 }
mcimadamore@1113 275 }

mercurial