src/share/classes/com/sun/tools/javac/parser/UnicodeReader.java

Fri, 04 Nov 2011 12:36:40 +0000

author
mcimadamore
date
Fri, 04 Nov 2011 12:36:40 +0000
changeset 1125
56830d5cb5bb
parent 1113
d346ab55031b
child 1339
0e5899f09dab
permissions
-rw-r--r--

7104201: Refactor DocCommentScanner
Summary: Add new Comment helper class to parse contents of comments in source code
Reviewed-by: jjg

mcimadamore@1113 1 /*
mcimadamore@1113 2 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
mcimadamore@1113 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
mcimadamore@1113 4 *
mcimadamore@1113 5 * This code is free software; you can redistribute it and/or modify it
mcimadamore@1113 6 * under the terms of the GNU General Public License version 2 only, as
mcimadamore@1113 7 * published by the Free Software Foundation. Oracle designates this
mcimadamore@1113 8 * particular file as subject to the "Classpath" exception as provided
mcimadamore@1113 9 * by Oracle in the LICENSE file that accompanied this code.
mcimadamore@1113 10 *
mcimadamore@1113 11 * This code is distributed in the hope that it will be useful, but WITHOUT
mcimadamore@1113 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
mcimadamore@1113 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
mcimadamore@1113 14 * version 2 for more details (a copy is included in the LICENSE file that
mcimadamore@1113 15 * accompanied this code).
mcimadamore@1113 16 *
mcimadamore@1113 17 * You should have received a copy of the GNU General Public License version
mcimadamore@1113 18 * 2 along with this work; if not, write to the Free Software Foundation,
mcimadamore@1113 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
mcimadamore@1113 20 *
mcimadamore@1113 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
mcimadamore@1113 22 * or visit www.oracle.com if you need additional information or have any
mcimadamore@1113 23 * questions.
mcimadamore@1113 24 */
mcimadamore@1113 25
mcimadamore@1113 26 package com.sun.tools.javac.parser;
mcimadamore@1113 27
mcimadamore@1113 28 import com.sun.tools.javac.file.JavacFileManager;
mcimadamore@1125 29 import com.sun.tools.javac.util.Log;
mcimadamore@1125 30 import com.sun.tools.javac.util.Name;
mcimadamore@1125 31 import com.sun.tools.javac.util.Names;
mcimadamore@1125 32
mcimadamore@1113 33 import java.nio.CharBuffer;
mcimadamore@1125 34
mcimadamore@1113 35 import static com.sun.tools.javac.util.LayoutCharacters.*;
mcimadamore@1113 36
mcimadamore@1113 37 /** The char reader used by the javac lexer/tokenizer. Returns the sequence of
mcimadamore@1113 38 * characters contained in the input stream, handling unicode escape accordingly.
mcimadamore@1113 39 * Additionally, it provide features for saving chars into a buffer and to retrieve
mcimadamore@1113 40 * them at a later stage.
mcimadamore@1113 41 *
mcimadamore@1113 42 * <p><b>This is NOT part of any supported API.
mcimadamore@1113 43 * If you write code that depends on this, you do so at your own risk.
mcimadamore@1113 44 * This code and its internal interfaces are subject to change or
mcimadamore@1113 45 * deletion without notice.</b>
mcimadamore@1113 46 */
mcimadamore@1113 47 public class UnicodeReader {
mcimadamore@1113 48
mcimadamore@1113 49 /** The input buffer, index of next character to be read,
mcimadamore@1113 50 * index of one past last character in buffer.
mcimadamore@1113 51 */
mcimadamore@1113 52 protected char[] buf;
mcimadamore@1113 53 protected int bp;
mcimadamore@1113 54 protected final int buflen;
mcimadamore@1113 55
mcimadamore@1113 56 /** The current character.
mcimadamore@1113 57 */
mcimadamore@1113 58 protected char ch;
mcimadamore@1113 59
mcimadamore@1113 60 /** The buffer index of the last converted unicode character
mcimadamore@1113 61 */
mcimadamore@1113 62 protected int unicodeConversionBp = -1;
mcimadamore@1113 63
mcimadamore@1113 64 protected Log log;
mcimadamore@1125 65 protected Names names;
mcimadamore@1125 66
mcimadamore@1125 67 /** A character buffer for saved chars.
mcimadamore@1125 68 */
mcimadamore@1125 69 protected char[] sbuf = new char[128];
mcimadamore@1125 70 protected int sp;
mcimadamore@1113 71
mcimadamore@1113 72 /**
mcimadamore@1113 73 * Create a scanner from the input array. This method might
mcimadamore@1113 74 * modify the array. To avoid copying the input array, ensure
mcimadamore@1113 75 * that {@code inputLength < input.length} or
mcimadamore@1113 76 * {@code input[input.length -1]} is a white space character.
mcimadamore@1113 77 *
mcimadamore@1113 78 * @param fac the factory which created this Scanner
mcimadamore@1113 79 * @param input the input, might be modified
mcimadamore@1113 80 * @param inputLength the size of the input.
mcimadamore@1113 81 * Must be positive and less than or equal to input.length.
mcimadamore@1113 82 */
mcimadamore@1113 83 protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
mcimadamore@1113 84 this(sf, JavacFileManager.toArray(buffer), buffer.limit());
mcimadamore@1113 85 }
mcimadamore@1113 86
mcimadamore@1113 87 protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
mcimadamore@1113 88 log = sf.log;
mcimadamore@1125 89 names = sf.names;
mcimadamore@1113 90 if (inputLength == input.length) {
mcimadamore@1113 91 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
mcimadamore@1113 92 inputLength--;
mcimadamore@1113 93 } else {
mcimadamore@1113 94 char[] newInput = new char[inputLength + 1];
mcimadamore@1113 95 System.arraycopy(input, 0, newInput, 0, input.length);
mcimadamore@1113 96 input = newInput;
mcimadamore@1113 97 }
mcimadamore@1113 98 }
mcimadamore@1113 99 buf = input;
mcimadamore@1113 100 buflen = inputLength;
mcimadamore@1113 101 buf[buflen] = EOI;
mcimadamore@1113 102 bp = -1;
mcimadamore@1113 103 scanChar();
mcimadamore@1113 104 }
mcimadamore@1113 105
mcimadamore@1113 106 /** Read next character.
mcimadamore@1113 107 */
mcimadamore@1113 108 protected void scanChar() {
mcimadamore@1113 109 if (bp < buflen) {
mcimadamore@1113 110 ch = buf[++bp];
mcimadamore@1113 111 if (ch == '\\') {
mcimadamore@1113 112 convertUnicode();
mcimadamore@1113 113 }
mcimadamore@1113 114 }
mcimadamore@1113 115 }
mcimadamore@1113 116
mcimadamore@1125 117 /** Read next character in comment, skipping over double '\' characters.
mcimadamore@1125 118 */
mcimadamore@1125 119 protected void scanCommentChar() {
mcimadamore@1125 120 scanChar();
mcimadamore@1125 121 if (ch == '\\') {
mcimadamore@1125 122 if (peekChar() == '\\' && !isUnicode()) {
mcimadamore@1125 123 skipChar();
mcimadamore@1125 124 } else {
mcimadamore@1125 125 convertUnicode();
mcimadamore@1125 126 }
mcimadamore@1125 127 }
mcimadamore@1125 128 }
mcimadamore@1125 129
mcimadamore@1125 130 /** Append a character to sbuf.
mcimadamore@1125 131 */
mcimadamore@1125 132 protected void putChar(char ch, boolean scan) {
mcimadamore@1125 133 if (sp == sbuf.length) {
mcimadamore@1125 134 char[] newsbuf = new char[sbuf.length * 2];
mcimadamore@1125 135 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
mcimadamore@1125 136 sbuf = newsbuf;
mcimadamore@1125 137 }
mcimadamore@1125 138 sbuf[sp++] = ch;
mcimadamore@1125 139 if (scan)
mcimadamore@1125 140 scanChar();
mcimadamore@1125 141 }
mcimadamore@1125 142
mcimadamore@1125 143 protected void putChar(char ch) {
mcimadamore@1125 144 putChar(ch, false);
mcimadamore@1125 145 }
mcimadamore@1125 146
mcimadamore@1125 147 protected void putChar(boolean scan) {
mcimadamore@1125 148 putChar(ch, scan);
mcimadamore@1125 149 }
mcimadamore@1125 150
mcimadamore@1125 151 Name name() {
mcimadamore@1125 152 return names.fromChars(sbuf, 0, sp);
mcimadamore@1125 153 }
mcimadamore@1125 154
mcimadamore@1125 155 String chars() {
mcimadamore@1125 156 return new String(sbuf, 0, sp);
mcimadamore@1125 157 }
mcimadamore@1125 158
mcimadamore@1113 159 /** Convert unicode escape; bp points to initial '\' character
mcimadamore@1113 160 * (Spec 3.3).
mcimadamore@1113 161 */
mcimadamore@1113 162 protected void convertUnicode() {
mcimadamore@1113 163 if (ch == '\\' && unicodeConversionBp != bp) {
mcimadamore@1113 164 bp++; ch = buf[bp];
mcimadamore@1113 165 if (ch == 'u') {
mcimadamore@1113 166 do {
mcimadamore@1113 167 bp++; ch = buf[bp];
mcimadamore@1113 168 } while (ch == 'u');
mcimadamore@1113 169 int limit = bp + 3;
mcimadamore@1113 170 if (limit < buflen) {
mcimadamore@1113 171 int d = digit(bp, 16);
mcimadamore@1113 172 int code = d;
mcimadamore@1113 173 while (bp < limit && d >= 0) {
mcimadamore@1113 174 bp++; ch = buf[bp];
mcimadamore@1113 175 d = digit(bp, 16);
mcimadamore@1113 176 code = (code << 4) + d;
mcimadamore@1113 177 }
mcimadamore@1113 178 if (d >= 0) {
mcimadamore@1113 179 ch = (char)code;
mcimadamore@1113 180 unicodeConversionBp = bp;
mcimadamore@1113 181 return;
mcimadamore@1113 182 }
mcimadamore@1113 183 }
mcimadamore@1113 184 log.error(bp, "illegal.unicode.esc");
mcimadamore@1113 185 } else {
mcimadamore@1113 186 bp--;
mcimadamore@1113 187 ch = '\\';
mcimadamore@1113 188 }
mcimadamore@1113 189 }
mcimadamore@1113 190 }
mcimadamore@1113 191
mcimadamore@1113 192 /** Are surrogates supported?
mcimadamore@1113 193 */
mcimadamore@1113 194 final static boolean surrogatesSupported = surrogatesSupported();
mcimadamore@1113 195 private static boolean surrogatesSupported() {
mcimadamore@1113 196 try {
mcimadamore@1113 197 Character.isHighSurrogate('a');
mcimadamore@1113 198 return true;
mcimadamore@1113 199 } catch (NoSuchMethodError ex) {
mcimadamore@1113 200 return false;
mcimadamore@1113 201 }
mcimadamore@1113 202 }
mcimadamore@1113 203
mcimadamore@1113 204 /** Scan surrogate pairs. If 'ch' is a high surrogate and
mcimadamore@1113 205 * the next character is a low surrogate, then put the low
mcimadamore@1113 206 * surrogate in 'ch', and return the high surrogate.
mcimadamore@1113 207 * otherwise, just return 0.
mcimadamore@1113 208 */
mcimadamore@1113 209 protected char scanSurrogates() {
mcimadamore@1113 210 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
mcimadamore@1113 211 char high = ch;
mcimadamore@1113 212
mcimadamore@1113 213 scanChar();
mcimadamore@1113 214
mcimadamore@1113 215 if (Character.isLowSurrogate(ch)) {
mcimadamore@1113 216 return high;
mcimadamore@1113 217 }
mcimadamore@1113 218
mcimadamore@1113 219 ch = high;
mcimadamore@1113 220 }
mcimadamore@1113 221
mcimadamore@1113 222 return 0;
mcimadamore@1113 223 }
mcimadamore@1113 224
mcimadamore@1113 225 /** Convert an ASCII digit from its base (8, 10, or 16)
mcimadamore@1113 226 * to its value.
mcimadamore@1113 227 */
mcimadamore@1113 228 protected int digit(int pos, int base) {
mcimadamore@1113 229 char c = ch;
mcimadamore@1113 230 int result = Character.digit(c, base);
mcimadamore@1113 231 if (result >= 0 && c > 0x7f) {
mcimadamore@1113 232 log.error(pos + 1, "illegal.nonascii.digit");
mcimadamore@1113 233 ch = "0123456789abcdef".charAt(result);
mcimadamore@1113 234 }
mcimadamore@1113 235 return result;
mcimadamore@1113 236 }
mcimadamore@1113 237
mcimadamore@1113 238 protected boolean isUnicode() {
mcimadamore@1113 239 return unicodeConversionBp == bp;
mcimadamore@1113 240 }
mcimadamore@1113 241
mcimadamore@1113 242 protected void skipChar() {
mcimadamore@1113 243 bp++;
mcimadamore@1113 244 }
mcimadamore@1113 245
mcimadamore@1113 246 protected char peekChar() {
mcimadamore@1113 247 return buf[bp + 1];
mcimadamore@1113 248 }
mcimadamore@1113 249
mcimadamore@1113 250 /**
mcimadamore@1113 251 * Returns a copy of the input buffer, up to its inputLength.
mcimadamore@1113 252 * Unicode escape sequences are not translated.
mcimadamore@1113 253 */
mcimadamore@1113 254 public char[] getRawCharacters() {
mcimadamore@1113 255 char[] chars = new char[buflen];
mcimadamore@1113 256 System.arraycopy(buf, 0, chars, 0, buflen);
mcimadamore@1113 257 return chars;
mcimadamore@1113 258 }
mcimadamore@1113 259
mcimadamore@1113 260 /**
mcimadamore@1113 261 * Returns a copy of a character array subset of the input buffer.
mcimadamore@1113 262 * The returned array begins at the <code>beginIndex</code> and
mcimadamore@1113 263 * extends to the character at index <code>endIndex - 1</code>.
mcimadamore@1113 264 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
mcimadamore@1113 265 * This behavior is like
mcimadamore@1113 266 * <code>String.substring(beginIndex, endIndex)</code>.
mcimadamore@1113 267 * Unicode escape sequences are not translated.
mcimadamore@1113 268 *
mcimadamore@1113 269 * @param beginIndex the beginning index, inclusive.
mcimadamore@1113 270 * @param endIndex the ending index, exclusive.
mcimadamore@1113 271 * @throws IndexOutOfBounds if either offset is outside of the
mcimadamore@1113 272 * array bounds
mcimadamore@1113 273 */
mcimadamore@1113 274 public char[] getRawCharacters(int beginIndex, int endIndex) {
mcimadamore@1113 275 int length = endIndex - beginIndex;
mcimadamore@1113 276 char[] chars = new char[length];
mcimadamore@1113 277 System.arraycopy(buf, beginIndex, chars, 0, length);
mcimadamore@1113 278 return chars;
mcimadamore@1113 279 }
mcimadamore@1113 280 }

mercurial