Wed, 20 Jun 2012 13:23:26 -0700
7174143: encapsulate doc comment table
Reviewed-by: ksrini, mcimadamore
1 /*
2 * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.tools.javac.parser;
28 import com.sun.tools.javac.file.JavacFileManager;
29 import com.sun.tools.javac.util.Log;
30 import com.sun.tools.javac.util.Name;
31 import com.sun.tools.javac.util.Names;
33 import java.nio.CharBuffer;
35 import static com.sun.tools.javac.util.LayoutCharacters.*;
37 /** The char reader used by the javac lexer/tokenizer. Returns the sequence of
38 * characters contained in the input stream, handling unicode escape accordingly.
39 * Additionally, it provide features for saving chars into a buffer and to retrieve
40 * them at a later stage.
41 *
42 * <p><b>This is NOT part of any supported API.
43 * If you write code that depends on this, you do so at your own risk.
44 * This code and its internal interfaces are subject to change or
45 * deletion without notice.</b>
46 */
47 public class UnicodeReader {
49 /** The input buffer, index of next character to be read,
50 * index of one past last character in buffer.
51 */
52 protected char[] buf;
53 protected int bp;
54 protected final int buflen;
56 /** The current character.
57 */
58 protected char ch;
60 /** The buffer index of the last converted unicode character
61 */
62 protected int unicodeConversionBp = -1;
64 protected Log log;
65 protected Names names;
67 /** A character buffer for saved chars.
68 */
69 protected char[] sbuf = new char[128];
70 protected int sp;
72 /**
73 * Create a scanner from the input array. This method might
74 * modify the array. To avoid copying the input array, ensure
75 * that {@code inputLength < input.length} or
76 * {@code input[input.length -1]} is a white space character.
77 *
78 * @param fac the factory which created this Scanner
79 * @param input the input, might be modified
80 * @param inputLength the size of the input.
81 * Must be positive and less than or equal to input.length.
82 */
83 protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
84 this(sf, JavacFileManager.toArray(buffer), buffer.limit());
85 }
87 protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
88 log = sf.log;
89 names = sf.names;
90 if (inputLength == input.length) {
91 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
92 inputLength--;
93 } else {
94 char[] newInput = new char[inputLength + 1];
95 System.arraycopy(input, 0, newInput, 0, input.length);
96 input = newInput;
97 }
98 }
99 buf = input;
100 buflen = inputLength;
101 buf[buflen] = EOI;
102 bp = -1;
103 scanChar();
104 }
106 /** Read next character.
107 */
108 protected void scanChar() {
109 if (bp < buflen) {
110 ch = buf[++bp];
111 if (ch == '\\') {
112 convertUnicode();
113 }
114 }
115 }
117 /** Read next character in comment, skipping over double '\' characters.
118 */
119 protected void scanCommentChar() {
120 scanChar();
121 if (ch == '\\') {
122 if (peekChar() == '\\' && !isUnicode()) {
123 skipChar();
124 } else {
125 convertUnicode();
126 }
127 }
128 }
130 /** Append a character to sbuf.
131 */
132 protected void putChar(char ch, boolean scan) {
133 if (sp == sbuf.length) {
134 char[] newsbuf = new char[sbuf.length * 2];
135 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
136 sbuf = newsbuf;
137 }
138 sbuf[sp++] = ch;
139 if (scan)
140 scanChar();
141 }
143 protected void putChar(char ch) {
144 putChar(ch, false);
145 }
147 protected void putChar(boolean scan) {
148 putChar(ch, scan);
149 }
151 Name name() {
152 return names.fromChars(sbuf, 0, sp);
153 }
155 String chars() {
156 return new String(sbuf, 0, sp);
157 }
159 /** Convert unicode escape; bp points to initial '\' character
160 * (Spec 3.3).
161 */
162 protected void convertUnicode() {
163 if (ch == '\\' && unicodeConversionBp != bp) {
164 bp++; ch = buf[bp];
165 if (ch == 'u') {
166 do {
167 bp++; ch = buf[bp];
168 } while (ch == 'u');
169 int limit = bp + 3;
170 if (limit < buflen) {
171 int d = digit(bp, 16);
172 int code = d;
173 while (bp < limit && d >= 0) {
174 bp++; ch = buf[bp];
175 d = digit(bp, 16);
176 code = (code << 4) + d;
177 }
178 if (d >= 0) {
179 ch = (char)code;
180 unicodeConversionBp = bp;
181 return;
182 }
183 }
184 log.error(bp, "illegal.unicode.esc");
185 } else {
186 bp--;
187 ch = '\\';
188 }
189 }
190 }
192 /** Are surrogates supported?
193 */
194 final static boolean surrogatesSupported = surrogatesSupported();
195 private static boolean surrogatesSupported() {
196 try {
197 Character.isHighSurrogate('a');
198 return true;
199 } catch (NoSuchMethodError ex) {
200 return false;
201 }
202 }
204 /** Scan surrogate pairs. If 'ch' is a high surrogate and
205 * the next character is a low surrogate, then put the low
206 * surrogate in 'ch', and return the high surrogate.
207 * otherwise, just return 0.
208 */
209 protected char scanSurrogates() {
210 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
211 char high = ch;
213 scanChar();
215 if (Character.isLowSurrogate(ch)) {
216 return high;
217 }
219 ch = high;
220 }
222 return 0;
223 }
225 /** Convert an ASCII digit from its base (8, 10, or 16)
226 * to its value.
227 */
228 protected int digit(int pos, int base) {
229 char c = ch;
230 int result = Character.digit(c, base);
231 if (result >= 0 && c > 0x7f) {
232 log.error(pos + 1, "illegal.nonascii.digit");
233 ch = "0123456789abcdef".charAt(result);
234 }
235 return result;
236 }
238 protected boolean isUnicode() {
239 return unicodeConversionBp == bp;
240 }
242 protected void skipChar() {
243 bp++;
244 }
246 protected char peekChar() {
247 return buf[bp + 1];
248 }
250 /**
251 * Returns a copy of the input buffer, up to its inputLength.
252 * Unicode escape sequences are not translated.
253 */
254 public char[] getRawCharacters() {
255 char[] chars = new char[buflen];
256 System.arraycopy(buf, 0, chars, 0, buflen);
257 return chars;
258 }
260 /**
261 * Returns a copy of a character array subset of the input buffer.
262 * The returned array begins at the <code>beginIndex</code> and
263 * extends to the character at index <code>endIndex - 1</code>.
264 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
265 * This behavior is like
266 * <code>String.substring(beginIndex, endIndex)</code>.
267 * Unicode escape sequences are not translated.
268 *
269 * @param beginIndex the beginning index, inclusive.
270 * @param endIndex the ending index, exclusive.
271 * @throws IndexOutOfBounds if either offset is outside of the
272 * array bounds
273 */
274 public char[] getRawCharacters(int beginIndex, int endIndex) {
275 int length = endIndex - beginIndex;
276 char[] chars = new char[length];
277 System.arraycopy(buf, beginIndex, chars, 0, length);
278 return chars;
279 }
280 }