Tue, 25 Sep 2012 13:06:58 -0700
7193657: provide internal ArrayUtils class to simplify common usage of arrays in javac
Reviewed-by: mcimadamore, jjg
Contributed-by: vicenterz@yahoo.es
1 /*
2 * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.tools.javac.parser;
28 import java.nio.CharBuffer;
29 import java.util.Arrays;
31 import com.sun.tools.javac.file.JavacFileManager;
32 import com.sun.tools.javac.util.ArrayUtils;
33 import com.sun.tools.javac.util.Log;
34 import com.sun.tools.javac.util.Name;
35 import com.sun.tools.javac.util.Names;
37 import static com.sun.tools.javac.util.LayoutCharacters.*;
39 /** The char reader used by the javac lexer/tokenizer. Returns the sequence of
40 * characters contained in the input stream, handling Unicode escapes accordingly.
41 * Additionally, it provides features for saving chars into a buffer and retrieving
42 * them at a later stage.
43 *
44 * <p><b>This is NOT part of any supported API.
45 * If you write code that depends on this, you do so at your own risk.
46 * This code and its internal interfaces are subject to change or
47 * deletion without notice.</b>
48 */
49 public class UnicodeReader {
51 /** The input buffer, index of next character to be read,
52 * index of one past last character in buffer.
53 */
54 protected char[] buf;
55 protected int bp;
56 protected final int buflen;
58 /** The current character.
59 */
60 protected char ch;
62 /** The buffer index of the last converted unicode character
63 */
64 protected int unicodeConversionBp = -1;
66 protected Log log;
67 protected Names names;
69 /** A character buffer for saved chars.
70 */
71 protected char[] sbuf = new char[128];
72 protected int sp;
74 /**
75 * Create a scanner from the input array. This method might
76 * modify the array. To avoid copying the input array, ensure
77 * that {@code inputLength < input.length} or
78 * {@code input[input.length -1]} is a white space character.
79 *
80 * @param fac the factory which created this Scanner
81 * @param input the input, might be modified
82 * @param inputLength the size of the input.
83 * Must be positive and less than or equal to input.length.
84 */
85 protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
86 this(sf, JavacFileManager.toArray(buffer), buffer.limit());
87 }
89 protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
90 log = sf.log;
91 names = sf.names;
92 if (inputLength == input.length) {
93 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
94 inputLength--;
95 } else {
96 input = Arrays.copyOf(input, inputLength + 1);
97 }
98 }
99 buf = input;
100 buflen = inputLength;
101 buf[buflen] = EOI;
102 bp = -1;
103 scanChar();
104 }
106 /** Read next character.
107 */
108 protected void scanChar() {
109 if (bp < buflen) {
110 ch = buf[++bp];
111 if (ch == '\\') {
112 convertUnicode();
113 }
114 }
115 }
117 /** Read next character in comment, skipping over double '\' characters.
118 */
119 protected void scanCommentChar() {
120 scanChar();
121 if (ch == '\\') {
122 if (peekChar() == '\\' && !isUnicode()) {
123 skipChar();
124 } else {
125 convertUnicode();
126 }
127 }
128 }
130 /** Append a character to sbuf.
131 */
132 protected void putChar(char ch, boolean scan) {
133 sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
134 sbuf[sp++] = ch;
135 if (scan)
136 scanChar();
137 }
139 protected void putChar(char ch) {
140 putChar(ch, false);
141 }
143 protected void putChar(boolean scan) {
144 putChar(ch, scan);
145 }
147 Name name() {
148 return names.fromChars(sbuf, 0, sp);
149 }
151 String chars() {
152 return new String(sbuf, 0, sp);
153 }
155 /** Convert unicode escape; bp points to initial '\' character
156 * (Spec 3.3).
157 */
158 protected void convertUnicode() {
159 if (ch == '\\' && unicodeConversionBp != bp) {
160 bp++; ch = buf[bp];
161 if (ch == 'u') {
162 do {
163 bp++; ch = buf[bp];
164 } while (ch == 'u');
165 int limit = bp + 3;
166 if (limit < buflen) {
167 int d = digit(bp, 16);
168 int code = d;
169 while (bp < limit && d >= 0) {
170 bp++; ch = buf[bp];
171 d = digit(bp, 16);
172 code = (code << 4) + d;
173 }
174 if (d >= 0) {
175 ch = (char)code;
176 unicodeConversionBp = bp;
177 return;
178 }
179 }
180 log.error(bp, "illegal.unicode.esc");
181 } else {
182 bp--;
183 ch = '\\';
184 }
185 }
186 }
188 /** Are surrogates supported?
189 */
190 final static boolean surrogatesSupported = surrogatesSupported();
191 private static boolean surrogatesSupported() {
192 try {
193 Character.isHighSurrogate('a');
194 return true;
195 } catch (NoSuchMethodError ex) {
196 return false;
197 }
198 }
200 /** Scan surrogate pairs. If 'ch' is a high surrogate and
201 * the next character is a low surrogate, then put the low
202 * surrogate in 'ch', and return the high surrogate.
203 * otherwise, just return 0.
204 */
205 protected char scanSurrogates() {
206 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
207 char high = ch;
209 scanChar();
211 if (Character.isLowSurrogate(ch)) {
212 return high;
213 }
215 ch = high;
216 }
218 return 0;
219 }
221 /** Convert an ASCII digit from its base (8, 10, or 16)
222 * to its value.
223 */
224 protected int digit(int pos, int base) {
225 char c = ch;
226 int result = Character.digit(c, base);
227 if (result >= 0 && c > 0x7f) {
228 log.error(pos + 1, "illegal.nonascii.digit");
229 ch = "0123456789abcdef".charAt(result);
230 }
231 return result;
232 }
234 protected boolean isUnicode() {
235 return unicodeConversionBp == bp;
236 }
238 protected void skipChar() {
239 bp++;
240 }
242 protected char peekChar() {
243 return buf[bp + 1];
244 }
246 /**
247 * Returns a copy of the input buffer, up to its inputLength.
248 * Unicode escape sequences are not translated.
249 */
250 public char[] getRawCharacters() {
251 char[] chars = new char[buflen];
252 System.arraycopy(buf, 0, chars, 0, buflen);
253 return chars;
254 }
256 /**
257 * Returns a copy of a character array subset of the input buffer.
258 * The returned array begins at the <code>beginIndex</code> and
259 * extends to the character at index <code>endIndex - 1</code>.
260 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
261 * This behavior is like
262 * <code>String.substring(beginIndex, endIndex)</code>.
263 * Unicode escape sequences are not translated.
264 *
265 * @param beginIndex the beginning index, inclusive.
266 * @param endIndex the ending index, exclusive.
267 * @throws IndexOutOfBounds if either offset is outside of the
268 * array bounds
269 */
270 public char[] getRawCharacters(int beginIndex, int endIndex) {
271 int length = endIndex - beginIndex;
272 char[] chars = new char[length];
273 System.arraycopy(buf, beginIndex, chars, 0, length);
274 return chars;
275 }
276 }