Mon, 24 Oct 2011 13:00:20 +0100
7096014: Javac tokens should retain state
Summary: Refactor javac tokens from enum constants to stateful instances (to keep track of position, comments, etc.)
Reviewed-by: jjg
1 /*
2 * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.tools.javac.parser;
28 import com.sun.tools.javac.file.JavacFileManager;
29 import com.sun.tools.javac.parser.Tokens.Token;
30 import com.sun.tools.javac.util.*;
32 import java.nio.*;
34 import static com.sun.tools.javac.util.LayoutCharacters.*;
36 /** An extension to the base lexical analyzer that captures
37 * and processes the contents of doc comments. It does so by
38 * translating Unicode escape sequences and by stripping the
39 * leading whitespace and stars from each line of the comment.
40 *
41 * <p><b>This is NOT part of any supported API.
42 * If you write code that depends on this, you do so at your own risk.
43 * This code and its internal interfaces are subject to change or
44 * deletion without notice.</b>
45 */
46 public class JavadocTokenizer extends JavaTokenizer {
/**
 * Creates a tokenizer over a character buffer by handing both arguments
 * straight to the superclass constructor.
 *
 * <p>The buffer must implement array() and compact(), and remaining()
 * must be less than limit().
 *
 * @param fac    the scanner factory passed on to the superclass
 * @param buffer the input characters
 */
protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
    super(fac, buffer);
}
/**
 * Creates a tokenizer over a character array by handing all arguments
 * straight to the superclass constructor.
 *
 * <p>The array must have at least a single character of extra space.
 *
 * @param fac         the scanner factory passed on to the superclass
 * @param input       the input characters
 * @param inputLength the input length passed on to the superclass
 */
protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
    super(fac, input, inputLength);
}
62 /** The comment input buffer, index of next chacter to be read,
63 * index of one past last character in buffer.
64 */
65 private char[] buf;
66 private int bp;
67 private int buflen;
69 /** The current character.
70 */
71 private char ch;
73 /** The column number position of the current character.
74 */
75 private int col;
77 /** The buffer index of the last converted Unicode character
78 */
79 private int unicodeConversionBp = 0;
81 /**
82 * Buffer for doc comment.
83 */
84 private char[] docCommentBuffer = new char[1024];
86 /**
87 * Number of characters in doc comment buffer.
88 */
89 private int docCommentCount;
91 /**
92 * Translated and stripped contents of doc comment
93 */
94 private String docComment = null;
97 /** Unconditionally expand the comment buffer.
98 */
99 private void expandCommentBuffer() {
100 char[] newBuffer = new char[docCommentBuffer.length * 2];
101 System.arraycopy(docCommentBuffer, 0, newBuffer,
102 0, docCommentBuffer.length);
103 docCommentBuffer = newBuffer;
104 }
106 /** Convert an ASCII digit from its base (8, 10, or 16)
107 * to its value.
108 */
109 private int digit(int base) {
110 char c = ch;
111 int result = Character.digit(c, base);
112 if (result >= 0 && c > 0x7f) {
113 ch = "0123456789abcdef".charAt(result);
114 }
115 return result;
116 }
118 /** Convert Unicode escape; bp points to initial '\' character
119 * (Spec 3.3).
120 */
121 private void convertUnicode() {
122 if (ch == '\\' && unicodeConversionBp != bp) {
123 bp++; ch = buf[bp]; col++;
124 if (ch == 'u') {
125 do {
126 bp++; ch = buf[bp]; col++;
127 } while (ch == 'u');
128 int limit = bp + 3;
129 if (limit < buflen) {
130 int d = digit(16);
131 int code = d;
132 while (bp < limit && d >= 0) {
133 bp++; ch = buf[bp]; col++;
134 d = digit(16);
135 code = (code << 4) + d;
136 }
137 if (d >= 0) {
138 ch = (char)code;
139 unicodeConversionBp = bp;
140 return;
141 }
142 }
143 // "illegal.Unicode.esc", reported by base scanner
144 } else {
145 bp--;
146 ch = '\\';
147 col--;
148 }
149 }
150 }
153 /** Read next character.
154 */
155 private void scanChar() {
156 bp++;
157 ch = buf[bp];
158 switch (ch) {
159 case '\r': // return
160 col = 0;
161 break;
162 case '\n': // newline
163 if (bp == 0 || buf[bp-1] != '\r') {
164 col = 0;
165 }
166 break;
167 case '\t': // tab
168 col = (col / TabInc * TabInc) + TabInc;
169 break;
170 case '\\': // possible Unicode
171 col++;
172 convertUnicode();
173 break;
174 default:
175 col++;
176 break;
177 }
178 }
180 @Override
181 public Token readToken() {
182 docComment = null;
183 Token tk = super.readToken();
184 tk.docComment = docComment;
185 return tk;
186 }
188 /**
189 * Read next character in doc comment, skipping over double '\' characters.
190 * If a double '\' is skipped, put in the buffer and update buffer count.
191 */
192 private void scanDocCommentChar() {
193 scanChar();
194 if (ch == '\\') {
195 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
196 if (docCommentCount == docCommentBuffer.length)
197 expandCommentBuffer();
198 docCommentBuffer[docCommentCount++] = ch;
199 bp++; col++;
200 } else {
201 convertUnicode();
202 }
203 }
204 }
206 /**
207 * Process a doc comment and make the string content available.
208 * Strips leading whitespace and stars.
209 */
210 @SuppressWarnings("fallthrough")
211 protected void processComment(int pos, int endPos, CommentStyle style) {
212 if (style != CommentStyle.JAVADOC) {
213 return;
214 }
216 buf = reader.getRawCharacters(pos, endPos);
217 buflen = buf.length;
218 bp = 0;
219 col = 0;
221 docCommentCount = 0;
223 boolean firstLine = true;
225 // Skip over first slash
226 scanDocCommentChar();
227 // Skip over first star
228 scanDocCommentChar();
230 // consume any number of stars
231 while (bp < buflen && ch == '*') {
232 scanDocCommentChar();
233 }
234 // is the comment in the form /**/, /***/, /****/, etc. ?
235 if (bp < buflen && ch == '/') {
236 docComment = "";
237 return;
238 }
240 // skip a newline on the first line of the comment.
241 if (bp < buflen) {
242 if (ch == LF) {
243 scanDocCommentChar();
244 firstLine = false;
245 } else if (ch == CR) {
246 scanDocCommentChar();
247 if (ch == LF) {
248 scanDocCommentChar();
249 firstLine = false;
250 }
251 }
252 }
254 outerLoop:
256 // The outerLoop processes the doc comment, looping once
257 // for each line. For each line, it first strips off
258 // whitespace, then it consumes any stars, then it
259 // puts the rest of the line into our buffer.
260 while (bp < buflen) {
262 // The wsLoop consumes whitespace from the beginning
263 // of each line.
264 wsLoop:
266 while (bp < buflen) {
267 switch(ch) {
268 case ' ':
269 scanDocCommentChar();
270 break;
271 case '\t':
272 col = ((col - 1) / TabInc * TabInc) + TabInc;
273 scanDocCommentChar();
274 break;
275 case FF:
276 col = 0;
277 scanDocCommentChar();
278 break;
279 // Treat newline at beginning of line (blank line, no star)
280 // as comment text. Old Javadoc compatibility requires this.
281 /*---------------------------------*
282 case CR: // (Spec 3.4)
283 scanDocCommentChar();
284 if (ch == LF) {
285 col = 0;
286 scanDocCommentChar();
287 }
288 break;
289 case LF: // (Spec 3.4)
290 scanDocCommentChar();
291 break;
292 *---------------------------------*/
293 default:
294 // we've seen something that isn't whitespace;
295 // jump out.
296 break wsLoop;
297 }
298 }
300 // Are there stars here? If so, consume them all
301 // and check for the end of comment.
302 if (ch == '*') {
303 // skip all of the stars
304 do {
305 scanDocCommentChar();
306 } while (ch == '*');
308 // check for the closing slash.
309 if (ch == '/') {
310 // We're done with the doc comment
311 // scanChar() and breakout.
312 break outerLoop;
313 }
314 } else if (! firstLine) {
315 //The current line does not begin with a '*' so we will indent it.
316 for (int i = 1; i < col; i++) {
317 if (docCommentCount == docCommentBuffer.length)
318 expandCommentBuffer();
319 docCommentBuffer[docCommentCount++] = ' ';
320 }
321 }
323 // The textLoop processes the rest of the characters
324 // on the line, adding them to our buffer.
325 textLoop:
326 while (bp < buflen) {
327 switch (ch) {
328 case '*':
329 // Is this just a star? Or is this the
330 // end of a comment?
331 scanDocCommentChar();
332 if (ch == '/') {
333 // This is the end of the comment,
334 // set ch and return our buffer.
335 break outerLoop;
336 }
337 // This is just an ordinary star. Add it to
338 // the buffer.
339 if (docCommentCount == docCommentBuffer.length)
340 expandCommentBuffer();
341 docCommentBuffer[docCommentCount++] = '*';
342 break;
343 case ' ':
344 case '\t':
345 if (docCommentCount == docCommentBuffer.length)
346 expandCommentBuffer();
347 docCommentBuffer[docCommentCount++] = ch;
348 scanDocCommentChar();
349 break;
350 case FF:
351 scanDocCommentChar();
352 break textLoop; // treat as end of line
353 case CR: // (Spec 3.4)
354 scanDocCommentChar();
355 if (ch != LF) {
356 // Canonicalize CR-only line terminator to LF
357 if (docCommentCount == docCommentBuffer.length)
358 expandCommentBuffer();
359 docCommentBuffer[docCommentCount++] = (char)LF;
360 break textLoop;
361 }
362 /* fall through to LF case */
363 case LF: // (Spec 3.4)
364 // We've seen a newline. Add it to our
365 // buffer and break out of this loop,
366 // starting fresh on a new line.
367 if (docCommentCount == docCommentBuffer.length)
368 expandCommentBuffer();
369 docCommentBuffer[docCommentCount++] = ch;
370 scanDocCommentChar();
371 break textLoop;
372 default:
373 // Add the character to our buffer.
374 if (docCommentCount == docCommentBuffer.length)
375 expandCommentBuffer();
376 docCommentBuffer[docCommentCount++] = ch;
377 scanDocCommentChar();
378 }
379 } // end textLoop
380 firstLine = false;
381 } // end outerLoop
383 if (docCommentCount > 0) {
384 int i = docCommentCount - 1;
385 trailLoop:
386 while (i > -1) {
387 switch (docCommentBuffer[i]) {
388 case '*':
389 i--;
390 break;
391 default:
392 break trailLoop;
393 }
394 }
395 docCommentCount = i + 1;
397 // Store the text of the doc comment
398 docComment = new String(docCommentBuffer, 0 , docCommentCount);
399 } else {
400 docComment = "";
401 }
402 }
404 /** Build a map for translating between line numbers and
405 * positions in the input.
406 *
407 * @return a LineMap */
408 public Position.LineMap getLineMap() {
409 char[] buf = reader.getRawCharacters();
410 return Position.makeLineMap(buf, buf.length, true);
411 }
412 }