Wed, 10 Oct 2012 18:44:21 -0700
8000310: Clean up use of StringBuffer in langtools
Reviewed-by: bpatel
1 /*
2 * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.tools.javac.parser;
28 import com.sun.tools.javac.parser.Tokens.Comment;
29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
30 import com.sun.tools.javac.util.*;
32 import java.nio.*;
34 import static com.sun.tools.javac.util.LayoutCharacters.*;
36 /** An extension to the base lexical analyzer that captures
37 * and processes the contents of doc comments. It does so by
38 * translating Unicode escape sequences and by stripping the
39 * leading whitespace and starts from each line of the comment.
40 *
41 * <p><b>This is NOT part of any supported API.
42 * If you write code that depends on this, you do so at your own risk.
43 * This code and its internal interfaces are subject to change or
44 * deletion without notice.</b>
45 */
46 public class JavadocTokenizer extends JavaTokenizer {
48 /** Create a scanner from the input buffer. buffer must implement
49 * array() and compact(), and remaining() must be less than limit().
50 */
51 protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
52 super(fac, buffer);
53 }
55 /** Create a scanner from the input array. The array must have at
56 * least a single character of extra space.
57 */
58 protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
59 super(fac, input, inputLength);
60 }
62 @Override
63 protected Comment processComment(int pos, int endPos, CommentStyle style) {
64 char[] buf = reader.getRawCharacters(pos, endPos);
65 return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style);
66 }
68 /**
69 * This is a specialized version of UnicodeReader that keeps track of the
70 * column position within a given character stream (used for Javadoc processing),
71 * and which builds a table for mapping positions in the comment string to
72 * positions in the source file.
73 */
74 static class DocReader extends UnicodeReader {
76 int col;
77 int startPos;
79 /**
80 * A buffer for building a table for mapping positions in {@link #sbuf}
81 * to positions in the source buffer.
82 *
83 * The array is organized as a series of pairs of integers: the first
84 * number in each pair specifies a position in the comment text,
85 * the second number in each pair specifies the corresponding position
86 * in the source buffer. The pairs are sorted in ascending order.
87 *
88 * Since the mapping function is generally continuous, with successive
89 * positions in the string corresponding to successive positions in the
90 * source buffer, the table only needs to record discontinuities in
91 * the mapping. The values of intermediate positions can be inferred.
92 *
93 * Discontinuities may occur in a number of places: when a newline
94 * is followed by whitespace and asterisks (which are ignored),
95 * when a tab is expanded into spaces, and when unicode escapes
96 * are used in the source buffer.
97 *
98 * Thus, to find the source position of any position, p, in the comment
99 * string, find the index, i, of the pair whose string offset
100 * ({@code pbuf[i] }) is closest to but not greater than p. Then,
101 * {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.
102 */
103 int[] pbuf = new int[128];
105 /**
106 * The index of the next empty slot in the pbuf buffer.
107 */
108 int pp = 0;
110 DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {
111 super(fac, input, inputLength);
112 this.startPos = startPos;
113 }
115 @Override
116 protected void convertUnicode() {
117 if (ch == '\\' && unicodeConversionBp != bp) {
118 bp++; ch = buf[bp]; col++;
119 if (ch == 'u') {
120 do {
121 bp++; ch = buf[bp]; col++;
122 } while (ch == 'u');
123 int limit = bp + 3;
124 if (limit < buflen) {
125 int d = digit(bp, 16);
126 int code = d;
127 while (bp < limit && d >= 0) {
128 bp++; ch = buf[bp]; col++;
129 d = digit(bp, 16);
130 code = (code << 4) + d;
131 }
132 if (d >= 0) {
133 ch = (char)code;
134 unicodeConversionBp = bp;
135 return;
136 }
137 }
138 // "illegal.Unicode.esc", reported by base scanner
139 } else {
140 bp--;
141 ch = '\\';
142 col--;
143 }
144 }
145 }
147 @Override
148 protected void scanCommentChar() {
149 scanChar();
150 if (ch == '\\') {
151 if (peekChar() == '\\' && !isUnicode()) {
152 putChar(ch, false);
153 bp++; col++;
154 } else {
155 convertUnicode();
156 }
157 }
158 }
160 @Override
161 protected void scanChar() {
162 bp++;
163 ch = buf[bp];
164 switch (ch) {
165 case '\r': // return
166 col = 0;
167 break;
168 case '\n': // newline
169 if (bp == 0 || buf[bp-1] != '\r') {
170 col = 0;
171 }
172 break;
173 case '\t': // tab
174 col = (col / TabInc * TabInc) + TabInc;
175 break;
176 case '\\': // possible Unicode
177 col++;
178 convertUnicode();
179 break;
180 default:
181 col++;
182 break;
183 }
184 }
186 @Override
187 public void putChar(char ch, boolean scan) {
188 // At this point, bp is the position of the current character in buf,
189 // and sp is the position in sbuf where this character will be put.
190 // Record a new entry in pbuf if pbuf is empty or if sp and its
191 // corresponding source position are not equidistant from the
192 // corresponding values in the latest entry in the pbuf array.
193 // (i.e. there is a discontinuity in the map function.)
194 if ((pp == 0)
195 || (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) {
196 if (pp + 1 >= pbuf.length) {
197 int[] new_pbuf = new int[pbuf.length * 2];
198 System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length);
199 pbuf = new_pbuf;
200 }
201 pbuf[pp] = sp;
202 pbuf[pp + 1] = startPos + bp;
203 pp += 2;
204 }
205 super.putChar(ch, scan);
206 }
207 }
209 protected class JavadocComment extends JavaTokenizer.BasicComment<DocReader> {
211 /**
212 * Translated and stripped contents of doc comment
213 */
214 private String docComment = null;
215 private int[] docPosns = null;
217 JavadocComment(DocReader reader, CommentStyle cs) {
218 super(reader, cs);
219 }
221 @Override
222 public String getText() {
223 if (!scanned && cs == CommentStyle.JAVADOC) {
224 scanDocComment();
225 }
226 return docComment;
227 }
229 @Override
230 public int getSourcePos(int pos) {
231 // Binary search to find the entry for which the string index is
232 // less than pos. Since docPosns is a list of pairs of integers
233 // we must make sure the index is always even.
234 // If we find an exact match for pos, the other item in the pair
235 // gives the source pos; otherwise, compute the source position
236 // relative to the best match found in the array.
237 if (pos < 0 || pos >= docComment.length())
238 throw new StringIndexOutOfBoundsException();
239 if (docPosns == null)
240 return -1;
241 int start = 0;
242 int end = docPosns.length;
243 while (start < end - 2) {
244 // find an even index midway between start and end
245 int index = ((start + end) / 4) * 2;
246 if (docPosns[index] < pos)
247 start = index;
248 else if (docPosns[index] == pos)
249 return docPosns[index + 1];
250 else
251 end = index;
252 }
253 return docPosns[start + 1] + (pos - docPosns[start]);
254 }
256 @Override
257 @SuppressWarnings("fallthrough")
258 protected void scanDocComment() {
259 try {
260 boolean firstLine = true;
262 // Skip over first slash
263 comment_reader.scanCommentChar();
264 // Skip over first star
265 comment_reader.scanCommentChar();
267 // consume any number of stars
268 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
269 comment_reader.scanCommentChar();
270 }
271 // is the comment in the form /**/, /***/, /****/, etc. ?
272 if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {
273 docComment = "";
274 return;
275 }
277 // skip a newline on the first line of the comment.
278 if (comment_reader.bp < comment_reader.buflen) {
279 if (comment_reader.ch == LF) {
280 comment_reader.scanCommentChar();
281 firstLine = false;
282 } else if (comment_reader.ch == CR) {
283 comment_reader.scanCommentChar();
284 if (comment_reader.ch == LF) {
285 comment_reader.scanCommentChar();
286 firstLine = false;
287 }
288 }
289 }
291 outerLoop:
293 // The outerLoop processes the doc comment, looping once
294 // for each line. For each line, it first strips off
295 // whitespace, then it consumes any stars, then it
296 // puts the rest of the line into our buffer.
297 while (comment_reader.bp < comment_reader.buflen) {
298 int begin_bp = comment_reader.bp;
299 char begin_ch = comment_reader.ch;
300 // The wsLoop consumes whitespace from the beginning
301 // of each line.
302 wsLoop:
304 while (comment_reader.bp < comment_reader.buflen) {
305 switch(comment_reader.ch) {
306 case ' ':
307 comment_reader.scanCommentChar();
308 break;
309 case '\t':
310 comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;
311 comment_reader.scanCommentChar();
312 break;
313 case FF:
314 comment_reader.col = 0;
315 comment_reader.scanCommentChar();
316 break;
317 // Treat newline at beginning of line (blank line, no star)
318 // as comment text. Old Javadoc compatibility requires this.
319 /*---------------------------------*
320 case CR: // (Spec 3.4)
321 doc_reader.scanCommentChar();
322 if (ch == LF) {
323 col = 0;
324 doc_reader.scanCommentChar();
325 }
326 break;
327 case LF: // (Spec 3.4)
328 doc_reader.scanCommentChar();
329 break;
330 *---------------------------------*/
331 default:
332 // we've seen something that isn't whitespace;
333 // jump out.
334 break wsLoop;
335 }
336 }
338 // Are there stars here? If so, consume them all
339 // and check for the end of comment.
340 if (comment_reader.ch == '*') {
341 // skip all of the stars
342 do {
343 comment_reader.scanCommentChar();
344 } while (comment_reader.ch == '*');
346 // check for the closing slash.
347 if (comment_reader.ch == '/') {
348 // We're done with the doc comment
349 // scanChar() and breakout.
350 break outerLoop;
351 }
352 } else if (! firstLine) {
353 // The current line does not begin with a '*' so we will
354 // treat it as comment
355 comment_reader.bp = begin_bp;
356 comment_reader.ch = begin_ch;
357 }
358 // The textLoop processes the rest of the characters
359 // on the line, adding them to our buffer.
360 textLoop:
361 while (comment_reader.bp < comment_reader.buflen) {
362 switch (comment_reader.ch) {
363 case '*':
364 // Is this just a star? Or is this the
365 // end of a comment?
366 comment_reader.scanCommentChar();
367 if (comment_reader.ch == '/') {
368 // This is the end of the comment,
369 // set ch and return our buffer.
370 break outerLoop;
371 }
372 // This is just an ordinary star. Add it to
373 // the buffer.
374 comment_reader.putChar('*', false);
375 break;
376 case ' ':
377 case '\t':
378 comment_reader.putChar(comment_reader.ch, false);
379 comment_reader.scanCommentChar();
380 break;
381 case FF:
382 comment_reader.scanCommentChar();
383 break textLoop; // treat as end of line
384 case CR: // (Spec 3.4)
385 comment_reader.scanCommentChar();
386 if (comment_reader.ch != LF) {
387 // Canonicalize CR-only line terminator to LF
388 comment_reader.putChar((char)LF, false);
389 break textLoop;
390 }
391 /* fall through to LF case */
392 case LF: // (Spec 3.4)
393 // We've seen a newline. Add it to our
394 // buffer and break out of this loop,
395 // starting fresh on a new line.
396 comment_reader.putChar(comment_reader.ch, false);
397 comment_reader.scanCommentChar();
398 break textLoop;
399 default:
400 // Add the character to our buffer.
401 comment_reader.putChar(comment_reader.ch, false);
402 comment_reader.scanCommentChar();
403 }
404 } // end textLoop
405 firstLine = false;
406 } // end outerLoop
408 if (comment_reader.sp > 0) {
409 int i = comment_reader.sp - 1;
410 trailLoop:
411 while (i > -1) {
412 switch (comment_reader.sbuf[i]) {
413 case '*':
414 i--;
415 break;
416 default:
417 break trailLoop;
418 }
419 }
420 comment_reader.sp = i + 1;
422 // Store the text of the doc comment
423 docComment = comment_reader.chars();
424 docPosns = new int[comment_reader.pp];
425 System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length);
426 } else {
427 docComment = "";
428 }
429 } finally {
430 scanned = true;
431 comment_reader = null;
432 if (docComment != null &&
433 docComment.matches("(?sm).*^\\s*@deprecated( |$).*")) {
434 deprecatedFlag = true;
435 }
436 }
437 }
438 }
440 @Override
441 public Position.LineMap getLineMap() {
442 char[] buf = reader.getRawCharacters();
443 return Position.makeLineMap(buf, buf.length, true);
444 }
445 }