src/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java

changeset 1125
56830d5cb5bb
parent 1113
d346ab55031b
child 1281
067f51db3402
equal deleted inserted replaced
1124:9e2eb4bc49eb 1125:56830d5cb5bb
23 * questions. 23 * questions.
24 */ 24 */
25 25
26 package com.sun.tools.javac.parser; 26 package com.sun.tools.javac.parser;
27 27
28 import com.sun.tools.javac.file.JavacFileManager; 28 import com.sun.tools.javac.parser.Tokens.Comment;
29 import com.sun.tools.javac.parser.Tokens.Token; 29 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
30 import com.sun.tools.javac.util.*; 30 import com.sun.tools.javac.util.*;
31 31
32 import java.nio.*; 32 import java.nio.*;
33 33
34 import static com.sun.tools.javac.util.LayoutCharacters.*; 34 import static com.sun.tools.javac.util.LayoutCharacters.*;
57 */ 57 */
58 protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) { 58 protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
59 super(fac, input, inputLength); 59 super(fac, input, inputLength);
60 } 60 }
61 61
62 /** The comment input buffer, index of next chacter to be read, 62 @Override
63 * index of one past last character in buffer. 63 protected Comment processComment(int pos, int endPos, CommentStyle style) {
64 char[] buf = reader.getRawCharacters(pos, endPos);
65 return new JavadocComment(new ColReader(fac, buf, buf.length), style);
66 }
67
68 /**
69 * This is a specialized version of UnicodeReader that keeps track of the
70 * column position within a given character stream (used for Javadoc processing).
64 */ 71 */
65 private char[] buf; 72 static class ColReader extends UnicodeReader {
66 private int bp; 73
67 private int buflen; 74 int col;
68 75
69 /** The current character. 76 ColReader(ScannerFactory fac, char[] input, int inputLength) {
70 */ 77 super(fac, input, inputLength);
71 private char ch; 78 }
72 79
73 /** The column number position of the current character. 80 @Override
74 */ 81 protected void convertUnicode() {
75 private int col; 82 if (ch == '\\' && unicodeConversionBp != bp) {
76 83 bp++; ch = buf[bp]; col++;
77 /** The buffer index of the last converted Unicode character 84 if (ch == 'u') {
78 */ 85 do {
79 private int unicodeConversionBp = 0; 86 bp++; ch = buf[bp]; col++;
80 87 } while (ch == 'u');
81 /** 88 int limit = bp + 3;
82 * Buffer for doc comment. 89 if (limit < buflen) {
83 */ 90 int d = digit(bp, 16);
84 private char[] docCommentBuffer = new char[1024]; 91 int code = d;
85 92 while (bp < limit && d >= 0) {
86 /** 93 bp++; ch = buf[bp]; col++;
87 * Number of characters in doc comment buffer. 94 d = digit(bp, 16);
88 */ 95 code = (code << 4) + d;
89 private int docCommentCount; 96 }
90 97 if (d >= 0) {
91 /** 98 ch = (char)code;
92 * Translated and stripped contents of doc comment 99 unicodeConversionBp = bp;
93 */ 100 return;
94 private String docComment = null; 101 }
95 102 }
96 103 // "illegal.Unicode.esc", reported by base scanner
97 /** Unconditionally expand the comment buffer. 104 } else {
98 */ 105 bp--;
99 private void expandCommentBuffer() { 106 ch = '\\';
100 char[] newBuffer = new char[docCommentBuffer.length * 2]; 107 col--;
101 System.arraycopy(docCommentBuffer, 0, newBuffer, 108 }
102 0, docCommentBuffer.length); 109 }
103 docCommentBuffer = newBuffer; 110 }
104 } 111
105 112 @Override
106 /** Convert an ASCII digit from its base (8, 10, or 16) 113 protected void scanCommentChar() {
107 * to its value. 114 scanChar();
108 */ 115 if (ch == '\\') {
109 private int digit(int base) { 116 if (peekChar() == '\\' && !isUnicode()) {
110 char c = ch; 117 putChar(ch, false);
111 int result = Character.digit(c, base); 118 bp++; col++;
112 if (result >= 0 && c > 0x7f) { 119 } else {
113 ch = "0123456789abcdef".charAt(result); 120 convertUnicode();
121 }
122 }
123 }
124
125 @Override
126 protected void scanChar() {
127 bp++;
128 ch = buf[bp];
129 switch (ch) {
130 case '\r': // return
131 col = 0;
132 break;
133 case '\n': // newline
134 if (bp == 0 || buf[bp-1] != '\r') {
135 col = 0;
136 }
137 break;
138 case '\t': // tab
139 col = (col / TabInc * TabInc) + TabInc;
140 break;
141 case '\\': // possible Unicode
142 col++;
143 convertUnicode();
144 break;
145 default:
146 col++;
147 break;
148 }
149 }
150 }
151
152 protected class JavadocComment extends JavaTokenizer.BasicComment<ColReader> {
153
154 /**
155 * Translated and stripped contents of doc comment
156 */
157 private String docComment = null;
158
159 JavadocComment(ColReader comment_reader, CommentStyle cs) {
160 super(comment_reader, cs);
114 } 161 }
115 return result; 162
116 } 163 public String getText() {
117 164 if (!scanned && cs == CommentStyle.JAVADOC) {
118 /** Convert Unicode escape; bp points to initial '\' character 165 scanDocComment();
119 * (Spec 3.3). 166 }
120 */ 167 return docComment;
121 private void convertUnicode() { 168 }
122 if (ch == '\\' && unicodeConversionBp != bp) { 169
123 bp++; ch = buf[bp]; col++; 170 @Override
124 if (ch == 'u') { 171 @SuppressWarnings("fallthrough")
125 do { 172 protected void scanDocComment() {
126 bp++; ch = buf[bp]; col++; 173 try {
127 } while (ch == 'u'); 174 boolean firstLine = true;
128 int limit = bp + 3; 175
129 if (limit < buflen) { 176 // Skip over first slash
130 int d = digit(16); 177 comment_reader.scanCommentChar();
131 int code = d; 178 // Skip over first star
132 while (bp < limit && d >= 0) { 179 comment_reader.scanCommentChar();
133 bp++; ch = buf[bp]; col++; 180
134 d = digit(16); 181 // consume any number of stars
135 code = (code << 4) + d; 182 while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
136 } 183 comment_reader.scanCommentChar();
137 if (d >= 0) { 184 }
138 ch = (char)code; 185 // is the comment in the form /**/, /***/, /****/, etc. ?
139 unicodeConversionBp = bp; 186 if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {
140 return; 187 docComment = "";
141 } 188 return;
189 }
190
191 // skip a newline on the first line of the comment.
192 if (comment_reader.bp < comment_reader.buflen) {
193 if (comment_reader.ch == LF) {
194 comment_reader.scanCommentChar();
195 firstLine = false;
196 } else if (comment_reader.ch == CR) {
197 comment_reader.scanCommentChar();
198 if (comment_reader.ch == LF) {
199 comment_reader.scanCommentChar();
200 firstLine = false;
201 }
202 }
203 }
204
205 outerLoop:
206
207 // The outerLoop processes the doc comment, looping once
208 // for each line. For each line, it first strips off
209 // whitespace, then it consumes any stars, then it
210 // puts the rest of the line into our buffer.
211 while (comment_reader.bp < comment_reader.buflen) {
212
213 // The wsLoop consumes whitespace from the beginning
214 // of each line.
215 wsLoop:
216
217 while (comment_reader.bp < comment_reader.buflen) {
218 switch(comment_reader.ch) {
219 case ' ':
220 comment_reader.scanCommentChar();
221 break;
222 case '\t':
223 comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;
224 comment_reader.scanCommentChar();
225 break;
226 case FF:
227 comment_reader.col = 0;
228 comment_reader.scanCommentChar();
229 break;
230 // Treat newline at beginning of line (blank line, no star)
231 // as comment text. Old Javadoc compatibility requires this.
232 /*---------------------------------*
233 case CR: // (Spec 3.4)
234 doc_reader.scanCommentChar();
235 if (ch == LF) {
236 col = 0;
237 doc_reader.scanCommentChar();
238 }
239 break;
240 case LF: // (Spec 3.4)
241 doc_reader.scanCommentChar();
242 break;
243 *---------------------------------*/
244 default:
245 // we've seen something that isn't whitespace;
246 // jump out.
247 break wsLoop;
248 }
249 }
250
251 // Are there stars here? If so, consume them all
252 // and check for the end of comment.
253 if (comment_reader.ch == '*') {
254 // skip all of the stars
255 do {
256 comment_reader.scanCommentChar();
257 } while (comment_reader.ch == '*');
258
259 // check for the closing slash.
260 if (comment_reader.ch == '/') {
261 // We're done with the doc comment
262 // scanChar() and breakout.
263 break outerLoop;
264 }
265 } else if (! firstLine) {
266 //The current line does not begin with a '*' so we will indent it.
267 for (int i = 1; i < comment_reader.col; i++) {
268 comment_reader.putChar(' ', false);
269 }
270 }
271 // The textLoop processes the rest of the characters
272 // on the line, adding them to our buffer.
273 textLoop:
274 while (comment_reader.bp < comment_reader.buflen) {
275 switch (comment_reader.ch) {
276 case '*':
277 // Is this just a star? Or is this the
278 // end of a comment?
279 comment_reader.scanCommentChar();
280 if (comment_reader.ch == '/') {
281 // This is the end of the comment,
282 // set ch and return our buffer.
283 break outerLoop;
284 }
285 // This is just an ordinary star. Add it to
286 // the buffer.
287 comment_reader.putChar('*', false);
288 break;
289 case ' ':
290 case '\t':
291 comment_reader.putChar(comment_reader.ch, false);
292 comment_reader.scanCommentChar();
293 break;
294 case FF:
295 comment_reader.scanCommentChar();
296 break textLoop; // treat as end of line
297 case CR: // (Spec 3.4)
298 comment_reader.scanCommentChar();
299 if (comment_reader.ch != LF) {
300 // Canonicalize CR-only line terminator to LF
301 comment_reader.putChar((char)LF, false);
302 break textLoop;
303 }
304 /* fall through to LF case */
305 case LF: // (Spec 3.4)
306 // We've seen a newline. Add it to our
307 // buffer and break out of this loop,
308 // starting fresh on a new line.
309 comment_reader.putChar(comment_reader.ch, false);
310 comment_reader.scanCommentChar();
311 break textLoop;
312 default:
313 // Add the character to our buffer.
314 comment_reader.putChar(comment_reader.ch, false);
315 comment_reader.scanCommentChar();
316 }
317 } // end textLoop
318 firstLine = false;
319 } // end outerLoop
320
321 if (comment_reader.sp > 0) {
322 int i = comment_reader.sp - 1;
323 trailLoop:
324 while (i > -1) {
325 switch (comment_reader.sbuf[i]) {
326 case '*':
327 i--;
328 break;
329 default:
330 break trailLoop;
331 }
332 }
333 comment_reader.sp = i + 1;
334
335 // Store the text of the doc comment
336 docComment = comment_reader.chars();
337 } else {
338 docComment = "";
142 } 339 }
143 // "illegal.Unicode.esc", reported by base scanner 340 } finally {
144 } else { 341 scanned = true;
145 bp--; 342 if (docComment != null &&
146 ch = '\\'; 343 docComment.matches("(?sm).*^\\s*@deprecated( |$).*")) {
147 col--; 344 deprecatedFlag = true;
148 }
149 }
150 }
151
152
153 /** Read next character.
154 */
155 private void scanChar() {
156 bp++;
157 ch = buf[bp];
158 switch (ch) {
159 case '\r': // return
160 col = 0;
161 break;
162 case '\n': // newline
163 if (bp == 0 || buf[bp-1] != '\r') {
164 col = 0;
165 }
166 break;
167 case '\t': // tab
168 col = (col / TabInc * TabInc) + TabInc;
169 break;
170 case '\\': // possible Unicode
171 col++;
172 convertUnicode();
173 break;
174 default:
175 col++;
176 break;
177 }
178 }
179
180 @Override
181 public Token readToken() {
182 docComment = null;
183 Token tk = super.readToken();
184 tk.docComment = docComment;
185 return tk;
186 }
187
188 /**
189 * Read next character in doc comment, skipping over double '\' characters.
190 * If a double '\' is skipped, put in the buffer and update buffer count.
191 */
192 private void scanDocCommentChar() {
193 scanChar();
194 if (ch == '\\') {
195 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
196 if (docCommentCount == docCommentBuffer.length)
197 expandCommentBuffer();
198 docCommentBuffer[docCommentCount++] = ch;
199 bp++; col++;
200 } else {
201 convertUnicode();
202 }
203 }
204 }
205
206 /**
207 * Process a doc comment and make the string content available.
208 * Strips leading whitespace and stars.
209 */
210 @SuppressWarnings("fallthrough")
211 protected void processComment(int pos, int endPos, CommentStyle style) {
212 if (style != CommentStyle.JAVADOC) {
213 return;
214 }
215
216 buf = reader.getRawCharacters(pos, endPos);
217 buflen = buf.length;
218 bp = 0;
219 col = 0;
220
221 docCommentCount = 0;
222
223 boolean firstLine = true;
224
225 // Skip over first slash
226 scanDocCommentChar();
227 // Skip over first star
228 scanDocCommentChar();
229
230 // consume any number of stars
231 while (bp < buflen && ch == '*') {
232 scanDocCommentChar();
233 }
234 // is the comment in the form /**/, /***/, /****/, etc. ?
235 if (bp < buflen && ch == '/') {
236 docComment = "";
237 return;
238 }
239
240 // skip a newline on the first line of the comment.
241 if (bp < buflen) {
242 if (ch == LF) {
243 scanDocCommentChar();
244 firstLine = false;
245 } else if (ch == CR) {
246 scanDocCommentChar();
247 if (ch == LF) {
248 scanDocCommentChar();
249 firstLine = false;
250 } 345 }
251 } 346 }
252 } 347 }
253 348 }
254 outerLoop: 349
255 350 @Override
256 // The outerLoop processes the doc comment, looping once
257 // for each line. For each line, it first strips off
258 // whitespace, then it consumes any stars, then it
259 // puts the rest of the line into our buffer.
260 while (bp < buflen) {
261
262 // The wsLoop consumes whitespace from the beginning
263 // of each line.
264 wsLoop:
265
266 while (bp < buflen) {
267 switch(ch) {
268 case ' ':
269 scanDocCommentChar();
270 break;
271 case '\t':
272 col = ((col - 1) / TabInc * TabInc) + TabInc;
273 scanDocCommentChar();
274 break;
275 case FF:
276 col = 0;
277 scanDocCommentChar();
278 break;
279 // Treat newline at beginning of line (blank line, no star)
280 // as comment text. Old Javadoc compatibility requires this.
281 /*---------------------------------*
282 case CR: // (Spec 3.4)
283 scanDocCommentChar();
284 if (ch == LF) {
285 col = 0;
286 scanDocCommentChar();
287 }
288 break;
289 case LF: // (Spec 3.4)
290 scanDocCommentChar();
291 break;
292 *---------------------------------*/
293 default:
294 // we've seen something that isn't whitespace;
295 // jump out.
296 break wsLoop;
297 }
298 }
299
300 // Are there stars here? If so, consume them all
301 // and check for the end of comment.
302 if (ch == '*') {
303 // skip all of the stars
304 do {
305 scanDocCommentChar();
306 } while (ch == '*');
307
308 // check for the closing slash.
309 if (ch == '/') {
310 // We're done with the doc comment
311 // scanChar() and breakout.
312 break outerLoop;
313 }
314 } else if (! firstLine) {
315 //The current line does not begin with a '*' so we will indent it.
316 for (int i = 1; i < col; i++) {
317 if (docCommentCount == docCommentBuffer.length)
318 expandCommentBuffer();
319 docCommentBuffer[docCommentCount++] = ' ';
320 }
321 }
322
323 // The textLoop processes the rest of the characters
324 // on the line, adding them to our buffer.
325 textLoop:
326 while (bp < buflen) {
327 switch (ch) {
328 case '*':
329 // Is this just a star? Or is this the
330 // end of a comment?
331 scanDocCommentChar();
332 if (ch == '/') {
333 // This is the end of the comment,
334 // set ch and return our buffer.
335 break outerLoop;
336 }
337 // This is just an ordinary star. Add it to
338 // the buffer.
339 if (docCommentCount == docCommentBuffer.length)
340 expandCommentBuffer();
341 docCommentBuffer[docCommentCount++] = '*';
342 break;
343 case ' ':
344 case '\t':
345 if (docCommentCount == docCommentBuffer.length)
346 expandCommentBuffer();
347 docCommentBuffer[docCommentCount++] = ch;
348 scanDocCommentChar();
349 break;
350 case FF:
351 scanDocCommentChar();
352 break textLoop; // treat as end of line
353 case CR: // (Spec 3.4)
354 scanDocCommentChar();
355 if (ch != LF) {
356 // Canonicalize CR-only line terminator to LF
357 if (docCommentCount == docCommentBuffer.length)
358 expandCommentBuffer();
359 docCommentBuffer[docCommentCount++] = (char)LF;
360 break textLoop;
361 }
362 /* fall through to LF case */
363 case LF: // (Spec 3.4)
364 // We've seen a newline. Add it to our
365 // buffer and break out of this loop,
366 // starting fresh on a new line.
367 if (docCommentCount == docCommentBuffer.length)
368 expandCommentBuffer();
369 docCommentBuffer[docCommentCount++] = ch;
370 scanDocCommentChar();
371 break textLoop;
372 default:
373 // Add the character to our buffer.
374 if (docCommentCount == docCommentBuffer.length)
375 expandCommentBuffer();
376 docCommentBuffer[docCommentCount++] = ch;
377 scanDocCommentChar();
378 }
379 } // end textLoop
380 firstLine = false;
381 } // end outerLoop
382
383 if (docCommentCount > 0) {
384 int i = docCommentCount - 1;
385 trailLoop:
386 while (i > -1) {
387 switch (docCommentBuffer[i]) {
388 case '*':
389 i--;
390 break;
391 default:
392 break trailLoop;
393 }
394 }
395 docCommentCount = i + 1;
396
397 // Store the text of the doc comment
398 docComment = new String(docCommentBuffer, 0 , docCommentCount);
399 } else {
400 docComment = "";
401 }
402 }
403
404 /** Build a map for translating between line numbers and
405 * positions in the input.
406 *
407 * @return a LineMap */
408 public Position.LineMap getLineMap() { 351 public Position.LineMap getLineMap() {
409 char[] buf = reader.getRawCharacters(); 352 char[] buf = reader.getRawCharacters();
410 return Position.makeLineMap(buf, buf.length, true); 353 return Position.makeLineMap(buf, buf.length, true);
411 } 354 }
412 } 355 }

mercurial