|
1 /* |
|
2 * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 package com.sun.tools.javac.parser; |
|
27 |
|
28 import java.text.BreakIterator; |
|
29 import java.util.Arrays; |
|
30 import java.util.HashMap; |
|
31 import java.util.HashSet; |
|
32 import java.util.Locale; |
|
33 import java.util.Map; |
|
34 import java.util.Set; |
|
35 |
|
36 import com.sun.source.doctree.AttributeTree.ValueKind; |
|
37 import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind; |
|
38 import com.sun.tools.javac.parser.Tokens.Comment; |
|
39 import com.sun.tools.javac.parser.Tokens.TokenKind; |
|
40 import com.sun.tools.javac.tree.DCTree; |
|
41 import com.sun.tools.javac.tree.DCTree.DCAttribute; |
|
42 import com.sun.tools.javac.tree.DCTree.DCDocComment; |
|
43 import com.sun.tools.javac.tree.DCTree.DCEndElement; |
|
44 import com.sun.tools.javac.tree.DCTree.DCEndPosTree; |
|
45 import com.sun.tools.javac.tree.DCTree.DCErroneous; |
|
46 import com.sun.tools.javac.tree.DCTree.DCIdentifier; |
|
47 import com.sun.tools.javac.tree.DCTree.DCReference; |
|
48 import com.sun.tools.javac.tree.DCTree.DCStartElement; |
|
49 import com.sun.tools.javac.tree.DCTree.DCText; |
|
50 import com.sun.tools.javac.tree.DocTreeMaker; |
|
51 import com.sun.tools.javac.tree.JCTree; |
|
52 import com.sun.tools.javac.util.DiagnosticSource; |
|
53 import com.sun.tools.javac.util.List; |
|
54 import com.sun.tools.javac.util.ListBuffer; |
|
55 import com.sun.tools.javac.util.Log; |
|
56 import com.sun.tools.javac.util.Name; |
|
57 import com.sun.tools.javac.util.Names; |
|
58 import com.sun.tools.javac.util.Options; |
|
59 import com.sun.tools.javac.util.Position; |
|
60 import com.sun.tools.javac.util.StringUtils; |
|
61 import static com.sun.tools.javac.util.LayoutCharacters.*; |
|
62 |
|
63 /** |
|
64 * |
|
65 * <p><b>This is NOT part of any supported API. |
|
66 * If you write code that depends on this, you do so at your own risk. |
|
67 * This code and its internal interfaces are subject to change or |
|
68 * deletion without notice.</b> |
|
69 */ |
|
70 public class DocCommentParser { |
|
71 static class ParseException extends Exception { |
|
72 private static final long serialVersionUID = 0; |
|
73 ParseException(String key) { |
|
74 super(key); |
|
75 } |
|
76 } |
|
77 |
|
78 final ParserFactory fac; |
|
79 final DiagnosticSource diagSource; |
|
80 final Comment comment; |
|
81 final DocTreeMaker m; |
|
82 final Names names; |
|
83 |
|
84 BreakIterator sentenceBreaker; |
|
85 |
|
86 /** The input buffer, index of most recent character read, |
|
87 * index of one past last character in buffer. |
|
88 */ |
|
89 protected char[] buf; |
|
90 protected int bp; |
|
91 protected int buflen; |
|
92 |
|
93 /** The current character. |
|
94 */ |
|
95 protected char ch; |
|
96 |
|
97 int textStart = -1; |
|
98 int lastNonWhite = -1; |
|
99 boolean newline = true; |
|
100 |
|
101 Map<Name, TagParser> tagParsers; |
|
102 |
|
103 DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) { |
|
104 this.fac = fac; |
|
105 this.diagSource = diagSource; |
|
106 this.comment = comment; |
|
107 names = fac.names; |
|
108 m = fac.docTreeMaker; |
|
109 |
|
110 Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale; |
|
111 |
|
112 Options options = fac.options; |
|
113 boolean useBreakIterator = options.isSet("breakIterator"); |
|
114 if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage())) |
|
115 sentenceBreaker = BreakIterator.getSentenceInstance(locale); |
|
116 |
|
117 initTagParsers(); |
|
118 } |
|
119 |
|
120 DCDocComment parse() { |
|
121 String c = comment.getText(); |
|
122 buf = new char[c.length() + 1]; |
|
123 c.getChars(0, c.length(), buf, 0); |
|
124 buf[buf.length - 1] = EOI; |
|
125 buflen = buf.length - 1; |
|
126 bp = -1; |
|
127 nextChar(); |
|
128 |
|
129 List<DCTree> body = blockContent(); |
|
130 List<DCTree> tags = blockTags(); |
|
131 |
|
132 // split body into first sentence and body |
|
133 ListBuffer<DCTree> fs = new ListBuffer<DCTree>(); |
|
134 loop: |
|
135 for (; body.nonEmpty(); body = body.tail) { |
|
136 DCTree t = body.head; |
|
137 switch (t.getKind()) { |
|
138 case TEXT: |
|
139 String s = ((DCText) t).getBody(); |
|
140 int i = getSentenceBreak(s); |
|
141 if (i > 0) { |
|
142 int i0 = i; |
|
143 while (i0 > 0 && isWhitespace(s.charAt(i0 - 1))) |
|
144 i0--; |
|
145 fs.add(m.at(t.pos).Text(s.substring(0, i0))); |
|
146 int i1 = i; |
|
147 while (i1 < s.length() && isWhitespace(s.charAt(i1))) |
|
148 i1++; |
|
149 body = body.tail; |
|
150 if (i1 < s.length()) |
|
151 body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1))); |
|
152 break loop; |
|
153 } else if (body.tail.nonEmpty()) { |
|
154 if (isSentenceBreak(body.tail.head)) { |
|
155 int i0 = s.length() - 1; |
|
156 while (i0 > 0 && isWhitespace(s.charAt(i0))) |
|
157 i0--; |
|
158 fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1))); |
|
159 body = body.tail; |
|
160 break loop; |
|
161 } |
|
162 } |
|
163 break; |
|
164 |
|
165 case START_ELEMENT: |
|
166 case END_ELEMENT: |
|
167 if (isSentenceBreak(t)) |
|
168 break loop; |
|
169 break; |
|
170 } |
|
171 fs.add(t); |
|
172 } |
|
173 |
|
174 @SuppressWarnings("unchecked") |
|
175 DCTree first = getFirst(fs.toList(), body, tags); |
|
176 int pos = (first == null) ? Position.NOPOS : first.pos; |
|
177 |
|
178 DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags); |
|
179 return dc; |
|
180 } |
|
181 |
|
182 void nextChar() { |
|
183 ch = buf[bp < buflen ? ++bp : buflen]; |
|
184 switch (ch) { |
|
185 case '\f': case '\n': case '\r': |
|
186 newline = true; |
|
187 } |
|
188 } |
|
189 |
|
190 /** |
|
191 * Read block content, consisting of text, html and inline tags. |
|
192 * Terminated by the end of input, or the beginning of the next block tag: |
|
193 * i.e. @ as the first non-whitespace character on a line. |
|
194 */ |
|
195 @SuppressWarnings("fallthrough") |
|
196 protected List<DCTree> blockContent() { |
|
197 ListBuffer<DCTree> trees = new ListBuffer<DCTree>(); |
|
198 textStart = -1; |
|
199 |
|
200 loop: |
|
201 while (bp < buflen) { |
|
202 switch (ch) { |
|
203 case '\n': case '\r': case '\f': |
|
204 newline = true; |
|
205 // fallthrough |
|
206 |
|
207 case ' ': case '\t': |
|
208 nextChar(); |
|
209 break; |
|
210 |
|
211 case '&': |
|
212 entity(trees); |
|
213 break; |
|
214 |
|
215 case '<': |
|
216 newline = false; |
|
217 addPendingText(trees, bp - 1); |
|
218 trees.add(html()); |
|
219 if (textStart == -1) { |
|
220 textStart = bp; |
|
221 lastNonWhite = -1; |
|
222 } |
|
223 break; |
|
224 |
|
225 case '>': |
|
226 newline = false; |
|
227 addPendingText(trees, bp - 1); |
|
228 trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt")); |
|
229 nextChar(); |
|
230 if (textStart == -1) { |
|
231 textStart = bp; |
|
232 lastNonWhite = -1; |
|
233 } |
|
234 break; |
|
235 |
|
236 case '{': |
|
237 inlineTag(trees); |
|
238 break; |
|
239 |
|
240 case '@': |
|
241 if (newline) { |
|
242 addPendingText(trees, lastNonWhite); |
|
243 break loop; |
|
244 } |
|
245 // fallthrough |
|
246 |
|
247 default: |
|
248 newline = false; |
|
249 if (textStart == -1) |
|
250 textStart = bp; |
|
251 lastNonWhite = bp; |
|
252 nextChar(); |
|
253 } |
|
254 } |
|
255 |
|
256 if (lastNonWhite != -1) |
|
257 addPendingText(trees, lastNonWhite); |
|
258 |
|
259 return trees.toList(); |
|
260 } |
|
261 |
|
262 /** |
|
263 * Read a series of block tags, including their content. |
|
264 * Standard tags parse their content appropriately. |
|
265 * Non-standard tags are represented by {@link UnknownBlockTag}. |
|
266 */ |
|
267 protected List<DCTree> blockTags() { |
|
268 ListBuffer<DCTree> tags = new ListBuffer<DCTree>(); |
|
269 while (ch == '@') |
|
270 tags.add(blockTag()); |
|
271 return tags.toList(); |
|
272 } |
|
273 |
|
274 /** |
|
275 * Read a single block tag, including its content. |
|
276 * Standard tags parse their content appropriately. |
|
277 * Non-standard tags are represented by {@link UnknownBlockTag}. |
|
278 */ |
|
279 protected DCTree blockTag() { |
|
280 int p = bp; |
|
281 try { |
|
282 nextChar(); |
|
283 if (isIdentifierStart(ch)) { |
|
284 Name name = readTagName(); |
|
285 TagParser tp = tagParsers.get(name); |
|
286 if (tp == null) { |
|
287 List<DCTree> content = blockContent(); |
|
288 return m.at(p).UnknownBlockTag(name, content); |
|
289 } else { |
|
290 switch (tp.getKind()) { |
|
291 case BLOCK: |
|
292 return tp.parse(p); |
|
293 case INLINE: |
|
294 return erroneous("dc.bad.inline.tag", p); |
|
295 } |
|
296 } |
|
297 } |
|
298 blockContent(); |
|
299 |
|
300 return erroneous("dc.no.tag.name", p); |
|
301 } catch (ParseException e) { |
|
302 blockContent(); |
|
303 return erroneous(e.getMessage(), p); |
|
304 } |
|
305 } |
|
306 |
|
307 protected void inlineTag(ListBuffer<DCTree> list) { |
|
308 newline = false; |
|
309 nextChar(); |
|
310 if (ch == '@') { |
|
311 addPendingText(list, bp - 2); |
|
312 list.add(inlineTag()); |
|
313 textStart = bp; |
|
314 lastNonWhite = -1; |
|
315 } else { |
|
316 if (textStart == -1) |
|
317 textStart = bp - 1; |
|
318 lastNonWhite = bp; |
|
319 } |
|
320 } |
|
321 |
|
322 /** |
|
323 * Read a single inline tag, including its content. |
|
324 * Standard tags parse their content appropriately. |
|
325 * Non-standard tags are represented by {@link UnknownBlockTag}. |
|
326 * Malformed tags may be returned as {@link Erroneous}. |
|
327 */ |
|
328 protected DCTree inlineTag() { |
|
329 int p = bp - 1; |
|
330 try { |
|
331 nextChar(); |
|
332 if (isIdentifierStart(ch)) { |
|
333 Name name = readTagName(); |
|
334 skipWhitespace(); |
|
335 |
|
336 TagParser tp = tagParsers.get(name); |
|
337 if (tp == null) { |
|
338 DCTree text = inlineText(); |
|
339 if (text != null) { |
|
340 nextChar(); |
|
341 return m.at(p).UnknownInlineTag(name, List.of(text)).setEndPos(bp); |
|
342 } |
|
343 } else if (tp.getKind() == TagParser.Kind.INLINE) { |
|
344 DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p); |
|
345 if (tree != null) { |
|
346 return tree.setEndPos(bp); |
|
347 } |
|
348 } else { |
|
349 inlineText(); // skip content |
|
350 nextChar(); |
|
351 } |
|
352 } |
|
353 return erroneous("dc.no.tag.name", p); |
|
354 } catch (ParseException e) { |
|
355 return erroneous(e.getMessage(), p); |
|
356 } |
|
357 } |
|
358 |
|
359 /** |
|
360 * Read plain text content of an inline tag. |
|
361 * Matching pairs of { } are skipped; the text is terminated by the first |
|
362 * unmatched }. It is an error if the beginning of the next tag is detected. |
|
363 */ |
|
364 protected DCTree inlineText() throws ParseException { |
|
365 skipWhitespace(); |
|
366 int pos = bp; |
|
367 int depth = 1; |
|
368 |
|
369 loop: |
|
370 while (bp < buflen) { |
|
371 switch (ch) { |
|
372 case '\n': case '\r': case '\f': |
|
373 newline = true; |
|
374 break; |
|
375 |
|
376 case ' ': case '\t': |
|
377 break; |
|
378 |
|
379 case '{': |
|
380 newline = false; |
|
381 lastNonWhite = bp; |
|
382 depth++; |
|
383 break; |
|
384 |
|
385 case '}': |
|
386 if (--depth == 0) { |
|
387 return m.at(pos).Text(newString(pos, bp)); |
|
388 } |
|
389 newline = false; |
|
390 lastNonWhite = bp; |
|
391 break; |
|
392 |
|
393 case '@': |
|
394 if (newline) |
|
395 break loop; |
|
396 newline = false; |
|
397 lastNonWhite = bp; |
|
398 break; |
|
399 |
|
400 default: |
|
401 newline = false; |
|
402 lastNonWhite = bp; |
|
403 break; |
|
404 } |
|
405 nextChar(); |
|
406 } |
|
407 throw new ParseException("dc.unterminated.inline.tag"); |
|
408 } |
|
409 |
|
410 /** |
|
411 * Read Java class name, possibly followed by member |
|
412 * Matching pairs of < > are skipped. The text is terminated by the first |
|
413 * unmatched }. It is an error if the beginning of the next tag is detected. |
|
414 */ |
|
415 // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE |
|
416 // TODO: improve quality of parse to forbid bad constructions. |
|
417 @SuppressWarnings("fallthrough") |
|
418 protected DCReference reference(boolean allowMember) throws ParseException { |
|
419 int pos = bp; |
|
420 int depth = 0; |
|
421 |
|
422 // scan to find the end of the signature, by looking for the first |
|
423 // whitespace not enclosed in () or <>, or the end of the tag |
|
424 loop: |
|
425 while (bp < buflen) { |
|
426 switch (ch) { |
|
427 case '\n': case '\r': case '\f': |
|
428 newline = true; |
|
429 // fallthrough |
|
430 |
|
431 case ' ': case '\t': |
|
432 if (depth == 0) |
|
433 break loop; |
|
434 break; |
|
435 |
|
436 case '(': |
|
437 case '<': |
|
438 newline = false; |
|
439 depth++; |
|
440 break; |
|
441 |
|
442 case ')': |
|
443 case '>': |
|
444 newline = false; |
|
445 --depth; |
|
446 break; |
|
447 |
|
448 case '}': |
|
449 if (bp == pos) |
|
450 return null; |
|
451 newline = false; |
|
452 break loop; |
|
453 |
|
454 case '@': |
|
455 if (newline) |
|
456 break loop; |
|
457 // fallthrough |
|
458 |
|
459 default: |
|
460 newline = false; |
|
461 |
|
462 } |
|
463 nextChar(); |
|
464 } |
|
465 |
|
466 if (depth != 0) |
|
467 throw new ParseException("dc.unterminated.signature"); |
|
468 |
|
469 String sig = newString(pos, bp); |
|
470 |
|
471 // Break sig apart into qualifiedExpr member paramTypes. |
|
472 JCTree qualExpr; |
|
473 Name member; |
|
474 List<JCTree> paramTypes; |
|
475 |
|
476 Log.DeferredDiagnosticHandler deferredDiagnosticHandler |
|
477 = new Log.DeferredDiagnosticHandler(fac.log); |
|
478 |
|
479 try { |
|
480 int hash = sig.indexOf("#"); |
|
481 int lparen = sig.indexOf("(", hash + 1); |
|
482 if (hash == -1) { |
|
483 if (lparen == -1) { |
|
484 qualExpr = parseType(sig); |
|
485 member = null; |
|
486 } else { |
|
487 qualExpr = null; |
|
488 member = parseMember(sig.substring(0, lparen)); |
|
489 } |
|
490 } else { |
|
491 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash)); |
|
492 if (lparen == -1) |
|
493 member = parseMember(sig.substring(hash + 1)); |
|
494 else |
|
495 member = parseMember(sig.substring(hash + 1, lparen)); |
|
496 } |
|
497 |
|
498 if (lparen < 0) { |
|
499 paramTypes = null; |
|
500 } else { |
|
501 int rparen = sig.indexOf(")", lparen); |
|
502 if (rparen != sig.length() - 1) |
|
503 throw new ParseException("dc.ref.bad.parens"); |
|
504 paramTypes = parseParams(sig.substring(lparen + 1, rparen)); |
|
505 } |
|
506 |
|
507 if (!deferredDiagnosticHandler.getDiagnostics().isEmpty()) |
|
508 throw new ParseException("dc.ref.syntax.error"); |
|
509 |
|
510 } finally { |
|
511 fac.log.popDiagnosticHandler(deferredDiagnosticHandler); |
|
512 } |
|
513 |
|
514 return m.at(pos).Reference(sig, qualExpr, member, paramTypes).setEndPos(bp); |
|
515 } |
|
516 |
|
517 JCTree parseType(String s) throws ParseException { |
|
518 JavacParser p = fac.newParser(s, false, false, false); |
|
519 JCTree tree = p.parseType(); |
|
520 if (p.token().kind != TokenKind.EOF) |
|
521 throw new ParseException("dc.ref.unexpected.input"); |
|
522 return tree; |
|
523 } |
|
524 |
|
525 Name parseMember(String s) throws ParseException { |
|
526 JavacParser p = fac.newParser(s, false, false, false); |
|
527 Name name = p.ident(); |
|
528 if (p.token().kind != TokenKind.EOF) |
|
529 throw new ParseException("dc.ref.unexpected.input"); |
|
530 return name; |
|
531 } |
|
532 |
|
533 List<JCTree> parseParams(String s) throws ParseException { |
|
534 if (s.trim().isEmpty()) |
|
535 return List.nil(); |
|
536 |
|
537 JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false); |
|
538 ListBuffer<JCTree> paramTypes = new ListBuffer<JCTree>(); |
|
539 paramTypes.add(p.parseType()); |
|
540 |
|
541 if (p.token().kind == TokenKind.IDENTIFIER) |
|
542 p.nextToken(); |
|
543 |
|
544 while (p.token().kind == TokenKind.COMMA) { |
|
545 p.nextToken(); |
|
546 paramTypes.add(p.parseType()); |
|
547 |
|
548 if (p.token().kind == TokenKind.IDENTIFIER) |
|
549 p.nextToken(); |
|
550 } |
|
551 |
|
552 if (p.token().kind != TokenKind.EOF) |
|
553 throw new ParseException("dc.ref.unexpected.input"); |
|
554 |
|
555 return paramTypes.toList(); |
|
556 } |
|
557 |
|
558 /** |
|
559 * Read Java identifier |
|
560 * Matching pairs of { } are skipped; the text is terminated by the first |
|
561 * unmatched }. It is an error if the beginning of the next tag is detected. |
|
562 */ |
|
563 @SuppressWarnings("fallthrough") |
|
564 protected DCIdentifier identifier() throws ParseException { |
|
565 skipWhitespace(); |
|
566 int pos = bp; |
|
567 |
|
568 if (isJavaIdentifierStart(ch)) { |
|
569 Name name = readJavaIdentifier(); |
|
570 return m.at(pos).Identifier(name); |
|
571 } |
|
572 |
|
573 throw new ParseException("dc.identifier.expected"); |
|
574 } |
|
575 |
|
576 /** |
|
577 * Read a quoted string. |
|
578 * It is an error if the beginning of the next tag is detected. |
|
579 */ |
|
580 @SuppressWarnings("fallthrough") |
|
581 protected DCText quotedString() { |
|
582 int pos = bp; |
|
583 nextChar(); |
|
584 |
|
585 loop: |
|
586 while (bp < buflen) { |
|
587 switch (ch) { |
|
588 case '\n': case '\r': case '\f': |
|
589 newline = true; |
|
590 break; |
|
591 |
|
592 case ' ': case '\t': |
|
593 break; |
|
594 |
|
595 case '"': |
|
596 nextChar(); |
|
597 // trim trailing white-space? |
|
598 return m.at(pos).Text(newString(pos, bp)); |
|
599 |
|
600 case '@': |
|
601 if (newline) |
|
602 break loop; |
|
603 |
|
604 } |
|
605 nextChar(); |
|
606 } |
|
607 return null; |
|
608 } |
|
609 |
|
610 /** |
|
611 * Read general text content of an inline tag, including HTML entities and elements. |
|
612 * Matching pairs of { } are skipped; the text is terminated by the first |
|
613 * unmatched }. It is an error if the beginning of the next tag is detected. |
|
614 */ |
|
615 @SuppressWarnings("fallthrough") |
|
616 protected List<DCTree> inlineContent() { |
|
617 ListBuffer<DCTree> trees = new ListBuffer<DCTree>(); |
|
618 |
|
619 skipWhitespace(); |
|
620 int pos = bp; |
|
621 int depth = 1; |
|
622 textStart = -1; |
|
623 |
|
624 loop: |
|
625 while (bp < buflen) { |
|
626 |
|
627 switch (ch) { |
|
628 case '\n': case '\r': case '\f': |
|
629 newline = true; |
|
630 // fall through |
|
631 |
|
632 case ' ': case '\t': |
|
633 nextChar(); |
|
634 break; |
|
635 |
|
636 case '&': |
|
637 entity(trees); |
|
638 break; |
|
639 |
|
640 case '<': |
|
641 newline = false; |
|
642 addPendingText(trees, bp - 1); |
|
643 trees.add(html()); |
|
644 break; |
|
645 |
|
646 case '{': |
|
647 newline = false; |
|
648 depth++; |
|
649 nextChar(); |
|
650 break; |
|
651 |
|
652 case '}': |
|
653 newline = false; |
|
654 if (--depth == 0) { |
|
655 addPendingText(trees, bp - 1); |
|
656 nextChar(); |
|
657 return trees.toList(); |
|
658 } |
|
659 nextChar(); |
|
660 break; |
|
661 |
|
662 case '@': |
|
663 if (newline) |
|
664 break loop; |
|
665 // fallthrough |
|
666 |
|
667 default: |
|
668 if (textStart == -1) |
|
669 textStart = bp; |
|
670 nextChar(); |
|
671 break; |
|
672 } |
|
673 } |
|
674 |
|
675 return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos)); |
|
676 } |
|
677 |
|
678 protected void entity(ListBuffer<DCTree> list) { |
|
679 newline = false; |
|
680 addPendingText(list, bp - 1); |
|
681 list.add(entity()); |
|
682 if (textStart == -1) { |
|
683 textStart = bp; |
|
684 lastNonWhite = -1; |
|
685 } |
|
686 } |
|
687 |
|
688 /** |
|
689 * Read an HTML entity. |
|
690 * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; } |
|
691 */ |
|
692 protected DCTree entity() { |
|
693 int p = bp; |
|
694 nextChar(); |
|
695 Name name = null; |
|
696 boolean checkSemi = false; |
|
697 if (ch == '#') { |
|
698 int namep = bp; |
|
699 nextChar(); |
|
700 if (isDecimalDigit(ch)) { |
|
701 nextChar(); |
|
702 while (isDecimalDigit(ch)) |
|
703 nextChar(); |
|
704 name = names.fromChars(buf, namep, bp - namep); |
|
705 } else if (ch == 'x' || ch == 'X') { |
|
706 nextChar(); |
|
707 if (isHexDigit(ch)) { |
|
708 nextChar(); |
|
709 while (isHexDigit(ch)) |
|
710 nextChar(); |
|
711 name = names.fromChars(buf, namep, bp - namep); |
|
712 } |
|
713 } |
|
714 } else if (isIdentifierStart(ch)) { |
|
715 name = readIdentifier(); |
|
716 } |
|
717 |
|
718 if (name == null) |
|
719 return erroneous("dc.bad.entity", p); |
|
720 else { |
|
721 if (ch != ';') |
|
722 return erroneous("dc.missing.semicolon", p); |
|
723 nextChar(); |
|
724 return m.at(p).Entity(name); |
|
725 } |
|
726 } |
|
727 |
|
728 /** |
|
729 * Read the start or end of an HTML tag, or an HTML comment |
|
730 * {@literal <identifier attrs> } or {@literal </identifier> } |
|
731 */ |
|
732 protected DCTree html() { |
|
733 int p = bp; |
|
734 nextChar(); |
|
735 if (isIdentifierStart(ch)) { |
|
736 Name name = readIdentifier(); |
|
737 List<DCTree> attrs = htmlAttrs(); |
|
738 if (attrs != null) { |
|
739 boolean selfClosing = false; |
|
740 if (ch == '/') { |
|
741 nextChar(); |
|
742 selfClosing = true; |
|
743 } |
|
744 if (ch == '>') { |
|
745 nextChar(); |
|
746 return m.at(p).StartElement(name, attrs, selfClosing).setEndPos(bp); |
|
747 } |
|
748 } |
|
749 } else if (ch == '/') { |
|
750 nextChar(); |
|
751 if (isIdentifierStart(ch)) { |
|
752 Name name = readIdentifier(); |
|
753 skipWhitespace(); |
|
754 if (ch == '>') { |
|
755 nextChar(); |
|
756 return m.at(p).EndElement(name); |
|
757 } |
|
758 } |
|
759 } else if (ch == '!') { |
|
760 nextChar(); |
|
761 if (ch == '-') { |
|
762 nextChar(); |
|
763 if (ch == '-') { |
|
764 nextChar(); |
|
765 while (bp < buflen) { |
|
766 int dash = 0; |
|
767 while (ch == '-') { |
|
768 dash++; |
|
769 nextChar(); |
|
770 } |
|
771 // strictly speaking, a comment should not contain "--" |
|
772 // so dash > 2 is an error, dash == 2 implies ch == '>' |
|
773 if (dash >= 2 && ch == '>') { |
|
774 nextChar(); |
|
775 return m.at(p).Comment(newString(p, bp)); |
|
776 } |
|
777 |
|
778 nextChar(); |
|
779 } |
|
780 } |
|
781 } |
|
782 } |
|
783 |
|
784 bp = p + 1; |
|
785 ch = buf[bp]; |
|
786 return erroneous("dc.malformed.html", p); |
|
787 } |
|
788 |
|
789 /** |
|
790 * Read a series of HTML attributes, terminated by {@literal > }. |
|
791 * Each attribute is of the form {@literal identifier[=value] }. |
|
792 * "value" may be unquoted, single-quoted, or double-quoted. |
|
793 */ |
|
794 protected List<DCTree> htmlAttrs() { |
|
795 ListBuffer<DCTree> attrs = new ListBuffer<DCTree>(); |
|
796 skipWhitespace(); |
|
797 |
|
798 loop: |
|
799 while (isIdentifierStart(ch)) { |
|
800 int namePos = bp; |
|
801 Name name = readIdentifier(); |
|
802 skipWhitespace(); |
|
803 List<DCTree> value = null; |
|
804 ValueKind vkind = ValueKind.EMPTY; |
|
805 if (ch == '=') { |
|
806 ListBuffer<DCTree> v = new ListBuffer<DCTree>(); |
|
807 nextChar(); |
|
808 skipWhitespace(); |
|
809 if (ch == '\'' || ch == '"') { |
|
810 vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE; |
|
811 char quote = ch; |
|
812 nextChar(); |
|
813 textStart = bp; |
|
814 while (bp < buflen && ch != quote) { |
|
815 if (newline && ch == '@') { |
|
816 attrs.add(erroneous("dc.unterminated.string", namePos)); |
|
817 // No point trying to read more. |
|
818 // In fact, all attrs get discarded by the caller |
|
819 // and superseded by a malformed.html node because |
|
820 // the html tag itself is not terminated correctly. |
|
821 break loop; |
|
822 } |
|
823 attrValueChar(v); |
|
824 } |
|
825 addPendingText(v, bp - 1); |
|
826 nextChar(); |
|
827 } else { |
|
828 vkind = ValueKind.UNQUOTED; |
|
829 textStart = bp; |
|
830 while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) { |
|
831 attrValueChar(v); |
|
832 } |
|
833 addPendingText(v, bp - 1); |
|
834 } |
|
835 skipWhitespace(); |
|
836 value = v.toList(); |
|
837 } |
|
838 DCAttribute attr = m.at(namePos).Attribute(name, vkind, value); |
|
839 attrs.add(attr); |
|
840 } |
|
841 |
|
842 return attrs.toList(); |
|
843 } |
|
844 |
|
845 protected void attrValueChar(ListBuffer<DCTree> list) { |
|
846 switch (ch) { |
|
847 case '&': |
|
848 entity(list); |
|
849 break; |
|
850 |
|
851 case '{': |
|
852 inlineTag(list); |
|
853 break; |
|
854 |
|
855 default: |
|
856 nextChar(); |
|
857 } |
|
858 } |
|
859 |
|
860 protected void addPendingText(ListBuffer<DCTree> list, int textEnd) { |
|
861 if (textStart != -1) { |
|
862 if (textStart <= textEnd) { |
|
863 list.add(m.at(textStart).Text(newString(textStart, textEnd + 1))); |
|
864 } |
|
865 textStart = -1; |
|
866 } |
|
867 } |
|
868 |
|
869 protected DCErroneous erroneous(String code, int pos) { |
|
870 int i = bp - 1; |
|
871 loop: |
|
872 while (i > pos) { |
|
873 switch (buf[i]) { |
|
874 case '\f': case '\n': case '\r': |
|
875 newline = true; |
|
876 break; |
|
877 case '\t': case ' ': |
|
878 break; |
|
879 default: |
|
880 break loop; |
|
881 } |
|
882 i--; |
|
883 } |
|
884 textStart = -1; |
|
885 return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code); |
|
886 } |
|
887 |
|
888 @SuppressWarnings("unchecked") |
|
889 <T> T getFirst(List<T>... lists) { |
|
890 for (List<T> list: lists) { |
|
891 if (list.nonEmpty()) |
|
892 return list.head; |
|
893 } |
|
894 return null; |
|
895 } |
|
896 |
|
897 protected boolean isIdentifierStart(char ch) { |
|
898 return Character.isUnicodeIdentifierStart(ch); |
|
899 } |
|
900 |
|
901 protected Name readIdentifier() { |
|
902 int start = bp; |
|
903 nextChar(); |
|
904 while (bp < buflen && Character.isUnicodeIdentifierPart(ch)) |
|
905 nextChar(); |
|
906 return names.fromChars(buf, start, bp - start); |
|
907 } |
|
908 |
|
909 protected Name readTagName() { |
|
910 int start = bp; |
|
911 nextChar(); |
|
912 while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '.')) |
|
913 nextChar(); |
|
914 return names.fromChars(buf, start, bp - start); |
|
915 } |
|
916 |
|
917 protected boolean isJavaIdentifierStart(char ch) { |
|
918 return Character.isJavaIdentifierStart(ch); |
|
919 } |
|
920 |
|
921 protected Name readJavaIdentifier() { |
|
922 int start = bp; |
|
923 nextChar(); |
|
924 while (bp < buflen && Character.isJavaIdentifierPart(ch)) |
|
925 nextChar(); |
|
926 return names.fromChars(buf, start, bp - start); |
|
927 } |
|
928 |
|
929 protected boolean isDecimalDigit(char ch) { |
|
930 return ('0' <= ch && ch <= '9'); |
|
931 } |
|
932 |
|
933 protected boolean isHexDigit(char ch) { |
|
934 return ('0' <= ch && ch <= '9') |
|
935 || ('a' <= ch && ch <= 'f') |
|
936 || ('A' <= ch && ch <= 'F'); |
|
937 } |
|
938 |
|
939 protected boolean isUnquotedAttrValueTerminator(char ch) { |
|
940 switch (ch) { |
|
941 case '\f': case '\n': case '\r': case '\t': |
|
942 case ' ': |
|
943 case '"': case '\'': case '`': |
|
944 case '=': case '<': case '>': |
|
945 return true; |
|
946 default: |
|
947 return false; |
|
948 } |
|
949 } |
|
950 |
|
951 protected boolean isWhitespace(char ch) { |
|
952 return Character.isWhitespace(ch); |
|
953 } |
|
954 |
|
955 protected void skipWhitespace() { |
|
956 while (isWhitespace(ch)) |
|
957 nextChar(); |
|
958 } |
|
959 |
|
960 protected int getSentenceBreak(String s) { |
|
961 if (sentenceBreaker != null) { |
|
962 sentenceBreaker.setText(s); |
|
963 int i = sentenceBreaker.next(); |
|
964 return (i == s.length()) ? -1 : i; |
|
965 } |
|
966 |
|
967 // scan for period followed by whitespace |
|
968 boolean period = false; |
|
969 for (int i = 0; i < s.length(); i++) { |
|
970 switch (s.charAt(i)) { |
|
971 case '.': |
|
972 period = true; |
|
973 break; |
|
974 |
|
975 case ' ': |
|
976 case '\f': |
|
977 case '\n': |
|
978 case '\r': |
|
979 case '\t': |
|
980 if (period) |
|
981 return i; |
|
982 break; |
|
983 |
|
984 default: |
|
985 period = false; |
|
986 break; |
|
987 } |
|
988 } |
|
989 return -1; |
|
990 } |
|
991 |
|
992 |
|
993 Set<String> htmlBlockTags = new HashSet<String>(Arrays.asList( |
|
994 "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre")); |
|
995 |
|
996 protected boolean isSentenceBreak(Name n) { |
|
997 return htmlBlockTags.contains(StringUtils.toLowerCase(n.toString())); |
|
998 } |
|
999 |
|
1000 protected boolean isSentenceBreak(DCTree t) { |
|
1001 switch (t.getKind()) { |
|
1002 case START_ELEMENT: |
|
1003 return isSentenceBreak(((DCStartElement) t).getName()); |
|
1004 |
|
1005 case END_ELEMENT: |
|
1006 return isSentenceBreak(((DCEndElement) t).getName()); |
|
1007 } |
|
1008 return false; |
|
1009 } |
|
1010 |
|
1011 /** |
|
1012 * @param start position of first character of string |
|
1013 * @param end position of character beyond last character to be included |
|
1014 */ |
|
1015 String newString(int start, int end) { |
|
1016 return new String(buf, start, end - start); |
|
1017 } |
|
1018 |
|
1019 static abstract class TagParser { |
|
1020 enum Kind { INLINE, BLOCK } |
|
1021 |
|
1022 Kind kind; |
|
1023 DCTree.Kind treeKind; |
|
1024 |
|
1025 TagParser(Kind k, DCTree.Kind tk) { |
|
1026 kind = k; |
|
1027 treeKind = tk; |
|
1028 } |
|
1029 |
|
1030 Kind getKind() { |
|
1031 return kind; |
|
1032 } |
|
1033 |
|
1034 DCTree.Kind getTreeKind() { |
|
1035 return treeKind; |
|
1036 } |
|
1037 |
|
1038 abstract DCTree parse(int pos) throws ParseException; |
|
1039 } |
|
1040 |
|
1041 /** |
|
1042 * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a> |
|
1043 */ |
|
1044 private void initTagParsers() { |
|
1045 TagParser[] parsers = { |
|
1046 // @author name-text |
|
1047 new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) { |
|
1048 public DCTree parse(int pos) { |
|
1049 List<DCTree> name = blockContent(); |
|
1050 return m.at(pos).Author(name); |
|
1051 } |
|
1052 }, |
|
1053 |
|
1054 // {@code text} |
|
1055 new TagParser(Kind.INLINE, DCTree.Kind.CODE) { |
|
1056 public DCTree parse(int pos) throws ParseException { |
|
1057 DCTree text = inlineText(); |
|
1058 nextChar(); |
|
1059 return m.at(pos).Code((DCText) text); |
|
1060 } |
|
1061 }, |
|
1062 |
|
1063 // @deprecated deprecated-text |
|
1064 new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) { |
|
1065 public DCTree parse(int pos) { |
|
1066 List<DCTree> reason = blockContent(); |
|
1067 return m.at(pos).Deprecated(reason); |
|
1068 } |
|
1069 }, |
|
1070 |
|
1071 // {@docRoot} |
|
1072 new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) { |
|
1073 public DCTree parse(int pos) throws ParseException { |
|
1074 if (ch == '}') { |
|
1075 nextChar(); |
|
1076 return m.at(pos).DocRoot(); |
|
1077 } |
|
1078 inlineText(); // skip unexpected content |
|
1079 nextChar(); |
|
1080 throw new ParseException("dc.unexpected.content"); |
|
1081 } |
|
1082 }, |
|
1083 |
|
1084 // @exception class-name description |
|
1085 new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) { |
|
1086 public DCTree parse(int pos) throws ParseException { |
|
1087 skipWhitespace(); |
|
1088 DCReference ref = reference(false); |
|
1089 List<DCTree> description = blockContent(); |
|
1090 return m.at(pos).Exception(ref, description); |
|
1091 } |
|
1092 }, |
|
1093 |
|
1094 // {@inheritDoc} |
|
1095 new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) { |
|
1096 public DCTree parse(int pos) throws ParseException { |
|
1097 if (ch == '}') { |
|
1098 nextChar(); |
|
1099 return m.at(pos).InheritDoc(); |
|
1100 } |
|
1101 inlineText(); // skip unexpected content |
|
1102 nextChar(); |
|
1103 throw new ParseException("dc.unexpected.content"); |
|
1104 } |
|
1105 }, |
|
1106 |
|
1107 // {@link package.class#member label} |
|
1108 new TagParser(Kind.INLINE, DCTree.Kind.LINK) { |
|
1109 public DCTree parse(int pos) throws ParseException { |
|
1110 DCReference ref = reference(true); |
|
1111 List<DCTree> label = inlineContent(); |
|
1112 return m.at(pos).Link(ref, label); |
|
1113 } |
|
1114 }, |
|
1115 |
|
1116 // {@linkplain package.class#member label} |
|
1117 new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) { |
|
1118 public DCTree parse(int pos) throws ParseException { |
|
1119 DCReference ref = reference(true); |
|
1120 List<DCTree> label = inlineContent(); |
|
1121 return m.at(pos).LinkPlain(ref, label); |
|
1122 } |
|
1123 }, |
|
1124 |
|
1125 // {@literal text} |
|
1126 new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) { |
|
1127 public DCTree parse(int pos) throws ParseException { |
|
1128 DCTree text = inlineText(); |
|
1129 nextChar(); |
|
1130 return m.at(pos).Literal((DCText) text); |
|
1131 } |
|
1132 }, |
|
1133 |
|
1134 // @param parameter-name description |
|
1135 new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) { |
|
1136 public DCTree parse(int pos) throws ParseException { |
|
1137 skipWhitespace(); |
|
1138 |
|
1139 boolean typaram = false; |
|
1140 if (ch == '<') { |
|
1141 typaram = true; |
|
1142 nextChar(); |
|
1143 } |
|
1144 |
|
1145 DCIdentifier id = identifier(); |
|
1146 |
|
1147 if (typaram) { |
|
1148 if (ch != '>') |
|
1149 throw new ParseException("dc.gt.expected"); |
|
1150 nextChar(); |
|
1151 } |
|
1152 |
|
1153 skipWhitespace(); |
|
1154 List<DCTree> desc = blockContent(); |
|
1155 return m.at(pos).Param(typaram, id, desc); |
|
1156 } |
|
1157 }, |
|
1158 |
|
1159 // @return description |
|
1160 new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) { |
|
1161 public DCTree parse(int pos) { |
|
1162 List<DCTree> description = blockContent(); |
|
1163 return m.at(pos).Return(description); |
|
1164 } |
|
1165 }, |
|
1166 |
|
1167 // @see reference | quoted-string | HTML |
|
1168 new TagParser(Kind.BLOCK, DCTree.Kind.SEE) { |
|
1169 public DCTree parse(int pos) throws ParseException { |
|
1170 skipWhitespace(); |
|
1171 switch (ch) { |
|
1172 case '"': |
|
1173 DCText string = quotedString(); |
|
1174 if (string != null) { |
|
1175 skipWhitespace(); |
|
1176 if (ch == '@' |
|
1177 || ch == EOI && bp == buf.length - 1) { |
|
1178 return m.at(pos).See(List.<DCTree>of(string)); |
|
1179 } |
|
1180 } |
|
1181 break; |
|
1182 |
|
1183 case '<': |
|
1184 List<DCTree> html = blockContent(); |
|
1185 if (html != null) |
|
1186 return m.at(pos).See(html); |
|
1187 break; |
|
1188 |
|
1189 case '@': |
|
1190 if (newline) |
|
1191 throw new ParseException("dc.no.content"); |
|
1192 break; |
|
1193 |
|
1194 case EOI: |
|
1195 if (bp == buf.length - 1) |
|
1196 throw new ParseException("dc.no.content"); |
|
1197 break; |
|
1198 |
|
1199 default: |
|
1200 if (isJavaIdentifierStart(ch) || ch == '#') { |
|
1201 DCReference ref = reference(true); |
|
1202 List<DCTree> description = blockContent(); |
|
1203 return m.at(pos).See(description.prepend(ref)); |
|
1204 } |
|
1205 } |
|
1206 throw new ParseException("dc.unexpected.content"); |
|
1207 } |
|
1208 }, |
|
1209 |
|
1210 // @serialData data-description |
|
1211 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) { |
|
1212 public DCTree parse(int pos) { |
|
1213 List<DCTree> description = blockContent(); |
|
1214 return m.at(pos).SerialData(description); |
|
1215 } |
|
1216 }, |
|
1217 |
|
1218 // @serialField field-name field-type description |
|
1219 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) { |
|
1220 public DCTree parse(int pos) throws ParseException { |
|
1221 skipWhitespace(); |
|
1222 DCIdentifier name = identifier(); |
|
1223 skipWhitespace(); |
|
1224 DCReference type = reference(false); |
|
1225 List<DCTree> description = null; |
|
1226 if (isWhitespace(ch)) { |
|
1227 skipWhitespace(); |
|
1228 description = blockContent(); |
|
1229 } |
|
1230 return m.at(pos).SerialField(name, type, description); |
|
1231 } |
|
1232 }, |
|
1233 |
|
1234 // @serial field-description | include | exclude |
|
1235 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) { |
|
1236 public DCTree parse(int pos) { |
|
1237 List<DCTree> description = blockContent(); |
|
1238 return m.at(pos).Serial(description); |
|
1239 } |
|
1240 }, |
|
1241 |
|
1242 // @since since-text |
|
1243 new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) { |
|
1244 public DCTree parse(int pos) { |
|
1245 List<DCTree> description = blockContent(); |
|
1246 return m.at(pos).Since(description); |
|
1247 } |
|
1248 }, |
|
1249 |
|
1250 // @throws class-name description |
|
1251 new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) { |
|
1252 public DCTree parse(int pos) throws ParseException { |
|
1253 skipWhitespace(); |
|
1254 DCReference ref = reference(false); |
|
1255 List<DCTree> description = blockContent(); |
|
1256 return m.at(pos).Throws(ref, description); |
|
1257 } |
|
1258 }, |
|
1259 |
|
1260 // {@value package.class#field} |
|
1261 new TagParser(Kind.INLINE, DCTree.Kind.VALUE) { |
|
1262 public DCTree parse(int pos) throws ParseException { |
|
1263 DCReference ref = reference(true); |
|
1264 skipWhitespace(); |
|
1265 if (ch == '}') { |
|
1266 nextChar(); |
|
1267 return m.at(pos).Value(ref); |
|
1268 } |
|
1269 nextChar(); |
|
1270 throw new ParseException("dc.unexpected.content"); |
|
1271 } |
|
1272 }, |
|
1273 |
|
1274 // @version version-text |
|
1275 new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) { |
|
1276 public DCTree parse(int pos) { |
|
1277 List<DCTree> description = blockContent(); |
|
1278 return m.at(pos).Version(description); |
|
1279 } |
|
1280 }, |
|
1281 }; |
|
1282 |
|
1283 tagParsers = new HashMap<Name,TagParser>(); |
|
1284 for (TagParser p: parsers) |
|
1285 tagParsers.put(names.fromString(p.getTreeKind().tagName), p); |
|
1286 |
|
1287 } |
|
1288 } |