Wed, 23 Jan 2013 13:27:24 -0800
8006775: JSR 308: Compiler changes in JDK8
Reviewed-by: jjg
Contributed-by: mernst@cs.washington.edu, wmdietl@cs.washington.edu, mpapi@csail.mit.edu, mahmood@notnoop.com
1 /*
2 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.tools.javac.parser;
28 import java.text.BreakIterator;
29 import java.util.Arrays;
30 import java.util.HashMap;
31 import java.util.HashSet;
32 import java.util.Locale;
33 import java.util.Map;
34 import java.util.Set;
36 import com.sun.source.doctree.AttributeTree.ValueKind;
37 import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
38 import com.sun.tools.javac.parser.Tokens.Comment;
39 import com.sun.tools.javac.parser.Tokens.TokenKind;
40 import com.sun.tools.javac.tree.DCTree;
41 import com.sun.tools.javac.tree.DCTree.DCAttribute;
42 import com.sun.tools.javac.tree.DCTree.DCDocComment;
43 import com.sun.tools.javac.tree.DCTree.DCEndElement;
44 import com.sun.tools.javac.tree.DCTree.DCErroneous;
45 import com.sun.tools.javac.tree.DCTree.DCIdentifier;
46 import com.sun.tools.javac.tree.DCTree.DCReference;
47 import com.sun.tools.javac.tree.DCTree.DCStartElement;
48 import com.sun.tools.javac.tree.DCTree.DCText;
49 import com.sun.tools.javac.tree.DocTreeMaker;
50 import com.sun.tools.javac.tree.JCTree;
51 import com.sun.tools.javac.util.DiagnosticSource;
52 import com.sun.tools.javac.util.List;
53 import com.sun.tools.javac.util.ListBuffer;
54 import com.sun.tools.javac.util.Log;
55 import com.sun.tools.javac.util.Name;
56 import com.sun.tools.javac.util.Names;
57 import com.sun.tools.javac.util.Options;
58 import com.sun.tools.javac.util.Position;
59 import static com.sun.tools.javac.util.LayoutCharacters.*;
61 /**
62 *
63 * <p><b>This is NOT part of any supported API.
64 * If you write code that depends on this, you do so at your own risk.
65 * This code and its internal interfaces are subject to change or
66 * deletion without notice.</b>
67 */
68 public class DocCommentParser {
69 static class ParseException extends Exception {
70 private static final long serialVersionUID = 0;
71 ParseException(String key) {
72 super(key);
73 }
74 }
76 final ParserFactory fac;
77 final DiagnosticSource diagSource;
78 final Comment comment;
79 final DocTreeMaker m;
80 final Names names;
82 BreakIterator sentenceBreaker;
84 /** The input buffer, index of most recent character read,
85 * index of one past last character in buffer.
86 */
87 protected char[] buf;
88 protected int bp;
89 protected int buflen;
91 /** The current character.
92 */
93 protected char ch;
95 int textStart = -1;
96 int lastNonWhite = -1;
97 boolean newline = true;
99 Map<Name, TagParser> tagParsers;
101 DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
102 this.fac = fac;
103 this.diagSource = diagSource;
104 this.comment = comment;
105 names = fac.names;
106 m = fac.docTreeMaker;
108 Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale;
110 Options options = fac.options;
111 boolean useBreakIterator = options.isSet("breakIterator");
112 if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
113 sentenceBreaker = BreakIterator.getSentenceInstance(locale);
115 initTagParsers();
116 }
118 DCDocComment parse() {
119 String c = comment.getText();
120 buf = new char[c.length() + 1];
121 c.getChars(0, c.length(), buf, 0);
122 buf[buf.length - 1] = EOI;
123 buflen = buf.length - 1;
124 bp = -1;
125 nextChar();
127 List<DCTree> body = blockContent();
128 List<DCTree> tags = blockTags();
130 // split body into first sentence and body
131 ListBuffer<DCTree> fs = new ListBuffer<DCTree>();
132 loop:
133 for (; body.nonEmpty(); body = body.tail) {
134 DCTree t = body.head;
135 switch (t.getKind()) {
136 case TEXT:
137 String s = ((DCText) t).getBody();
138 int i = getSentenceBreak(s);
139 if (i > 0) {
140 int i0 = i;
141 while (i0 > 0 && isWhitespace(s.charAt(i0 - 1)))
142 i0--;
143 fs.add(m.at(t.pos).Text(s.substring(0, i0)));
144 int i1 = i;
145 while (i1 < s.length() && isWhitespace(s.charAt(i1)))
146 i1++;
147 body = body.tail;
148 if (i1 < s.length())
149 body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1)));
150 break loop;
151 } else if (body.tail.nonEmpty()) {
152 if (isSentenceBreak(body.tail.head)) {
153 int i0 = s.length() - 1;
154 while (i0 > 0 && isWhitespace(s.charAt(i0)))
155 i0--;
156 fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1)));
157 body = body.tail;
158 break loop;
159 }
160 }
161 break;
163 case START_ELEMENT:
164 case END_ELEMENT:
165 if (isSentenceBreak(t))
166 break loop;
167 break;
168 }
169 fs.add(t);
170 }
172 @SuppressWarnings("unchecked")
173 DCTree first = getFirst(fs.toList(), body, tags);
174 int pos = (first == null) ? Position.NOPOS : first.pos;
176 DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags);
177 return dc;
178 }
180 void nextChar() {
181 ch = buf[bp < buflen ? ++bp : buflen];
182 switch (ch) {
183 case '\f': case '\n': case '\r':
184 newline = true;
185 }
186 }
188 /**
189 * Read block content, consisting of text, html and inline tags.
190 * Terminated by the end of input, or the beginning of the next block tag:
191 * i.e. @ as the first non-whitespace character on a line.
192 */
193 @SuppressWarnings("fallthrough")
194 protected List<DCTree> blockContent() {
195 ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
196 textStart = -1;
198 loop:
199 while (bp < buflen) {
200 switch (ch) {
201 case '\n': case '\r': case '\f':
202 newline = true;
203 // fallthrough
205 case ' ': case '\t':
206 nextChar();
207 break;
209 case '&':
210 entity(trees);
211 break;
213 case '<':
214 newline = false;
215 addPendingText(trees, bp - 1);
216 trees.add(html());
217 if (textStart == -1) {
218 textStart = bp;
219 lastNonWhite = -1;
220 }
221 break;
223 case '>':
224 newline = false;
225 addPendingText(trees, bp - 1);
226 trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt"));
227 nextChar();
228 if (textStart == -1) {
229 textStart = bp;
230 lastNonWhite = -1;
231 }
232 break;
234 case '{':
235 inlineTag(trees);
236 break;
238 case '@':
239 if (newline) {
240 addPendingText(trees, lastNonWhite);
241 break loop;
242 }
243 // fallthrough
245 default:
246 newline = false;
247 if (textStart == -1)
248 textStart = bp;
249 lastNonWhite = bp;
250 nextChar();
251 }
252 }
254 if (lastNonWhite != -1)
255 addPendingText(trees, lastNonWhite);
257 return trees.toList();
258 }
260 /**
261 * Read a series of block tags, including their content.
262 * Standard tags parse their content appropriately.
263 * Non-standard tags are represented by {@link UnknownBlockTag}.
264 */
265 protected List<DCTree> blockTags() {
266 ListBuffer<DCTree> tags = new ListBuffer<DCTree>();
267 while (ch == '@')
268 tags.add(blockTag());
269 return tags.toList();
270 }
272 /**
273 * Read a single block tag, including its content.
274 * Standard tags parse their content appropriately.
275 * Non-standard tags are represented by {@link UnknownBlockTag}.
276 */
277 protected DCTree blockTag() {
278 int p = bp;
279 try {
280 nextChar();
281 if (isIdentifierStart(ch)) {
282 int namePos = bp;
283 nextChar();
284 while (isIdentifierPart(ch))
285 nextChar();
286 int nameLen = bp - namePos;
288 Name name = names.fromChars(buf, namePos, nameLen);
289 TagParser tp = tagParsers.get(name);
290 if (tp == null) {
291 List<DCTree> content = blockContent();
292 return m.at(p).UnknownBlockTag(name, content);
293 } else {
294 switch (tp.getKind()) {
295 case BLOCK:
296 return tp.parse(p);
297 case INLINE:
298 return erroneous("dc.bad.inline.tag", p);
299 }
300 }
301 }
302 blockContent();
304 return erroneous("dc.no.tag.name", p);
305 } catch (ParseException e) {
306 blockContent();
307 return erroneous(e.getMessage(), p);
308 }
309 }
311 protected void inlineTag(ListBuffer<DCTree> list) {
312 newline = false;
313 nextChar();
314 if (ch == '@') {
315 addPendingText(list, bp - 2);
316 list.add(inlineTag());
317 textStart = bp;
318 lastNonWhite = -1;
319 } else {
320 if (textStart == -1)
321 textStart = bp - 1;
322 lastNonWhite = bp;
323 }
324 }
326 /**
327 * Read a single inline tag, including its content.
328 * Standard tags parse their content appropriately.
329 * Non-standard tags are represented by {@link UnknownBlockTag}.
330 * Malformed tags may be returned as {@link Erroneous}.
331 */
332 protected DCTree inlineTag() {
333 int p = bp - 1;
334 try {
335 nextChar();
336 if (isIdentifierStart(ch)) {
337 int namePos = bp;
338 nextChar();
339 while (isIdentifierPart(ch))
340 nextChar();
341 int nameLen = bp - namePos;
342 skipWhitespace();
344 Name name = names.fromChars(buf, namePos, nameLen);
345 TagParser tp = tagParsers.get(name);
346 if (tp == null) {
347 DCTree text = inlineText();
348 if (text != null) {
349 nextChar();
350 return m.at(p).UnknownInlineTag(name, List.of(text));
351 }
352 } else if (tp.getKind() == TagParser.Kind.INLINE) {
353 DCTree tree = tp.parse(p);
354 if (tree != null) {
355 return tree;
356 }
357 } else {
358 inlineText(); // skip content
359 nextChar();
360 }
361 }
362 return erroneous("dc.no.tag.name", p);
363 } catch (ParseException e) {
364 return erroneous(e.getMessage(), p);
365 }
366 }
368 /**
369 * Read plain text content of an inline tag.
370 * Matching pairs of { } are skipped; the text is terminated by the first
371 * unmatched }. It is an error if the beginning of the next tag is detected.
372 */
373 protected DCTree inlineText() throws ParseException {
374 skipWhitespace();
375 int pos = bp;
376 int depth = 1;
378 loop:
379 while (bp < buflen) {
380 switch (ch) {
381 case '\n': case '\r': case '\f':
382 newline = true;
383 break;
385 case ' ': case '\t':
386 break;
388 case '{':
389 newline = false;
390 lastNonWhite = bp;
391 depth++;
392 break;
394 case '}':
395 if (--depth == 0) {
396 return m.at(pos).Text(newString(pos, bp));
397 }
398 newline = false;
399 lastNonWhite = bp;
400 break;
402 case '@':
403 if (newline)
404 break loop;
405 newline = false;
406 lastNonWhite = bp;
407 break;
409 default:
410 newline = false;
411 lastNonWhite = bp;
412 break;
413 }
414 nextChar();
415 }
416 throw new ParseException("dc.unterminated.inline.tag");
417 }
419 /**
420 * Read Java class name, possibly followed by member
421 * Matching pairs of < > are skipped. The text is terminated by the first
422 * unmatched }. It is an error if the beginning of the next tag is detected.
423 */
424 // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
425 // TODO: improve quality of parse to forbid bad constructions.
426 @SuppressWarnings("fallthrough")
427 protected DCReference reference(boolean allowMember) throws ParseException {
428 int pos = bp;
429 int depth = 0;
431 // scan to find the end of the signature, by looking for the first
432 // whitespace not enclosed in () or <>, or the end of the tag
433 loop:
434 while (bp < buflen) {
435 switch (ch) {
436 case '\n': case '\r': case '\f':
437 newline = true;
438 // fallthrough
440 case ' ': case '\t':
441 if (depth == 0)
442 break loop;
443 break;
445 case '(':
446 case '<':
447 newline = false;
448 depth++;
449 break;
451 case ')':
452 case '>':
453 newline = false;
454 --depth;
455 break;
457 case '}':
458 if (bp == pos)
459 return null;
460 newline = false;
461 break loop;
463 case '@':
464 if (newline)
465 break loop;
466 // fallthrough
468 default:
469 newline = false;
471 }
472 nextChar();
473 }
475 if (depth != 0)
476 throw new ParseException("dc.unterminated.signature");
478 String sig = newString(pos, bp);
480 // Break sig apart into qualifiedExpr member paramTypes.
481 JCTree qualExpr;
482 Name member;
483 List<JCTree> paramTypes;
485 Log.DeferredDiagnosticHandler deferredDiagnosticHandler
486 = new Log.DeferredDiagnosticHandler(fac.log);
488 try {
489 int hash = sig.indexOf("#");
490 int lparen = sig.indexOf("(", hash + 1);
491 if (hash == -1) {
492 if (lparen == -1) {
493 qualExpr = parseType(sig);
494 member = null;
495 } else {
496 qualExpr = null;
497 member = parseMember(sig.substring(0, lparen));
498 }
499 } else {
500 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
501 if (lparen == -1)
502 member = parseMember(sig.substring(hash + 1));
503 else
504 member = parseMember(sig.substring(hash + 1, lparen));
505 }
507 if (lparen < 0) {
508 paramTypes = null;
509 } else {
510 int rparen = sig.indexOf(")", lparen);
511 if (rparen != sig.length() - 1)
512 throw new ParseException("dc.ref.bad.parens");
513 paramTypes = parseParams(sig.substring(lparen + 1, rparen));
514 }
516 if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
517 throw new ParseException("dc.ref.syntax.error");
519 } finally {
520 fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
521 }
523 return m.at(pos).Reference(sig, qualExpr, member, paramTypes);
524 }
526 JCTree parseType(String s) throws ParseException {
527 JavacParser p = fac.newParser(s, false, false, false);
528 JCTree tree = p.parseType();
529 if (p.token().kind != TokenKind.EOF)
530 throw new ParseException("dc.ref.unexpected.input");
531 return tree;
532 }
534 Name parseMember(String s) throws ParseException {
535 JavacParser p = fac.newParser(s, false, false, false);
536 Name name = p.ident();
537 if (p.token().kind != TokenKind.EOF)
538 throw new ParseException("dc.ref.unexpected.input");
539 return name;
540 }
542 List<JCTree> parseParams(String s) throws ParseException {
543 if (s.trim().isEmpty())
544 return List.nil();
546 JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
547 ListBuffer<JCTree> paramTypes = new ListBuffer<JCTree>();
548 paramTypes.add(p.parseType());
550 if (p.token().kind == TokenKind.IDENTIFIER)
551 p.nextToken();
553 while (p.token().kind == TokenKind.COMMA) {
554 p.nextToken();
555 paramTypes.add(p.parseType());
557 if (p.token().kind == TokenKind.IDENTIFIER)
558 p.nextToken();
559 }
561 if (p.token().kind != TokenKind.EOF)
562 throw new ParseException("dc.ref.unexpected.input");
564 return paramTypes.toList();
565 }
567 /**
568 * Read Java identifier
569 * Matching pairs of { } are skipped; the text is terminated by the first
570 * unmatched }. It is an error if the beginning of the next tag is detected.
571 */
572 @SuppressWarnings("fallthrough")
573 protected DCIdentifier identifier() throws ParseException {
574 skipWhitespace();
575 int pos = bp;
577 if (isJavaIdentifierStart(ch)) {
578 nextChar();
579 while (isJavaIdentifierPart(ch))
580 nextChar();
581 return m.at(pos).Identifier(names.fromChars(buf, pos, bp - pos));
582 }
584 throw new ParseException("dc.identifier.expected");
585 }
587 /**
588 * Read a quoted string.
589 * It is an error if the beginning of the next tag is detected.
590 */
591 @SuppressWarnings("fallthrough")
592 protected DCText quotedString() {
593 int pos = bp;
594 nextChar();
596 loop:
597 while (bp < buflen) {
598 switch (ch) {
599 case '\n': case '\r': case '\f':
600 newline = true;
601 break;
603 case ' ': case '\t':
604 break;
606 case '"':
607 nextChar();
608 // trim trailing white-space?
609 return m.at(pos).Text(newString(pos, bp));
611 case '@':
612 if (newline)
613 break loop;
615 }
616 nextChar();
617 }
618 return null;
619 }
621 /**
622 * Read general text content of an inline tag, including HTML entities and elements.
623 * Matching pairs of { } are skipped; the text is terminated by the first
624 * unmatched }. It is an error if the beginning of the next tag is detected.
625 */
626 @SuppressWarnings("fallthrough")
627 protected List<DCTree> inlineContent() {
628 ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
630 skipWhitespace();
631 int pos = bp;
632 int depth = 1;
633 textStart = -1;
635 loop:
636 while (bp < buflen) {
638 switch (ch) {
639 case '\n': case '\r': case '\f':
640 newline = true;
641 // fall through
643 case ' ': case '\t':
644 nextChar();
645 break;
647 case '&':
648 entity(trees);
649 break;
651 case '<':
652 newline = false;
653 addPendingText(trees, bp - 1);
654 trees.add(html());
655 break;
657 case '{':
658 newline = false;
659 depth++;
660 nextChar();
661 break;
663 case '}':
664 newline = false;
665 if (--depth == 0) {
666 addPendingText(trees, bp - 1);
667 nextChar();
668 return trees.toList();
669 }
670 nextChar();
671 break;
673 case '@':
674 if (newline)
675 break loop;
676 // fallthrough
678 default:
679 if (textStart == -1)
680 textStart = bp;
681 nextChar();
682 break;
683 }
684 }
686 return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos));
687 }
689 protected void entity(ListBuffer<DCTree> list) {
690 newline = false;
691 addPendingText(list, bp - 1);
692 list.add(entity());
693 if (textStart == -1) {
694 textStart = bp;
695 lastNonWhite = -1;
696 }
697 }
699 /**
700 * Read an HTML entity.
701 * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
702 */
703 protected DCTree entity() {
704 int p = bp;
705 nextChar();
706 int namep = bp;
707 boolean checkSemi = false;
708 if (ch == '#') {
709 nextChar();
710 if (isDecimalDigit(ch)) {
711 nextChar();
712 while (isDecimalDigit(ch))
713 nextChar();
714 checkSemi = true;
715 } else if (ch == 'x' || ch == 'X') {
716 nextChar();
717 if (isHexDigit(ch)) {
718 nextChar();
719 while (isHexDigit(ch))
720 nextChar();
721 checkSemi = true;
722 }
723 }
724 } else if (isIdentifierStart(ch)) {
725 nextChar();
726 while (isIdentifierPart(ch))
727 nextChar();
728 checkSemi = true;
729 }
731 if (checkSemi && ch == ';') {
732 nextChar();
733 return m.at(p).Entity(names.fromChars(buf, namep, bp - namep - 1));
734 } else {
735 String code = checkSemi
736 ? "dc.missing.semicolon"
737 : "dc.bad.entity";
738 return erroneous(code, p);
739 }
740 }
742 /**
743 * Read the start or end of an HTML tag, or an HTML comment
744 * {@literal <identifier attrs> } or {@literal </identifier> }
745 */
746 protected DCTree html() {
747 int p = bp;
748 nextChar();
749 if (isIdentifierStart(ch)) {
750 int namePos = bp;
751 nextChar();
752 while (isIdentifierPart(ch))
753 nextChar();
754 int nameLen = bp - namePos;
755 List<DCTree> attrs = htmlAttrs();
756 if (attrs != null) {
757 boolean selfClosing = false;
758 if (ch == '/') {
759 nextChar();
760 selfClosing = true;
761 }
762 if (ch == '>') {
763 nextChar();
764 Name name = names.fromChars(buf, namePos, nameLen);
765 return m.at(p).StartElement(name, attrs, selfClosing);
766 }
767 }
768 } else if (ch == '/') {
769 nextChar();
770 if (isIdentifierStart(ch)) {
771 int namePos = bp;
772 nextChar();
773 while (isIdentifierPart(ch))
774 nextChar();
775 int nameLen = bp - namePos;
776 skipWhitespace();
777 if (ch == '>') {
778 nextChar();
779 Name name = names.fromChars(buf, namePos, nameLen);
780 return m.at(p).EndElement(name);
781 }
782 }
783 } else if (ch == '!') {
784 nextChar();
785 if (ch == '-') {
786 nextChar();
787 if (ch == '-') {
788 nextChar();
789 while (bp < buflen) {
790 int dash = 0;
791 while (ch == '-') {
792 dash++;
793 nextChar();
794 }
795 // strictly speaking, a comment should not contain "--"
796 // so dash > 2 is an error, dash == 2 implies ch == '>'
797 if (dash >= 2 && ch == '>') {
798 nextChar();
799 return m.at(p).Comment(newString(p, bp));
800 }
802 nextChar();
803 }
804 }
805 }
806 }
808 bp = p + 1;
809 ch = buf[bp];
810 return erroneous("dc.malformed.html", p);
811 }
813 /**
814 * Read a series of HTML attributes, terminated by {@literal > }.
815 * Each attribute is of the form {@literal identifier[=value] }.
816 * "value" may be unquoted, single-quoted, or double-quoted.
817 */
818 protected List<DCTree> htmlAttrs() {
819 ListBuffer<DCTree> attrs = new ListBuffer<DCTree>();
820 skipWhitespace();
822 loop:
823 while (isIdentifierStart(ch)) {
824 int namePos = bp;
825 nextChar();
826 while (isIdentifierPart(ch))
827 nextChar();
828 int nameLen = bp - namePos;
829 skipWhitespace();
830 List<DCTree> value = null;
831 ValueKind vkind = ValueKind.EMPTY;
832 if (ch == '=') {
833 ListBuffer<DCTree> v = new ListBuffer<DCTree>();
834 nextChar();
835 skipWhitespace();
836 if (ch == '\'' || ch == '"') {
837 vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
838 char quote = ch;
839 nextChar();
840 textStart = bp;
841 while (bp < buflen && ch != quote) {
842 if (newline && ch == '@') {
843 attrs.add(erroneous("dc.unterminated.string", namePos));
844 // No point trying to read more.
845 // In fact, all attrs get discarded by the caller
846 // and superseded by a malformed.html node because
847 // the html tag itself is not terminated correctly.
848 break loop;
849 }
850 attrValueChar(v);
851 }
852 addPendingText(v, bp - 1);
853 nextChar();
854 } else {
855 vkind = ValueKind.UNQUOTED;
856 textStart = bp;
857 while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
858 attrValueChar(v);
859 }
860 addPendingText(v, bp - 1);
861 }
862 skipWhitespace();
863 value = v.toList();
864 }
865 Name name = names.fromChars(buf, namePos, nameLen);
866 DCAttribute attr = m.at(namePos).Attribute(name, vkind, value);
867 attrs.add(attr);
868 }
870 return attrs.toList();
871 }
873 protected void attrValueChar(ListBuffer<DCTree> list) {
874 switch (ch) {
875 case '&':
876 entity(list);
877 break;
879 case '{':
880 inlineTag(list);
881 break;
883 default:
884 nextChar();
885 }
886 }
888 protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
889 if (textStart != -1) {
890 if (textStart <= textEnd) {
891 list.add(m.at(textStart).Text(newString(textStart, textEnd + 1)));
892 }
893 textStart = -1;
894 }
895 }
897 protected DCErroneous erroneous(String code, int pos) {
898 int i = bp - 1;
899 loop:
900 while (i > 0) {
901 switch (buf[i]) {
902 case '\f': case '\n': case '\r':
903 newline = true;
904 break;
905 case '\t': case ' ':
906 break;
907 default:
908 break loop;
909 }
910 i--;
911 }
912 textStart = -1;
913 return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code);
914 }
916 @SuppressWarnings("unchecked")
917 <T> T getFirst(List<T>... lists) {
918 for (List<T> list: lists) {
919 if (list.nonEmpty())
920 return list.head;
921 }
922 return null;
923 }
925 protected boolean isIdentifierStart(char ch) {
926 return Character.isUnicodeIdentifierStart(ch);
927 }
929 protected boolean isIdentifierPart(char ch) {
930 return Character.isUnicodeIdentifierPart(ch);
931 }
933 protected boolean isJavaIdentifierStart(char ch) {
934 return Character.isJavaIdentifierStart(ch);
935 }
937 protected boolean isJavaIdentifierPart(char ch) {
938 return Character.isJavaIdentifierPart(ch);
939 }
941 protected boolean isDecimalDigit(char ch) {
942 return ('0' <= ch && ch <= '9');
943 }
945 protected boolean isHexDigit(char ch) {
946 return ('0' <= ch && ch <= '9')
947 || ('a' <= ch && ch <= 'f')
948 || ('A' <= ch && ch <= 'F');
949 }
951 protected boolean isUnquotedAttrValueTerminator(char ch) {
952 switch (ch) {
953 case '\f': case '\n': case '\r': case '\t':
954 case ' ':
955 case '"': case '\'': case '`':
956 case '=': case '<': case '>':
957 return true;
958 default:
959 return false;
960 }
961 }
963 protected boolean isWhitespace(char ch) {
964 return Character.isWhitespace(ch);
965 }
967 protected void skipWhitespace() {
968 while (isWhitespace(ch))
969 nextChar();
970 }
972 protected int getSentenceBreak(String s) {
973 if (sentenceBreaker != null) {
974 sentenceBreaker.setText(s);
975 int i = sentenceBreaker.next();
976 return (i == s.length()) ? -1 : i;
977 }
979 // scan for period followed by whitespace
980 boolean period = false;
981 for (int i = 0; i < s.length(); i++) {
982 switch (s.charAt(i)) {
983 case '.':
984 period = true;
985 break;
987 case ' ':
988 case '\f':
989 case '\n':
990 case '\r':
991 case '\t':
992 if (period)
993 return i;
994 break;
996 default:
997 period = false;
998 break;
999 }
1000 }
1001 return -1;
1002 }
1005 Set<String> htmlBlockTags = new HashSet<String>(Arrays.asList(
1006 "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre"));
1008 protected boolean isSentenceBreak(Name n) {
1009 return htmlBlockTags.contains(n.toString().toLowerCase());
1010 }
1012 protected boolean isSentenceBreak(DCTree t) {
1013 switch (t.getKind()) {
1014 case START_ELEMENT:
1015 return isSentenceBreak(((DCStartElement) t).getName());
1017 case END_ELEMENT:
1018 return isSentenceBreak(((DCEndElement) t).getName());
1019 }
1020 return false;
1021 }
1023 /**
1024 * @param start position of first character of string
1025 * @param end position of character beyond last character to be included
1026 */
1027 String newString(int start, int end) {
1028 return new String(buf, start, end - start);
1029 }
1031 static abstract class TagParser {
1032 enum Kind { INLINE, BLOCK }
1034 Kind kind;
1035 DCTree.Kind treeKind;
1037 TagParser(Kind k, DCTree.Kind tk) {
1038 kind = k;
1039 treeKind = tk;
1040 }
1042 Kind getKind() {
1043 return kind;
1044 }
1046 DCTree.Kind getTreeKind() {
1047 return treeKind;
1048 }
1050 abstract DCTree parse(int pos) throws ParseException;
1051 }
1053 /**
1054 * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
1055 */
1056 private void initTagParsers() {
1057 TagParser[] parsers = {
1058 // @author name-text
1059 new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
1060 public DCTree parse(int pos) {
1061 List<DCTree> name = blockContent();
1062 return m.at(pos).Author(name);
1063 }
1064 },
1066 // {@code text}
1067 new TagParser(Kind.INLINE, DCTree.Kind.CODE) {
1068 public DCTree parse(int pos) throws ParseException {
1069 DCTree text = inlineText();
1070 nextChar();
1071 return m.at(pos).Code((DCText) text);
1072 }
1073 },
1075 // @deprecated deprecated-text
1076 new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
1077 public DCTree parse(int pos) {
1078 List<DCTree> reason = blockContent();
1079 return m.at(pos).Deprecated(reason);
1080 }
1081 },
1083 // {@docRoot}
1084 new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
1085 public DCTree parse(int pos) throws ParseException {
1086 if (ch == '}') {
1087 nextChar();
1088 return m.at(pos).DocRoot();
1089 }
1090 inlineText(); // skip unexpected content
1091 nextChar();
1092 throw new ParseException("dc.unexpected.content");
1093 }
1094 },
1096 // @exception class-name description
1097 new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
1098 public DCTree parse(int pos) throws ParseException {
1099 skipWhitespace();
1100 DCReference ref = reference(false);
1101 List<DCTree> description = blockContent();
1102 return m.at(pos).Exception(ref, description);
1103 }
1104 },
1106 // {@inheritDoc}
1107 new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
1108 public DCTree parse(int pos) throws ParseException {
1109 if (ch == '}') {
1110 nextChar();
1111 return m.at(pos).InheritDoc();
1112 }
1113 inlineText(); // skip unexpected content
1114 nextChar();
1115 throw new ParseException("dc.unexpected.content");
1116 }
1117 },
1119 // {@link package.class#member label}
1120 new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
1121 public DCTree parse(int pos) throws ParseException {
1122 DCReference ref = reference(true);
1123 List<DCTree> label = inlineContent();
1124 return m.at(pos).Link(ref, label);
1125 }
1126 },
1128 // {@linkplain package.class#member label}
1129 new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
1130 public DCTree parse(int pos) throws ParseException {
1131 DCReference ref = reference(true);
1132 List<DCTree> label = inlineContent();
1133 return m.at(pos).LinkPlain(ref, label);
1134 }
1135 },
1137 // {@literal text}
1138 new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) {
1139 public DCTree parse(int pos) throws ParseException {
1140 DCTree text = inlineText();
1141 nextChar();
1142 return m.at(pos).Literal((DCText) text);
1143 }
1144 },
1146 // @param parameter-name description
1147 new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
1148 public DCTree parse(int pos) throws ParseException {
1149 skipWhitespace();
1151 boolean typaram = false;
1152 if (ch == '<') {
1153 typaram = true;
1154 nextChar();
1155 }
1157 DCIdentifier id = identifier();
1159 if (typaram) {
1160 if (ch != '>')
1161 throw new ParseException("dc.gt.expected");
1162 nextChar();
1163 }
1165 skipWhitespace();
1166 List<DCTree> desc = blockContent();
1167 return m.at(pos).Param(typaram, id, desc);
1168 }
1169 },
1171 // @return description
1172 new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
1173 public DCTree parse(int pos) {
1174 List<DCTree> description = blockContent();
1175 return m.at(pos).Return(description);
1176 }
1177 },
1179 // @see reference | quoted-string | HTML
1180 new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
1181 public DCTree parse(int pos) throws ParseException {
1182 skipWhitespace();
1183 switch (ch) {
1184 case '"':
1185 DCText string = quotedString();
1186 if (string != null) {
1187 skipWhitespace();
1188 if (ch == '@')
1189 return m.at(pos).See(List.<DCTree>of(string));
1190 }
1191 break;
1193 case '<':
1194 List<DCTree> html = blockContent();
1195 if (html != null)
1196 return m.at(pos).See(html);
1197 break;
1199 case '@':
1200 if (newline)
1201 throw new ParseException("dc.no.content");
1202 break;
1204 case EOI:
1205 if (bp == buf.length - 1)
1206 throw new ParseException("dc.no.content");
1207 break;
1209 default:
1210 if (isJavaIdentifierStart(ch) || ch == '#') {
1211 DCReference ref = reference(true);
1212 List<DCTree> description = blockContent();
1213 return m.at(pos).See(description.prepend(ref));
1214 }
1215 }
1216 throw new ParseException("dc.unexpected.content");
1217 }
1218 },
1220 // @serialData data-description
1221 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
1222 public DCTree parse(int pos) {
1223 List<DCTree> description = blockContent();
1224 return m.at(pos).SerialData(description);
1225 }
1226 },
1228 // @serialField field-name field-type description
1229 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
1230 public DCTree parse(int pos) throws ParseException {
1231 skipWhitespace();
1232 DCIdentifier name = identifier();
1233 skipWhitespace();
1234 DCReference type = reference(false);
1235 List<DCTree> description = null;
1236 if (isWhitespace(ch)) {
1237 skipWhitespace();
1238 description = blockContent();
1239 }
1240 return m.at(pos).SerialField(name, type, description);
1241 }
1242 },
1244 // @serial field-description | include | exclude
1245 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
1246 public DCTree parse(int pos) {
1247 List<DCTree> description = blockContent();
1248 return m.at(pos).Serial(description);
1249 }
1250 },
1252 // @since since-text
1253 new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
1254 public DCTree parse(int pos) {
1255 List<DCTree> description = blockContent();
1256 return m.at(pos).Since(description);
1257 }
1258 },
1260 // @throws class-name description
1261 new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
1262 public DCTree parse(int pos) throws ParseException {
1263 skipWhitespace();
1264 DCReference ref = reference(false);
1265 List<DCTree> description = blockContent();
1266 return m.at(pos).Throws(ref, description);
1267 }
1268 },
1270 // {@value package.class#field}
1271 new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
1272 public DCTree parse(int pos) throws ParseException {
1273 DCReference ref = reference(true);
1274 skipWhitespace();
1275 if (ch == '}') {
1276 nextChar();
1277 return m.at(pos).Value(ref);
1278 }
1279 nextChar();
1280 throw new ParseException("dc.unexpected.content");
1281 }
1282 },
1284 // @version version-text
1285 new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
1286 public DCTree parse(int pos) {
1287 List<DCTree> description = blockContent();
1288 return m.at(pos).Version(description);
1289 }
1290 },
1291 };
1293 tagParsers = new HashMap<Name,TagParser>();
1294 for (TagParser p: parsers)
1295 tagParsers.put(names.fromString(p.getTreeKind().tagName), p);
1297 }
1298 }