src/share/classes/com/sun/tools/javac/parser/DocCommentParser.java

changeset 0
959103a6100f
child 2525
2eb010b6cb22
equal deleted inserted replaced
-1:000000000000 0:959103a6100f
1 /*
2 * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.tools.javac.parser;
27
28 import java.text.BreakIterator;
29 import java.util.Arrays;
30 import java.util.HashMap;
31 import java.util.HashSet;
32 import java.util.Locale;
33 import java.util.Map;
34 import java.util.Set;
35
36 import com.sun.source.doctree.AttributeTree.ValueKind;
37 import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
38 import com.sun.tools.javac.parser.Tokens.Comment;
39 import com.sun.tools.javac.parser.Tokens.TokenKind;
40 import com.sun.tools.javac.tree.DCTree;
41 import com.sun.tools.javac.tree.DCTree.DCAttribute;
42 import com.sun.tools.javac.tree.DCTree.DCDocComment;
43 import com.sun.tools.javac.tree.DCTree.DCEndElement;
44 import com.sun.tools.javac.tree.DCTree.DCEndPosTree;
45 import com.sun.tools.javac.tree.DCTree.DCErroneous;
46 import com.sun.tools.javac.tree.DCTree.DCIdentifier;
47 import com.sun.tools.javac.tree.DCTree.DCReference;
48 import com.sun.tools.javac.tree.DCTree.DCStartElement;
49 import com.sun.tools.javac.tree.DCTree.DCText;
50 import com.sun.tools.javac.tree.DocTreeMaker;
51 import com.sun.tools.javac.tree.JCTree;
52 import com.sun.tools.javac.util.DiagnosticSource;
53 import com.sun.tools.javac.util.List;
54 import com.sun.tools.javac.util.ListBuffer;
55 import com.sun.tools.javac.util.Log;
56 import com.sun.tools.javac.util.Name;
57 import com.sun.tools.javac.util.Names;
58 import com.sun.tools.javac.util.Options;
59 import com.sun.tools.javac.util.Position;
60 import com.sun.tools.javac.util.StringUtils;
61 import static com.sun.tools.javac.util.LayoutCharacters.*;
62
63 /**
64 *
65 * <p><b>This is NOT part of any supported API.
66 * If you write code that depends on this, you do so at your own risk.
67 * This code and its internal interfaces are subject to change or
68 * deletion without notice.</b>
69 */
70 public class DocCommentParser {
71 static class ParseException extends Exception {
72 private static final long serialVersionUID = 0;
73 ParseException(String key) {
74 super(key);
75 }
76 }
77
/** Factory that supplies the names table, tree maker, locale, options, log and nested parsers. */
final ParserFactory fac;
/** Source passed along to every erroneous node that is created. */
final DiagnosticSource diagSource;
/** The comment whose text is parsed. */
final Comment comment;
/** Tree maker used to build all doc trees. */
final DocTreeMaker m;
/** Name table used to create tag, entity and identifier names. */
final Names names;

/**
 * Sentence breaker used by getSentenceBreak; left null when the simple
 * "period followed by whitespace" scan is used instead.
 */
BreakIterator sentenceBreaker;

/** The input buffer, index of most recent character read,
 *  index of one past last character in buffer.
 */
protected char[] buf;
protected int bp;
protected int buflen;

/** The current character.
 */
protected char ch;

/** Start index of text that has been scanned but not yet added as a tree, or -1 if none. */
int textStart = -1;
/** Index of the last non-whitespace character recorded by the scanners, or -1. */
int lastNonWhite = -1;
/** Set when a line terminator is read; cleared by the scanners when they consume other content. */
boolean newline = true;

/** Parsers for the standard tags, keyed by tag name; filled in by initTagParsers. */
Map<Name, TagParser> tagParsers;
102
103 DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
104 this.fac = fac;
105 this.diagSource = diagSource;
106 this.comment = comment;
107 names = fac.names;
108 m = fac.docTreeMaker;
109
110 Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale;
111
112 Options options = fac.options;
113 boolean useBreakIterator = options.isSet("breakIterator");
114 if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
115 sentenceBreaker = BreakIterator.getSentenceInstance(locale);
116
117 initTagParsers();
118 }
119
/**
 * Parse the text of the comment into a doc comment tree.
 * The leading block content is split into a first sentence and the
 * remaining body; block tags follow.
 *
 * @return the tree for the whole comment; its position is that of the
 *         first tree found in the first sentence, body or tags, or
 *         Position.NOPOS if all three are empty
 */
DCDocComment parse() {
    String c = comment.getText();
    // Copy the text into a buffer with one extra slot holding the EOI sentinel.
    buf = new char[c.length() + 1];
    c.getChars(0, c.length(), buf, 0);
    buf[buf.length - 1] = EOI;
    buflen = buf.length - 1;
    // Start before the first character so that nextChar() reads index 0.
    bp = -1;
    nextChar();

    List<DCTree> body = blockContent();
    List<DCTree> tags = blockTags();

    // split body into first sentence and body
    ListBuffer<DCTree> fs = new ListBuffer<DCTree>();
    loop:
    for (; body.nonEmpty(); body = body.tail) {
        DCTree t = body.head;
        switch (t.getKind()) {
            case TEXT:
                String s = ((DCText) t).getBody();
                int i = getSentenceBreak(s);
                if (i > 0) {
                    // The break is inside this text: the part before it
                    // (minus trailing whitespace) ends the first sentence.
                    int i0 = i;
                    while (i0 > 0 && isWhitespace(s.charAt(i0 - 1)))
                        i0--;
                    fs.add(m.at(t.pos).Text(s.substring(0, i0)));
                    // The remainder (minus leading whitespace), if any,
                    // becomes the first tree of the body.
                    int i1 = i;
                    while (i1 < s.length() && isWhitespace(s.charAt(i1)))
                        i1++;
                    body = body.tail;
                    if (i1 < s.length())
                        body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1)));
                    break loop;
                } else if (body.tail.nonEmpty()) {
                    // No break in the text itself, but the next tree is a
                    // sentence-breaking HTML element: end the sentence here.
                    if (isSentenceBreak(body.tail.head)) {
                        int i0 = s.length() - 1;
                        while (i0 > 0 && isWhitespace(s.charAt(i0)))
                            i0--;
                        fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1)));
                        body = body.tail;
                        break loop;
                    }
                }
                break;

            case START_ELEMENT:
            case END_ELEMENT:
                // A sentence-breaking element stays in the body.
                if (isSentenceBreak(t))
                    break loop;
                break;
        }
        fs.add(t);
    }

    @SuppressWarnings("unchecked")
    DCTree first = getFirst(fs.toList(), body, tags);
    int pos = (first == null) ? Position.NOPOS : first.pos;

    DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags);
    return dc;
}
181
182 void nextChar() {
183 ch = buf[bp < buflen ? ++bp : buflen];
184 switch (ch) {
185 case '\f': case '\n': case '\r':
186 newline = true;
187 }
188 }
189
/**
 * Read block content, consisting of text, html and inline tags.
 * Terminated by the end of input, or the beginning of the next block tag:
 * i.e. @ as the first non-whitespace character on a line.
 *
 * @return the trees read, in source order
 */
@SuppressWarnings("fallthrough")
protected List<DCTree> blockContent() {
    ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
    textStart = -1;

    loop:
    while (bp < buflen) {
        switch (ch) {
            case '\n': case '\r': case '\f':
                newline = true;
                // fallthrough

            case ' ': case '\t':
                // Whitespace neither starts pending text nor updates lastNonWhite.
                nextChar();
                break;

            case '&':
                entity(trees);
                break;

            case '<':
                newline = false;
                // Flush text preceding the '<', then read the HTML construct.
                addPendingText(trees, bp - 1);
                trees.add(html());
                if (textStart == -1) {
                    textStart = bp;
                    lastNonWhite = -1;
                }
                break;

            case '>':
                // A '>' reached here is not part of an HTML construct; report it.
                newline = false;
                addPendingText(trees, bp - 1);
                trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt"));
                nextChar();
                if (textStart == -1) {
                    textStart = bp;
                    lastNonWhite = -1;
                }
                break;

            case '{':
                inlineTag(trees);
                break;

            case '@':
                // '@' while the newline flag is set starts the next block tag.
                if (newline) {
                    addPendingText(trees, lastNonWhite);
                    break loop;
                }
                // fallthrough

            default:
                newline = false;
                if (textStart == -1)
                    textStart = bp;
                lastNonWhite = bp;
                nextChar();
        }
    }

    // Flush remaining text, dropping trailing whitespace.
    if (lastNonWhite != -1)
        addPendingText(trees, lastNonWhite);

    return trees.toList();
}
261
262 /**
263 * Read a series of block tags, including their content.
264 * Standard tags parse their content appropriately.
265 * Non-standard tags are represented by {@link UnknownBlockTag}.
266 */
267 protected List<DCTree> blockTags() {
268 ListBuffer<DCTree> tags = new ListBuffer<DCTree>();
269 while (ch == '@')
270 tags.add(blockTag());
271 return tags.toList();
272 }
273
274 /**
275 * Read a single block tag, including its content.
276 * Standard tags parse their content appropriately.
277 * Non-standard tags are represented by {@link UnknownBlockTag}.
278 */
279 protected DCTree blockTag() {
280 int p = bp;
281 try {
282 nextChar();
283 if (isIdentifierStart(ch)) {
284 Name name = readTagName();
285 TagParser tp = tagParsers.get(name);
286 if (tp == null) {
287 List<DCTree> content = blockContent();
288 return m.at(p).UnknownBlockTag(name, content);
289 } else {
290 switch (tp.getKind()) {
291 case BLOCK:
292 return tp.parse(p);
293 case INLINE:
294 return erroneous("dc.bad.inline.tag", p);
295 }
296 }
297 }
298 blockContent();
299
300 return erroneous("dc.no.tag.name", p);
301 } catch (ParseException e) {
302 blockContent();
303 return erroneous(e.getMessage(), p);
304 }
305 }
306
307 protected void inlineTag(ListBuffer<DCTree> list) {
308 newline = false;
309 nextChar();
310 if (ch == '@') {
311 addPendingText(list, bp - 2);
312 list.add(inlineTag());
313 textStart = bp;
314 lastNonWhite = -1;
315 } else {
316 if (textStart == -1)
317 textStart = bp - 1;
318 lastNonWhite = bp;
319 }
320 }
321
322 /**
323 * Read a single inline tag, including its content.
324 * Standard tags parse their content appropriately.
325 * Non-standard tags are represented by {@link UnknownBlockTag}.
326 * Malformed tags may be returned as {@link Erroneous}.
327 */
328 protected DCTree inlineTag() {
329 int p = bp - 1;
330 try {
331 nextChar();
332 if (isIdentifierStart(ch)) {
333 Name name = readTagName();
334 skipWhitespace();
335
336 TagParser tp = tagParsers.get(name);
337 if (tp == null) {
338 DCTree text = inlineText();
339 if (text != null) {
340 nextChar();
341 return m.at(p).UnknownInlineTag(name, List.of(text)).setEndPos(bp);
342 }
343 } else if (tp.getKind() == TagParser.Kind.INLINE) {
344 DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p);
345 if (tree != null) {
346 return tree.setEndPos(bp);
347 }
348 } else {
349 inlineText(); // skip content
350 nextChar();
351 }
352 }
353 return erroneous("dc.no.tag.name", p);
354 } catch (ParseException e) {
355 return erroneous(e.getMessage(), p);
356 }
357 }
358
/**
 * Read plain text content of an inline tag.
 * Matching pairs of { } are skipped; the text is terminated by the first
 * unmatched }. It is an error if the beginning of the next tag is detected.
 *
 * @return a text tree covering the content up to, but not including, the
 *         terminating }; on return the current character is that }
 * @throws ParseException with key dc.unterminated.inline.tag if the end
 *         of input or the start of a block tag is reached first
 */
protected DCTree inlineText() throws ParseException {
    skipWhitespace();
    int pos = bp;
    // The opening brace of the tag itself has already been consumed.
    int depth = 1;

    loop:
    while (bp < buflen) {
        switch (ch) {
            case '\n': case '\r': case '\f':
                newline = true;
                break;

            case ' ': case '\t':
                break;

            case '{':
                newline = false;
                lastNonWhite = bp;
                depth++;
                break;

            case '}':
                if (--depth == 0) {
                    return m.at(pos).Text(newString(pos, bp));
                }
                newline = false;
                lastNonWhite = bp;
                break;

            case '@':
                // '@' while the newline flag is set is taken as a block tag.
                if (newline)
                    break loop;
                newline = false;
                lastNonWhite = bp;
                break;

            default:
                newline = false;
                lastNonWhite = bp;
                break;
        }
        nextChar();
    }
    throw new ParseException("dc.unterminated.inline.tag");
}
409
/**
 * Read Java class name, possibly followed by member
 * Matching pairs of < > are skipped. The text is terminated by the first
 * unmatched }. It is an error if the beginning of the next tag is detected.
 *
 * @param allowMember not consulted by the current implementation (see TODO)
 * @return the reference tree, or null if the current character is } so
 *         that no signature is present
 * @throws ParseException if brackets are unbalanced, the parentheses are
 *         misplaced, or the nested Java parser rejects part of the signature
 */
// TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
// TODO: improve quality of parse to forbid bad constructions.
@SuppressWarnings("fallthrough")
protected DCReference reference(boolean allowMember) throws ParseException {
    int pos = bp;
    int depth = 0;

    // scan to find the end of the signature, by looking for the first
    // whitespace not enclosed in () or <>, or the end of the tag
    loop:
    while (bp < buflen) {
        switch (ch) {
            case '\n': case '\r': case '\f':
                newline = true;
                // fallthrough

            case ' ': case '\t':
                if (depth == 0)
                    break loop;
                break;

            case '(':
            case '<':
                newline = false;
                depth++;
                break;

            case ')':
            case '>':
                newline = false;
                --depth;
                break;

            case '}':
                if (bp == pos)
                    return null;
                newline = false;
                break loop;

            case '@':
                if (newline)
                    break loop;
                // fallthrough

            default:
                newline = false;

        }
        nextChar();
    }

    if (depth != 0)
        throw new ParseException("dc.unterminated.signature");

    String sig = newString(pos, bp);

    // Break sig apart into qualifiedExpr member paramTypes.
    JCTree qualExpr;
    Name member;
    List<JCTree> paramTypes;

    // Diagnostics from the nested parsers are collected here, and turned
    // into a single dc.ref.syntax.error below.
    Log.DeferredDiagnosticHandler deferredDiagnosticHandler
            = new Log.DeferredDiagnosticHandler(fac.log);

    try {
        int hash = sig.indexOf("#");
        int lparen = sig.indexOf("(", hash + 1);
        if (hash == -1) {
            if (lparen == -1) {
                // No '#' and no '(': the whole signature is a type.
                qualExpr = parseType(sig);
                member = null;
            } else {
                // No '#' but a '(': the text before '(' is a member name.
                qualExpr = null;
                member = parseMember(sig.substring(0, lparen));
            }
        } else {
            qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
            if (lparen == -1)
                member = parseMember(sig.substring(hash + 1));
            else
                member = parseMember(sig.substring(hash + 1, lparen));
        }

        if (lparen < 0) {
            paramTypes = null;
        } else {
            // The first ')' after '(' must be the last character of the signature.
            int rparen = sig.indexOf(")", lparen);
            if (rparen != sig.length() - 1)
                throw new ParseException("dc.ref.bad.parens");
            paramTypes = parseParams(sig.substring(lparen + 1, rparen));
        }

        if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
            throw new ParseException("dc.ref.syntax.error");

    } finally {
        fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
    }

    return m.at(pos).Reference(sig, qualExpr, member, paramTypes).setEndPos(bp);
}
516
517 JCTree parseType(String s) throws ParseException {
518 JavacParser p = fac.newParser(s, false, false, false);
519 JCTree tree = p.parseType();
520 if (p.token().kind != TokenKind.EOF)
521 throw new ParseException("dc.ref.unexpected.input");
522 return tree;
523 }
524
525 Name parseMember(String s) throws ParseException {
526 JavacParser p = fac.newParser(s, false, false, false);
527 Name name = p.ident();
528 if (p.token().kind != TokenKind.EOF)
529 throw new ParseException("dc.ref.unexpected.input");
530 return name;
531 }
532
533 List<JCTree> parseParams(String s) throws ParseException {
534 if (s.trim().isEmpty())
535 return List.nil();
536
537 JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
538 ListBuffer<JCTree> paramTypes = new ListBuffer<JCTree>();
539 paramTypes.add(p.parseType());
540
541 if (p.token().kind == TokenKind.IDENTIFIER)
542 p.nextToken();
543
544 while (p.token().kind == TokenKind.COMMA) {
545 p.nextToken();
546 paramTypes.add(p.parseType());
547
548 if (p.token().kind == TokenKind.IDENTIFIER)
549 p.nextToken();
550 }
551
552 if (p.token().kind != TokenKind.EOF)
553 throw new ParseException("dc.ref.unexpected.input");
554
555 return paramTypes.toList();
556 }
557
558 /**
559 * Read Java identifier
560 * Matching pairs of { } are skipped; the text is terminated by the first
561 * unmatched }. It is an error if the beginning of the next tag is detected.
562 */
563 @SuppressWarnings("fallthrough")
564 protected DCIdentifier identifier() throws ParseException {
565 skipWhitespace();
566 int pos = bp;
567
568 if (isJavaIdentifierStart(ch)) {
569 Name name = readJavaIdentifier();
570 return m.at(pos).Identifier(name);
571 }
572
573 throw new ParseException("dc.identifier.expected");
574 }
575
576 /**
577 * Read a quoted string.
578 * It is an error if the beginning of the next tag is detected.
579 */
580 @SuppressWarnings("fallthrough")
581 protected DCText quotedString() {
582 int pos = bp;
583 nextChar();
584
585 loop:
586 while (bp < buflen) {
587 switch (ch) {
588 case '\n': case '\r': case '\f':
589 newline = true;
590 break;
591
592 case ' ': case '\t':
593 break;
594
595 case '"':
596 nextChar();
597 // trim trailing white-space?
598 return m.at(pos).Text(newString(pos, bp));
599
600 case '@':
601 if (newline)
602 break loop;
603
604 }
605 nextChar();
606 }
607 return null;
608 }
609
/**
 * Read general text content of an inline tag, including HTML entities and elements.
 * Matching pairs of { } are skipped; the text is terminated by the first
 * unmatched }. It is an error if the beginning of the next tag is detected.
 *
 * @return the trees read; or a single erroneous tree with key
 *         dc.unterminated.inline.tag if the terminating } is not found
 */
@SuppressWarnings("fallthrough")
protected List<DCTree> inlineContent() {
    ListBuffer<DCTree> trees = new ListBuffer<DCTree>();

    skipWhitespace();
    int pos = bp;
    // The opening brace of the tag itself has already been consumed.
    int depth = 1;
    textStart = -1;

    loop:
    while (bp < buflen) {

        switch (ch) {
            case '\n': case '\r': case '\f':
                newline = true;
                // fall through

            case ' ': case '\t':
                nextChar();
                break;

            case '&':
                entity(trees);
                break;

            case '<':
                newline = false;
                addPendingText(trees, bp - 1);
                trees.add(html());
                break;

            case '{':
                newline = false;
                depth++;
                nextChar();
                break;

            case '}':
                newline = false;
                if (--depth == 0) {
                    // End of the tag: flush text up to the brace and consume it.
                    addPendingText(trees, bp - 1);
                    nextChar();
                    return trees.toList();
                }
                nextChar();
                break;

            case '@':
                // '@' while the newline flag is set is taken as a block tag.
                if (newline)
                    break loop;
                // fallthrough

            default:
                if (textStart == -1)
                    textStart = bp;
                nextChar();
                break;
        }
    }

    return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos));
}
677
678 protected void entity(ListBuffer<DCTree> list) {
679 newline = false;
680 addPendingText(list, bp - 1);
681 list.add(entity());
682 if (textStart == -1) {
683 textStart = bp;
684 lastNonWhite = -1;
685 }
686 }
687
688 /**
689 * Read an HTML entity.
690 * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
691 */
692 protected DCTree entity() {
693 int p = bp;
694 nextChar();
695 Name name = null;
696 boolean checkSemi = false;
697 if (ch == '#') {
698 int namep = bp;
699 nextChar();
700 if (isDecimalDigit(ch)) {
701 nextChar();
702 while (isDecimalDigit(ch))
703 nextChar();
704 name = names.fromChars(buf, namep, bp - namep);
705 } else if (ch == 'x' || ch == 'X') {
706 nextChar();
707 if (isHexDigit(ch)) {
708 nextChar();
709 while (isHexDigit(ch))
710 nextChar();
711 name = names.fromChars(buf, namep, bp - namep);
712 }
713 }
714 } else if (isIdentifierStart(ch)) {
715 name = readIdentifier();
716 }
717
718 if (name == null)
719 return erroneous("dc.bad.entity", p);
720 else {
721 if (ch != ';')
722 return erroneous("dc.missing.semicolon", p);
723 nextChar();
724 return m.at(p).Entity(name);
725 }
726 }
727
/**
 * Read the start or end of an HTML tag, or an HTML comment
 * {@literal <identifier attrs> } or {@literal </identifier> }
 *
 * @return the tree for the construct; or an erroneous tree with key
 *         dc.malformed.html if it is not well formed, in which case
 *         scanning resumes at the character after the opening {@literal <}
 */
protected DCTree html() {
    int p = bp;
    nextChar();
    if (isIdentifierStart(ch)) {
        // start element, optionally self-closing
        Name name = readIdentifier();
        List<DCTree> attrs = htmlAttrs();
        if (attrs != null) {
            boolean selfClosing = false;
            if (ch == '/') {
                nextChar();
                selfClosing = true;
            }
            if (ch == '>') {
                nextChar();
                return m.at(p).StartElement(name, attrs, selfClosing).setEndPos(bp);
            }
        }
    } else if (ch == '/') {
        // end element
        nextChar();
        if (isIdentifierStart(ch)) {
            Name name = readIdentifier();
            skipWhitespace();
            if (ch == '>') {
                nextChar();
                return m.at(p).EndElement(name);
            }
        }
    } else if (ch == '!') {
        // comment
        nextChar();
        if (ch == '-') {
            nextChar();
            if (ch == '-') {
                nextChar();
                while (bp < buflen) {
                    int dash = 0;
                    while (ch == '-') {
                        dash++;
                        nextChar();
                    }
                    // strictly speaking, a comment should not contain "--"
                    // so dash > 2 is an error, dash == 2 implies ch == '>'
                    if (dash >= 2 && ch == '>') {
                        nextChar();
                        return m.at(p).Comment(newString(p, bp));
                    }

                    nextChar();
                }
            }
        }
    }

    // Malformed: rewind to just after the '<' and report it.
    bp = p + 1;
    ch = buf[bp];
    return erroneous("dc.malformed.html", p);
}
788
/**
 * Read a series of HTML attributes, terminated by {@literal > }.
 * Each attribute is of the form {@literal identifier[=value] }.
 * "value" may be unquoted, single-quoted, or double-quoted.
 *
 * @return the attributes read, in source order
 */
protected List<DCTree> htmlAttrs() {
    ListBuffer<DCTree> attrs = new ListBuffer<DCTree>();
    skipWhitespace();

    loop:
    while (isIdentifierStart(ch)) {
        int namePos = bp;
        Name name = readIdentifier();
        skipWhitespace();
        // An attribute without '=' has kind EMPTY and a null value.
        List<DCTree> value = null;
        ValueKind vkind = ValueKind.EMPTY;
        if (ch == '=') {
            ListBuffer<DCTree> v = new ListBuffer<DCTree>();
            nextChar();
            skipWhitespace();
            if (ch == '\'' || ch == '"') {
                vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
                char quote = ch;
                nextChar();
                textStart = bp;
                while (bp < buflen && ch != quote) {
                    if (newline && ch == '@') {
                        attrs.add(erroneous("dc.unterminated.string", namePos));
                        // No point trying to read more.
                        // In fact, all attrs get discarded by the caller
                        // and superseded by a malformed.html node because
                        // the html tag itself is not terminated correctly.
                        break loop;
                    }
                    attrValueChar(v);
                }
                // Flush text up to the closing quote, then consume the quote.
                addPendingText(v, bp - 1);
                nextChar();
            } else {
                vkind = ValueKind.UNQUOTED;
                textStart = bp;
                while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
                    attrValueChar(v);
                }
                addPendingText(v, bp - 1);
            }
            skipWhitespace();
            value = v.toList();
        }
        DCAttribute attr = m.at(namePos).Attribute(name, vkind, value);
        attrs.add(attr);
    }

    return attrs.toList();
}
844
845 protected void attrValueChar(ListBuffer<DCTree> list) {
846 switch (ch) {
847 case '&':
848 entity(list);
849 break;
850
851 case '{':
852 inlineTag(list);
853 break;
854
855 default:
856 nextChar();
857 }
858 }
859
860 protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
861 if (textStart != -1) {
862 if (textStart <= textEnd) {
863 list.add(m.at(textStart).Text(newString(textStart, textEnd + 1)));
864 }
865 textStart = -1;
866 }
867 }
868
869 protected DCErroneous erroneous(String code, int pos) {
870 int i = bp - 1;
871 loop:
872 while (i > pos) {
873 switch (buf[i]) {
874 case '\f': case '\n': case '\r':
875 newline = true;
876 break;
877 case '\t': case ' ':
878 break;
879 default:
880 break loop;
881 }
882 i--;
883 }
884 textStart = -1;
885 return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code);
886 }
887
888 @SuppressWarnings("unchecked")
889 <T> T getFirst(List<T>... lists) {
890 for (List<T> list: lists) {
891 if (list.nonEmpty())
892 return list.head;
893 }
894 return null;
895 }
896
897 protected boolean isIdentifierStart(char ch) {
898 return Character.isUnicodeIdentifierStart(ch);
899 }
900
901 protected Name readIdentifier() {
902 int start = bp;
903 nextChar();
904 while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
905 nextChar();
906 return names.fromChars(buf, start, bp - start);
907 }
908
909 protected Name readTagName() {
910 int start = bp;
911 nextChar();
912 while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '.'))
913 nextChar();
914 return names.fromChars(buf, start, bp - start);
915 }
916
917 protected boolean isJavaIdentifierStart(char ch) {
918 return Character.isJavaIdentifierStart(ch);
919 }
920
921 protected Name readJavaIdentifier() {
922 int start = bp;
923 nextChar();
924 while (bp < buflen && Character.isJavaIdentifierPart(ch))
925 nextChar();
926 return names.fromChars(buf, start, bp - start);
927 }
928
929 protected boolean isDecimalDigit(char ch) {
930 return ('0' <= ch && ch <= '9');
931 }
932
933 protected boolean isHexDigit(char ch) {
934 return ('0' <= ch && ch <= '9')
935 || ('a' <= ch && ch <= 'f')
936 || ('A' <= ch && ch <= 'F');
937 }
938
939 protected boolean isUnquotedAttrValueTerminator(char ch) {
940 switch (ch) {
941 case '\f': case '\n': case '\r': case '\t':
942 case ' ':
943 case '"': case '\'': case '`':
944 case '=': case '<': case '>':
945 return true;
946 default:
947 return false;
948 }
949 }
950
951 protected boolean isWhitespace(char ch) {
952 return Character.isWhitespace(ch);
953 }
954
955 protected void skipWhitespace() {
956 while (isWhitespace(ch))
957 nextChar();
958 }
959
960 protected int getSentenceBreak(String s) {
961 if (sentenceBreaker != null) {
962 sentenceBreaker.setText(s);
963 int i = sentenceBreaker.next();
964 return (i == s.length()) ? -1 : i;
965 }
966
967 // scan for period followed by whitespace
968 boolean period = false;
969 for (int i = 0; i < s.length(); i++) {
970 switch (s.charAt(i)) {
971 case '.':
972 period = true;
973 break;
974
975 case ' ':
976 case '\f':
977 case '\n':
978 case '\r':
979 case '\t':
980 if (period)
981 return i;
982 break;
983
984 default:
985 period = false;
986 break;
987 }
988 }
989 return -1;
990 }
991
992
/** Lower-case names of the HTML elements that isSentenceBreak treats as ending a sentence. */
Set<String> htmlBlockTags = new HashSet<String>(Arrays.asList(
        "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre"));
995
996 protected boolean isSentenceBreak(Name n) {
997 return htmlBlockTags.contains(StringUtils.toLowerCase(n.toString()));
998 }
999
1000 protected boolean isSentenceBreak(DCTree t) {
1001 switch (t.getKind()) {
1002 case START_ELEMENT:
1003 return isSentenceBreak(((DCStartElement) t).getName());
1004
1005 case END_ELEMENT:
1006 return isSentenceBreak(((DCEndElement) t).getName());
1007 }
1008 return false;
1009 }
1010
1011 /**
1012 * @param start position of first character of string
1013 * @param end position of character beyond last character to be included
1014 */
1015 String newString(int start, int end) {
1016 return new String(buf, start, end - start);
1017 }
1018
1019 static abstract class TagParser {
1020 enum Kind { INLINE, BLOCK }
1021
1022 Kind kind;
1023 DCTree.Kind treeKind;
1024
1025 TagParser(Kind k, DCTree.Kind tk) {
1026 kind = k;
1027 treeKind = tk;
1028 }
1029
1030 Kind getKind() {
1031 return kind;
1032 }
1033
1034 DCTree.Kind getTreeKind() {
1035 return treeKind;
1036 }
1037
1038 abstract DCTree parse(int pos) throws ParseException;
1039 }
1040
/**
 * Create the parsers for the standard tags and register each one in
 * tagParsers under the tag name of the tree kind it produces.
 *
 * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
 */
private void initTagParsers() {
    TagParser[] parsers = {
        // @author name-text
        new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
            public DCTree parse(int pos) {
                List<DCTree> name = blockContent();
                return m.at(pos).Author(name);
            }
        },

        // {@code text}
        new TagParser(Kind.INLINE, DCTree.Kind.CODE) {
            public DCTree parse(int pos) throws ParseException {
                DCTree text = inlineText();
                // consume the closing brace left by inlineText
                nextChar();
                return m.at(pos).Code((DCText) text);
            }
        },

        // @deprecated deprecated-text
        new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
            public DCTree parse(int pos) {
                List<DCTree> reason = blockContent();
                return m.at(pos).Deprecated(reason);
            }
        },

        // {@docRoot}
        new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
            public DCTree parse(int pos) throws ParseException {
                if (ch == '}') {
                    nextChar();
                    return m.at(pos).DocRoot();
                }
                inlineText(); // skip unexpected content
                nextChar();
                throw new ParseException("dc.unexpected.content");
            }
        },

        // @exception class-name description
        new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
            public DCTree parse(int pos) throws ParseException {
                skipWhitespace();
                DCReference ref = reference(false);
                List<DCTree> description = blockContent();
                return m.at(pos).Exception(ref, description);
            }
        },

        // {@inheritDoc}
        new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
            public DCTree parse(int pos) throws ParseException {
                if (ch == '}') {
                    nextChar();
                    return m.at(pos).InheritDoc();
                }
                inlineText(); // skip unexpected content
                nextChar();
                throw new ParseException("dc.unexpected.content");
            }
        },

        // {@link package.class#member label}
        new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
            public DCTree parse(int pos) throws ParseException {
                DCReference ref = reference(true);
                List<DCTree> label = inlineContent();
                return m.at(pos).Link(ref, label);
            }
        },

        // {@linkplain package.class#member label}
        new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
            public DCTree parse(int pos) throws ParseException {
                DCReference ref = reference(true);
                List<DCTree> label = inlineContent();
                return m.at(pos).LinkPlain(ref, label);
            }
        },

        // {@literal text}
        new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) {
            public DCTree parse(int pos) throws ParseException {
                DCTree text = inlineText();
                // consume the closing brace left by inlineText
                nextChar();
                return m.at(pos).Literal((DCText) text);
            }
        },

        // @param parameter-name description
        new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
            public DCTree parse(int pos) throws ParseException {
                skipWhitespace();

                // a type parameter is written as <name>
                boolean typaram = false;
                if (ch == '<') {
                    typaram = true;
                    nextChar();
                }

                DCIdentifier id = identifier();

                if (typaram) {
                    if (ch != '>')
                        throw new ParseException("dc.gt.expected");
                    nextChar();
                }

                skipWhitespace();
                List<DCTree> desc = blockContent();
                return m.at(pos).Param(typaram, id, desc);
            }
        },

        // @return description
        new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
            public DCTree parse(int pos) {
                List<DCTree> description = blockContent();
                return m.at(pos).Return(description);
            }
        },

        // @see reference | quoted-string | HTML
        new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
            public DCTree parse(int pos) throws ParseException {
                skipWhitespace();
                switch (ch) {
                    case '"':
                        DCText string = quotedString();
                        if (string != null) {
                            skipWhitespace();
                            // only the next tag or the end of input may
                            // follow the quoted string
                            if (ch == '@'
                                    || ch == EOI && bp == buf.length - 1) {
                                return m.at(pos).See(List.<DCTree>of(string));
                            }
                        }
                        break;

                    case '<':
                        List<DCTree> html = blockContent();
                        if (html != null)
                            return m.at(pos).See(html);
                        break;

                    case '@':
                        if (newline)
                            throw new ParseException("dc.no.content");
                        break;

                    case EOI:
                        // distinguish the sentinel from an EOI character in the text
                        if (bp == buf.length - 1)
                            throw new ParseException("dc.no.content");
                        break;

                    default:
                        if (isJavaIdentifierStart(ch) || ch == '#') {
                            DCReference ref = reference(true);
                            List<DCTree> description = blockContent();
                            return m.at(pos).See(description.prepend(ref));
                        }
                }
                throw new ParseException("dc.unexpected.content");
            }
        },

        // @serialData data-description
        new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
            public DCTree parse(int pos) {
                List<DCTree> description = blockContent();
                return m.at(pos).SerialData(description);
            }
        },

        // @serialField field-name field-type description
        new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
            public DCTree parse(int pos) throws ParseException {
                skipWhitespace();
                DCIdentifier name = identifier();
                skipWhitespace();
                DCReference type = reference(false);
                // the description stays null unless whitespace follows the type
                List<DCTree> description = null;
                if (isWhitespace(ch)) {
                    skipWhitespace();
                    description = blockContent();
                }
                return m.at(pos).SerialField(name, type, description);
            }
        },

        // @serial field-description | include | exclude
        new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
            public DCTree parse(int pos) {
                List<DCTree> description = blockContent();
                return m.at(pos).Serial(description);
            }
        },

        // @since since-text
        new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
            public DCTree parse(int pos) {
                List<DCTree> description = blockContent();
                return m.at(pos).Since(description);
            }
        },

        // @throws class-name description
        new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
            public DCTree parse(int pos) throws ParseException {
                skipWhitespace();
                DCReference ref = reference(false);
                List<DCTree> description = blockContent();
                return m.at(pos).Throws(ref, description);
            }
        },

        // {@value package.class#field}
        new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
            public DCTree parse(int pos) throws ParseException {
                DCReference ref = reference(true);
                skipWhitespace();
                if (ch == '}') {
                    nextChar();
                    return m.at(pos).Value(ref);
                }
                nextChar();
                throw new ParseException("dc.unexpected.content");
            }
        },

        // @version version-text
        new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
            public DCTree parse(int pos) {
                List<DCTree> description = blockContent();
                return m.at(pos).Version(description);
            }
        },
    };

    tagParsers = new HashMap<Name,TagParser>();
    for (TagParser p: parsers)
        tagParsers.put(names.fromString(p.getTreeKind().tagName), p);

}
1288 }

mercurial