Mon, 25 Mar 2013 16:55:14 -0700
8010521: jdk8 l10n resource file translation update 2
Reviewed-by: naoto, yhuang
1 /*
2 * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
26 package com.sun.tools.javac.parser;
28 import java.text.BreakIterator;
29 import java.util.Arrays;
30 import java.util.HashMap;
31 import java.util.HashSet;
32 import java.util.Locale;
33 import java.util.Map;
34 import java.util.Set;
36 import com.sun.source.doctree.AttributeTree.ValueKind;
37 import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind;
38 import com.sun.tools.javac.parser.Tokens.Comment;
39 import com.sun.tools.javac.parser.Tokens.TokenKind;
40 import com.sun.tools.javac.tree.DCTree;
41 import com.sun.tools.javac.tree.DCTree.DCAttribute;
42 import com.sun.tools.javac.tree.DCTree.DCDocComment;
43 import com.sun.tools.javac.tree.DCTree.DCEndElement;
44 import com.sun.tools.javac.tree.DCTree.DCErroneous;
45 import com.sun.tools.javac.tree.DCTree.DCIdentifier;
46 import com.sun.tools.javac.tree.DCTree.DCReference;
47 import com.sun.tools.javac.tree.DCTree.DCStartElement;
48 import com.sun.tools.javac.tree.DCTree.DCText;
49 import com.sun.tools.javac.tree.DocTreeMaker;
50 import com.sun.tools.javac.tree.JCTree;
51 import com.sun.tools.javac.util.DiagnosticSource;
52 import com.sun.tools.javac.util.List;
53 import com.sun.tools.javac.util.ListBuffer;
54 import com.sun.tools.javac.util.Log;
55 import com.sun.tools.javac.util.Name;
56 import com.sun.tools.javac.util.Names;
57 import com.sun.tools.javac.util.Options;
58 import com.sun.tools.javac.util.Position;
59 import static com.sun.tools.javac.util.LayoutCharacters.*;
61 /**
62 *
63 * <p><b>This is NOT part of any supported API.
64 * If you write code that depends on this, you do so at your own risk.
65 * This code and its internal interfaces are subject to change or
66 * deletion without notice.</b>
67 */
68 public class DocCommentParser {
69 static class ParseException extends Exception {
70 private static final long serialVersionUID = 0;
71 ParseException(String key) {
72 super(key);
73 }
74 }
76 final ParserFactory fac;
77 final DiagnosticSource diagSource;
78 final Comment comment;
79 final DocTreeMaker m;
80 final Names names;
82 BreakIterator sentenceBreaker;
84 /** The input buffer, index of most recent character read,
85 * index of one past last character in buffer.
86 */
87 protected char[] buf;
88 protected int bp;
89 protected int buflen;
91 /** The current character.
92 */
93 protected char ch;
95 int textStart = -1;
96 int lastNonWhite = -1;
97 boolean newline = true;
99 Map<Name, TagParser> tagParsers;
101 DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
102 this.fac = fac;
103 this.diagSource = diagSource;
104 this.comment = comment;
105 names = fac.names;
106 m = fac.docTreeMaker;
108 Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale;
110 Options options = fac.options;
111 boolean useBreakIterator = options.isSet("breakIterator");
112 if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage()))
113 sentenceBreaker = BreakIterator.getSentenceInstance(locale);
115 initTagParsers();
116 }
118 DCDocComment parse() {
119 String c = comment.getText();
120 buf = new char[c.length() + 1];
121 c.getChars(0, c.length(), buf, 0);
122 buf[buf.length - 1] = EOI;
123 buflen = buf.length - 1;
124 bp = -1;
125 nextChar();
127 List<DCTree> body = blockContent();
128 List<DCTree> tags = blockTags();
130 // split body into first sentence and body
131 ListBuffer<DCTree> fs = new ListBuffer<DCTree>();
132 loop:
133 for (; body.nonEmpty(); body = body.tail) {
134 DCTree t = body.head;
135 switch (t.getKind()) {
136 case TEXT:
137 String s = ((DCText) t).getBody();
138 int i = getSentenceBreak(s);
139 if (i > 0) {
140 int i0 = i;
141 while (i0 > 0 && isWhitespace(s.charAt(i0 - 1)))
142 i0--;
143 fs.add(m.at(t.pos).Text(s.substring(0, i0)));
144 int i1 = i;
145 while (i1 < s.length() && isWhitespace(s.charAt(i1)))
146 i1++;
147 body = body.tail;
148 if (i1 < s.length())
149 body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1)));
150 break loop;
151 } else if (body.tail.nonEmpty()) {
152 if (isSentenceBreak(body.tail.head)) {
153 int i0 = s.length() - 1;
154 while (i0 > 0 && isWhitespace(s.charAt(i0)))
155 i0--;
156 fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1)));
157 body = body.tail;
158 break loop;
159 }
160 }
161 break;
163 case START_ELEMENT:
164 case END_ELEMENT:
165 if (isSentenceBreak(t))
166 break loop;
167 break;
168 }
169 fs.add(t);
170 }
172 @SuppressWarnings("unchecked")
173 DCTree first = getFirst(fs.toList(), body, tags);
174 int pos = (first == null) ? Position.NOPOS : first.pos;
176 DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags);
177 return dc;
178 }
180 void nextChar() {
181 ch = buf[bp < buflen ? ++bp : buflen];
182 switch (ch) {
183 case '\f': case '\n': case '\r':
184 newline = true;
185 }
186 }
188 /**
189 * Read block content, consisting of text, html and inline tags.
190 * Terminated by the end of input, or the beginning of the next block tag:
191 * i.e. @ as the first non-whitespace character on a line.
192 */
193 @SuppressWarnings("fallthrough")
194 protected List<DCTree> blockContent() {
195 ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
196 textStart = -1;
198 loop:
199 while (bp < buflen) {
200 switch (ch) {
201 case '\n': case '\r': case '\f':
202 newline = true;
203 // fallthrough
205 case ' ': case '\t':
206 nextChar();
207 break;
209 case '&':
210 entity(trees);
211 break;
213 case '<':
214 newline = false;
215 addPendingText(trees, bp - 1);
216 trees.add(html());
217 if (textStart == -1) {
218 textStart = bp;
219 lastNonWhite = -1;
220 }
221 break;
223 case '>':
224 newline = false;
225 addPendingText(trees, bp - 1);
226 trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt"));
227 nextChar();
228 if (textStart == -1) {
229 textStart = bp;
230 lastNonWhite = -1;
231 }
232 break;
234 case '{':
235 inlineTag(trees);
236 break;
238 case '@':
239 if (newline) {
240 addPendingText(trees, lastNonWhite);
241 break loop;
242 }
243 // fallthrough
245 default:
246 newline = false;
247 if (textStart == -1)
248 textStart = bp;
249 lastNonWhite = bp;
250 nextChar();
251 }
252 }
254 if (lastNonWhite != -1)
255 addPendingText(trees, lastNonWhite);
257 return trees.toList();
258 }
260 /**
261 * Read a series of block tags, including their content.
262 * Standard tags parse their content appropriately.
263 * Non-standard tags are represented by {@link UnknownBlockTag}.
264 */
265 protected List<DCTree> blockTags() {
266 ListBuffer<DCTree> tags = new ListBuffer<DCTree>();
267 while (ch == '@')
268 tags.add(blockTag());
269 return tags.toList();
270 }
272 /**
273 * Read a single block tag, including its content.
274 * Standard tags parse their content appropriately.
275 * Non-standard tags are represented by {@link UnknownBlockTag}.
276 */
277 protected DCTree blockTag() {
278 int p = bp;
279 try {
280 nextChar();
281 if (isIdentifierStart(ch)) {
282 Name name = readIdentifier();
283 TagParser tp = tagParsers.get(name);
284 if (tp == null) {
285 List<DCTree> content = blockContent();
286 return m.at(p).UnknownBlockTag(name, content);
287 } else {
288 switch (tp.getKind()) {
289 case BLOCK:
290 return tp.parse(p);
291 case INLINE:
292 return erroneous("dc.bad.inline.tag", p);
293 }
294 }
295 }
296 blockContent();
298 return erroneous("dc.no.tag.name", p);
299 } catch (ParseException e) {
300 blockContent();
301 return erroneous(e.getMessage(), p);
302 }
303 }
305 protected void inlineTag(ListBuffer<DCTree> list) {
306 newline = false;
307 nextChar();
308 if (ch == '@') {
309 addPendingText(list, bp - 2);
310 list.add(inlineTag());
311 textStart = bp;
312 lastNonWhite = -1;
313 } else {
314 if (textStart == -1)
315 textStart = bp - 1;
316 lastNonWhite = bp;
317 }
318 }
320 /**
321 * Read a single inline tag, including its content.
322 * Standard tags parse their content appropriately.
323 * Non-standard tags are represented by {@link UnknownBlockTag}.
324 * Malformed tags may be returned as {@link Erroneous}.
325 */
326 protected DCTree inlineTag() {
327 int p = bp - 1;
328 try {
329 nextChar();
330 if (isIdentifierStart(ch)) {
331 Name name = readIdentifier();
332 skipWhitespace();
334 TagParser tp = tagParsers.get(name);
335 if (tp == null) {
336 DCTree text = inlineText();
337 if (text != null) {
338 nextChar();
339 return m.at(p).UnknownInlineTag(name, List.of(text));
340 }
341 } else if (tp.getKind() == TagParser.Kind.INLINE) {
342 DCTree tree = tp.parse(p);
343 if (tree != null) {
344 return tree;
345 }
346 } else {
347 inlineText(); // skip content
348 nextChar();
349 }
350 }
351 return erroneous("dc.no.tag.name", p);
352 } catch (ParseException e) {
353 return erroneous(e.getMessage(), p);
354 }
355 }
357 /**
358 * Read plain text content of an inline tag.
359 * Matching pairs of { } are skipped; the text is terminated by the first
360 * unmatched }. It is an error if the beginning of the next tag is detected.
361 */
362 protected DCTree inlineText() throws ParseException {
363 skipWhitespace();
364 int pos = bp;
365 int depth = 1;
367 loop:
368 while (bp < buflen) {
369 switch (ch) {
370 case '\n': case '\r': case '\f':
371 newline = true;
372 break;
374 case ' ': case '\t':
375 break;
377 case '{':
378 newline = false;
379 lastNonWhite = bp;
380 depth++;
381 break;
383 case '}':
384 if (--depth == 0) {
385 return m.at(pos).Text(newString(pos, bp));
386 }
387 newline = false;
388 lastNonWhite = bp;
389 break;
391 case '@':
392 if (newline)
393 break loop;
394 newline = false;
395 lastNonWhite = bp;
396 break;
398 default:
399 newline = false;
400 lastNonWhite = bp;
401 break;
402 }
403 nextChar();
404 }
405 throw new ParseException("dc.unterminated.inline.tag");
406 }
408 /**
409 * Read Java class name, possibly followed by member
410 * Matching pairs of < > are skipped. The text is terminated by the first
411 * unmatched }. It is an error if the beginning of the next tag is detected.
412 */
413 // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE
414 // TODO: improve quality of parse to forbid bad constructions.
415 @SuppressWarnings("fallthrough")
416 protected DCReference reference(boolean allowMember) throws ParseException {
417 int pos = bp;
418 int depth = 0;
420 // scan to find the end of the signature, by looking for the first
421 // whitespace not enclosed in () or <>, or the end of the tag
422 loop:
423 while (bp < buflen) {
424 switch (ch) {
425 case '\n': case '\r': case '\f':
426 newline = true;
427 // fallthrough
429 case ' ': case '\t':
430 if (depth == 0)
431 break loop;
432 break;
434 case '(':
435 case '<':
436 newline = false;
437 depth++;
438 break;
440 case ')':
441 case '>':
442 newline = false;
443 --depth;
444 break;
446 case '}':
447 if (bp == pos)
448 return null;
449 newline = false;
450 break loop;
452 case '@':
453 if (newline)
454 break loop;
455 // fallthrough
457 default:
458 newline = false;
460 }
461 nextChar();
462 }
464 if (depth != 0)
465 throw new ParseException("dc.unterminated.signature");
467 String sig = newString(pos, bp);
469 // Break sig apart into qualifiedExpr member paramTypes.
470 JCTree qualExpr;
471 Name member;
472 List<JCTree> paramTypes;
474 Log.DeferredDiagnosticHandler deferredDiagnosticHandler
475 = new Log.DeferredDiagnosticHandler(fac.log);
477 try {
478 int hash = sig.indexOf("#");
479 int lparen = sig.indexOf("(", hash + 1);
480 if (hash == -1) {
481 if (lparen == -1) {
482 qualExpr = parseType(sig);
483 member = null;
484 } else {
485 qualExpr = null;
486 member = parseMember(sig.substring(0, lparen));
487 }
488 } else {
489 qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash));
490 if (lparen == -1)
491 member = parseMember(sig.substring(hash + 1));
492 else
493 member = parseMember(sig.substring(hash + 1, lparen));
494 }
496 if (lparen < 0) {
497 paramTypes = null;
498 } else {
499 int rparen = sig.indexOf(")", lparen);
500 if (rparen != sig.length() - 1)
501 throw new ParseException("dc.ref.bad.parens");
502 paramTypes = parseParams(sig.substring(lparen + 1, rparen));
503 }
505 if (!deferredDiagnosticHandler.getDiagnostics().isEmpty())
506 throw new ParseException("dc.ref.syntax.error");
508 } finally {
509 fac.log.popDiagnosticHandler(deferredDiagnosticHandler);
510 }
512 return m.at(pos).Reference(sig, qualExpr, member, paramTypes);
513 }
515 JCTree parseType(String s) throws ParseException {
516 JavacParser p = fac.newParser(s, false, false, false);
517 JCTree tree = p.parseType();
518 if (p.token().kind != TokenKind.EOF)
519 throw new ParseException("dc.ref.unexpected.input");
520 return tree;
521 }
523 Name parseMember(String s) throws ParseException {
524 JavacParser p = fac.newParser(s, false, false, false);
525 Name name = p.ident();
526 if (p.token().kind != TokenKind.EOF)
527 throw new ParseException("dc.ref.unexpected.input");
528 return name;
529 }
531 List<JCTree> parseParams(String s) throws ParseException {
532 if (s.trim().isEmpty())
533 return List.nil();
535 JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false);
536 ListBuffer<JCTree> paramTypes = new ListBuffer<JCTree>();
537 paramTypes.add(p.parseType());
539 if (p.token().kind == TokenKind.IDENTIFIER)
540 p.nextToken();
542 while (p.token().kind == TokenKind.COMMA) {
543 p.nextToken();
544 paramTypes.add(p.parseType());
546 if (p.token().kind == TokenKind.IDENTIFIER)
547 p.nextToken();
548 }
550 if (p.token().kind != TokenKind.EOF)
551 throw new ParseException("dc.ref.unexpected.input");
553 return paramTypes.toList();
554 }
556 /**
557 * Read Java identifier
558 * Matching pairs of { } are skipped; the text is terminated by the first
559 * unmatched }. It is an error if the beginning of the next tag is detected.
560 */
561 @SuppressWarnings("fallthrough")
562 protected DCIdentifier identifier() throws ParseException {
563 skipWhitespace();
564 int pos = bp;
566 if (isJavaIdentifierStart(ch)) {
567 Name name = readJavaIdentifier();
568 return m.at(pos).Identifier(name);
569 }
571 throw new ParseException("dc.identifier.expected");
572 }
574 /**
575 * Read a quoted string.
576 * It is an error if the beginning of the next tag is detected.
577 */
578 @SuppressWarnings("fallthrough")
579 protected DCText quotedString() {
580 int pos = bp;
581 nextChar();
583 loop:
584 while (bp < buflen) {
585 switch (ch) {
586 case '\n': case '\r': case '\f':
587 newline = true;
588 break;
590 case ' ': case '\t':
591 break;
593 case '"':
594 nextChar();
595 // trim trailing white-space?
596 return m.at(pos).Text(newString(pos, bp));
598 case '@':
599 if (newline)
600 break loop;
602 }
603 nextChar();
604 }
605 return null;
606 }
608 /**
609 * Read general text content of an inline tag, including HTML entities and elements.
610 * Matching pairs of { } are skipped; the text is terminated by the first
611 * unmatched }. It is an error if the beginning of the next tag is detected.
612 */
613 @SuppressWarnings("fallthrough")
614 protected List<DCTree> inlineContent() {
615 ListBuffer<DCTree> trees = new ListBuffer<DCTree>();
617 skipWhitespace();
618 int pos = bp;
619 int depth = 1;
620 textStart = -1;
622 loop:
623 while (bp < buflen) {
625 switch (ch) {
626 case '\n': case '\r': case '\f':
627 newline = true;
628 // fall through
630 case ' ': case '\t':
631 nextChar();
632 break;
634 case '&':
635 entity(trees);
636 break;
638 case '<':
639 newline = false;
640 addPendingText(trees, bp - 1);
641 trees.add(html());
642 break;
644 case '{':
645 newline = false;
646 depth++;
647 nextChar();
648 break;
650 case '}':
651 newline = false;
652 if (--depth == 0) {
653 addPendingText(trees, bp - 1);
654 nextChar();
655 return trees.toList();
656 }
657 nextChar();
658 break;
660 case '@':
661 if (newline)
662 break loop;
663 // fallthrough
665 default:
666 if (textStart == -1)
667 textStart = bp;
668 nextChar();
669 break;
670 }
671 }
673 return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos));
674 }
676 protected void entity(ListBuffer<DCTree> list) {
677 newline = false;
678 addPendingText(list, bp - 1);
679 list.add(entity());
680 if (textStart == -1) {
681 textStart = bp;
682 lastNonWhite = -1;
683 }
684 }
686 /**
687 * Read an HTML entity.
688 * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; }
689 */
690 protected DCTree entity() {
691 int p = bp;
692 nextChar();
693 Name name = null;
694 boolean checkSemi = false;
695 if (ch == '#') {
696 int namep = bp;
697 nextChar();
698 if (isDecimalDigit(ch)) {
699 nextChar();
700 while (isDecimalDigit(ch))
701 nextChar();
702 name = names.fromChars(buf, namep, bp - namep);
703 } else if (ch == 'x' || ch == 'X') {
704 nextChar();
705 if (isHexDigit(ch)) {
706 nextChar();
707 while (isHexDigit(ch))
708 nextChar();
709 name = names.fromChars(buf, namep, bp - namep);
710 }
711 }
712 } else if (isIdentifierStart(ch)) {
713 name = readIdentifier();
714 }
716 if (name == null)
717 return erroneous("dc.bad.entity", p);
718 else {
719 if (ch != ';')
720 return erroneous("dc.missing.semicolon", p);
721 nextChar();
722 return m.at(p).Entity(name);
723 }
724 }
726 /**
727 * Read the start or end of an HTML tag, or an HTML comment
728 * {@literal <identifier attrs> } or {@literal </identifier> }
729 */
730 protected DCTree html() {
731 int p = bp;
732 nextChar();
733 if (isIdentifierStart(ch)) {
734 Name name = readIdentifier();
735 List<DCTree> attrs = htmlAttrs();
736 if (attrs != null) {
737 boolean selfClosing = false;
738 if (ch == '/') {
739 nextChar();
740 selfClosing = true;
741 }
742 if (ch == '>') {
743 nextChar();
744 return m.at(p).StartElement(name, attrs, selfClosing);
745 }
746 }
747 } else if (ch == '/') {
748 nextChar();
749 if (isIdentifierStart(ch)) {
750 Name name = readIdentifier();
751 skipWhitespace();
752 if (ch == '>') {
753 nextChar();
754 return m.at(p).EndElement(name);
755 }
756 }
757 } else if (ch == '!') {
758 nextChar();
759 if (ch == '-') {
760 nextChar();
761 if (ch == '-') {
762 nextChar();
763 while (bp < buflen) {
764 int dash = 0;
765 while (ch == '-') {
766 dash++;
767 nextChar();
768 }
769 // strictly speaking, a comment should not contain "--"
770 // so dash > 2 is an error, dash == 2 implies ch == '>'
771 if (dash >= 2 && ch == '>') {
772 nextChar();
773 return m.at(p).Comment(newString(p, bp));
774 }
776 nextChar();
777 }
778 }
779 }
780 }
782 bp = p + 1;
783 ch = buf[bp];
784 return erroneous("dc.malformed.html", p);
785 }
787 /**
788 * Read a series of HTML attributes, terminated by {@literal > }.
789 * Each attribute is of the form {@literal identifier[=value] }.
790 * "value" may be unquoted, single-quoted, or double-quoted.
791 */
792 protected List<DCTree> htmlAttrs() {
793 ListBuffer<DCTree> attrs = new ListBuffer<DCTree>();
794 skipWhitespace();
796 loop:
797 while (isIdentifierStart(ch)) {
798 int namePos = bp;
799 Name name = readIdentifier();
800 skipWhitespace();
801 List<DCTree> value = null;
802 ValueKind vkind = ValueKind.EMPTY;
803 if (ch == '=') {
804 ListBuffer<DCTree> v = new ListBuffer<DCTree>();
805 nextChar();
806 skipWhitespace();
807 if (ch == '\'' || ch == '"') {
808 vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE;
809 char quote = ch;
810 nextChar();
811 textStart = bp;
812 while (bp < buflen && ch != quote) {
813 if (newline && ch == '@') {
814 attrs.add(erroneous("dc.unterminated.string", namePos));
815 // No point trying to read more.
816 // In fact, all attrs get discarded by the caller
817 // and superseded by a malformed.html node because
818 // the html tag itself is not terminated correctly.
819 break loop;
820 }
821 attrValueChar(v);
822 }
823 addPendingText(v, bp - 1);
824 nextChar();
825 } else {
826 vkind = ValueKind.UNQUOTED;
827 textStart = bp;
828 while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) {
829 attrValueChar(v);
830 }
831 addPendingText(v, bp - 1);
832 }
833 skipWhitespace();
834 value = v.toList();
835 }
836 DCAttribute attr = m.at(namePos).Attribute(name, vkind, value);
837 attrs.add(attr);
838 }
840 return attrs.toList();
841 }
843 protected void attrValueChar(ListBuffer<DCTree> list) {
844 switch (ch) {
845 case '&':
846 entity(list);
847 break;
849 case '{':
850 inlineTag(list);
851 break;
853 default:
854 nextChar();
855 }
856 }
858 protected void addPendingText(ListBuffer<DCTree> list, int textEnd) {
859 if (textStart != -1) {
860 if (textStart <= textEnd) {
861 list.add(m.at(textStart).Text(newString(textStart, textEnd + 1)));
862 }
863 textStart = -1;
864 }
865 }
867 protected DCErroneous erroneous(String code, int pos) {
868 int i = bp - 1;
869 loop:
870 while (i > pos) {
871 switch (buf[i]) {
872 case '\f': case '\n': case '\r':
873 newline = true;
874 break;
875 case '\t': case ' ':
876 break;
877 default:
878 break loop;
879 }
880 i--;
881 }
882 textStart = -1;
883 return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code);
884 }
886 @SuppressWarnings("unchecked")
887 <T> T getFirst(List<T>... lists) {
888 for (List<T> list: lists) {
889 if (list.nonEmpty())
890 return list.head;
891 }
892 return null;
893 }
895 protected boolean isIdentifierStart(char ch) {
896 return Character.isUnicodeIdentifierStart(ch);
897 }
899 protected Name readIdentifier() {
900 int start = bp;
901 nextChar();
902 while (bp < buflen && Character.isUnicodeIdentifierPart(ch))
903 nextChar();
904 return names.fromChars(buf, start, bp - start);
905 }
907 protected boolean isJavaIdentifierStart(char ch) {
908 return Character.isJavaIdentifierStart(ch);
909 }
911 protected Name readJavaIdentifier() {
912 int start = bp;
913 nextChar();
914 while (bp < buflen && Character.isJavaIdentifierPart(ch))
915 nextChar();
916 return names.fromChars(buf, start, bp - start);
917 }
919 protected boolean isDecimalDigit(char ch) {
920 return ('0' <= ch && ch <= '9');
921 }
923 protected boolean isHexDigit(char ch) {
924 return ('0' <= ch && ch <= '9')
925 || ('a' <= ch && ch <= 'f')
926 || ('A' <= ch && ch <= 'F');
927 }
929 protected boolean isUnquotedAttrValueTerminator(char ch) {
930 switch (ch) {
931 case '\f': case '\n': case '\r': case '\t':
932 case ' ':
933 case '"': case '\'': case '`':
934 case '=': case '<': case '>':
935 return true;
936 default:
937 return false;
938 }
939 }
941 protected boolean isWhitespace(char ch) {
942 return Character.isWhitespace(ch);
943 }
945 protected void skipWhitespace() {
946 while (isWhitespace(ch))
947 nextChar();
948 }
950 protected int getSentenceBreak(String s) {
951 if (sentenceBreaker != null) {
952 sentenceBreaker.setText(s);
953 int i = sentenceBreaker.next();
954 return (i == s.length()) ? -1 : i;
955 }
957 // scan for period followed by whitespace
958 boolean period = false;
959 for (int i = 0; i < s.length(); i++) {
960 switch (s.charAt(i)) {
961 case '.':
962 period = true;
963 break;
965 case ' ':
966 case '\f':
967 case '\n':
968 case '\r':
969 case '\t':
970 if (period)
971 return i;
972 break;
974 default:
975 period = false;
976 break;
977 }
978 }
979 return -1;
980 }
983 Set<String> htmlBlockTags = new HashSet<String>(Arrays.asList(
984 "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre"));
986 protected boolean isSentenceBreak(Name n) {
987 return htmlBlockTags.contains(n.toString().toLowerCase());
988 }
990 protected boolean isSentenceBreak(DCTree t) {
991 switch (t.getKind()) {
992 case START_ELEMENT:
993 return isSentenceBreak(((DCStartElement) t).getName());
995 case END_ELEMENT:
996 return isSentenceBreak(((DCEndElement) t).getName());
997 }
998 return false;
999 }
1001 /**
1002 * @param start position of first character of string
1003 * @param end position of character beyond last character to be included
1004 */
1005 String newString(int start, int end) {
1006 return new String(buf, start, end - start);
1007 }
1009 static abstract class TagParser {
1010 enum Kind { INLINE, BLOCK }
1012 Kind kind;
1013 DCTree.Kind treeKind;
1015 TagParser(Kind k, DCTree.Kind tk) {
1016 kind = k;
1017 treeKind = tk;
1018 }
1020 Kind getKind() {
1021 return kind;
1022 }
1024 DCTree.Kind getTreeKind() {
1025 return treeKind;
1026 }
1028 abstract DCTree parse(int pos) throws ParseException;
1029 }
1031 /**
1032 * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a>
1033 */
1034 private void initTagParsers() {
1035 TagParser[] parsers = {
1036 // @author name-text
1037 new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) {
1038 public DCTree parse(int pos) {
1039 List<DCTree> name = blockContent();
1040 return m.at(pos).Author(name);
1041 }
1042 },
1044 // {@code text}
1045 new TagParser(Kind.INLINE, DCTree.Kind.CODE) {
1046 public DCTree parse(int pos) throws ParseException {
1047 DCTree text = inlineText();
1048 nextChar();
1049 return m.at(pos).Code((DCText) text);
1050 }
1051 },
1053 // @deprecated deprecated-text
1054 new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) {
1055 public DCTree parse(int pos) {
1056 List<DCTree> reason = blockContent();
1057 return m.at(pos).Deprecated(reason);
1058 }
1059 },
1061 // {@docRoot}
1062 new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) {
1063 public DCTree parse(int pos) throws ParseException {
1064 if (ch == '}') {
1065 nextChar();
1066 return m.at(pos).DocRoot();
1067 }
1068 inlineText(); // skip unexpected content
1069 nextChar();
1070 throw new ParseException("dc.unexpected.content");
1071 }
1072 },
1074 // @exception class-name description
1075 new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) {
1076 public DCTree parse(int pos) throws ParseException {
1077 skipWhitespace();
1078 DCReference ref = reference(false);
1079 List<DCTree> description = blockContent();
1080 return m.at(pos).Exception(ref, description);
1081 }
1082 },
1084 // {@inheritDoc}
1085 new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) {
1086 public DCTree parse(int pos) throws ParseException {
1087 if (ch == '}') {
1088 nextChar();
1089 return m.at(pos).InheritDoc();
1090 }
1091 inlineText(); // skip unexpected content
1092 nextChar();
1093 throw new ParseException("dc.unexpected.content");
1094 }
1095 },
1097 // {@link package.class#member label}
1098 new TagParser(Kind.INLINE, DCTree.Kind.LINK) {
1099 public DCTree parse(int pos) throws ParseException {
1100 DCReference ref = reference(true);
1101 List<DCTree> label = inlineContent();
1102 return m.at(pos).Link(ref, label);
1103 }
1104 },
1106 // {@linkplain package.class#member label}
1107 new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) {
1108 public DCTree parse(int pos) throws ParseException {
1109 DCReference ref = reference(true);
1110 List<DCTree> label = inlineContent();
1111 return m.at(pos).LinkPlain(ref, label);
1112 }
1113 },
1115 // {@literal text}
1116 new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) {
1117 public DCTree parse(int pos) throws ParseException {
1118 DCTree text = inlineText();
1119 nextChar();
1120 return m.at(pos).Literal((DCText) text);
1121 }
1122 },
1124 // @param parameter-name description
1125 new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) {
1126 public DCTree parse(int pos) throws ParseException {
1127 skipWhitespace();
1129 boolean typaram = false;
1130 if (ch == '<') {
1131 typaram = true;
1132 nextChar();
1133 }
1135 DCIdentifier id = identifier();
1137 if (typaram) {
1138 if (ch != '>')
1139 throw new ParseException("dc.gt.expected");
1140 nextChar();
1141 }
1143 skipWhitespace();
1144 List<DCTree> desc = blockContent();
1145 return m.at(pos).Param(typaram, id, desc);
1146 }
1147 },
1149 // @return description
1150 new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) {
1151 public DCTree parse(int pos) {
1152 List<DCTree> description = blockContent();
1153 return m.at(pos).Return(description);
1154 }
1155 },
1157 // @see reference | quoted-string | HTML
1158 new TagParser(Kind.BLOCK, DCTree.Kind.SEE) {
1159 public DCTree parse(int pos) throws ParseException {
1160 skipWhitespace();
1161 switch (ch) {
1162 case '"':
1163 DCText string = quotedString();
1164 if (string != null) {
1165 skipWhitespace();
1166 if (ch == '@')
1167 return m.at(pos).See(List.<DCTree>of(string));
1168 }
1169 break;
1171 case '<':
1172 List<DCTree> html = blockContent();
1173 if (html != null)
1174 return m.at(pos).See(html);
1175 break;
1177 case '@':
1178 if (newline)
1179 throw new ParseException("dc.no.content");
1180 break;
1182 case EOI:
1183 if (bp == buf.length - 1)
1184 throw new ParseException("dc.no.content");
1185 break;
1187 default:
1188 if (isJavaIdentifierStart(ch) || ch == '#') {
1189 DCReference ref = reference(true);
1190 List<DCTree> description = blockContent();
1191 return m.at(pos).See(description.prepend(ref));
1192 }
1193 }
1194 throw new ParseException("dc.unexpected.content");
1195 }
1196 },
1198 // @serialData data-description
1199 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) {
1200 public DCTree parse(int pos) {
1201 List<DCTree> description = blockContent();
1202 return m.at(pos).SerialData(description);
1203 }
1204 },
1206 // @serialField field-name field-type description
1207 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) {
1208 public DCTree parse(int pos) throws ParseException {
1209 skipWhitespace();
1210 DCIdentifier name = identifier();
1211 skipWhitespace();
1212 DCReference type = reference(false);
1213 List<DCTree> description = null;
1214 if (isWhitespace(ch)) {
1215 skipWhitespace();
1216 description = blockContent();
1217 }
1218 return m.at(pos).SerialField(name, type, description);
1219 }
1220 },
1222 // @serial field-description | include | exclude
1223 new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) {
1224 public DCTree parse(int pos) {
1225 List<DCTree> description = blockContent();
1226 return m.at(pos).Serial(description);
1227 }
1228 },
1230 // @since since-text
1231 new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) {
1232 public DCTree parse(int pos) {
1233 List<DCTree> description = blockContent();
1234 return m.at(pos).Since(description);
1235 }
1236 },
1238 // @throws class-name description
1239 new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) {
1240 public DCTree parse(int pos) throws ParseException {
1241 skipWhitespace();
1242 DCReference ref = reference(false);
1243 List<DCTree> description = blockContent();
1244 return m.at(pos).Throws(ref, description);
1245 }
1246 },
1248 // {@value package.class#field}
1249 new TagParser(Kind.INLINE, DCTree.Kind.VALUE) {
1250 public DCTree parse(int pos) throws ParseException {
1251 DCReference ref = reference(true);
1252 skipWhitespace();
1253 if (ch == '}') {
1254 nextChar();
1255 return m.at(pos).Value(ref);
1256 }
1257 nextChar();
1258 throw new ParseException("dc.unexpected.content");
1259 }
1260 },
1262 // @version version-text
1263 new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) {
1264 public DCTree parse(int pos) {
1265 List<DCTree> description = blockContent();
1266 return m.at(pos).Version(description);
1267 }
1268 },
1269 };
1271 tagParsers = new HashMap<Name,TagParser>();
1272 for (TagParser p: parsers)
1273 tagParsers.put(names.fromString(p.getTreeKind().tagName), p);
1275 }
1276 }