1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/classes/com/sun/tools/javac/parser/DocCommentParser.java Wed Apr 27 01:34:52 2016 +0800 1.3 @@ -0,0 +1,1288 @@ 1.4 +/* 1.5 + * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. Oracle designates this 1.11 + * particular file as subject to the "Classpath" exception as provided 1.12 + * by Oracle in the LICENSE file that accompanied this code. 1.13 + * 1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.17 + * version 2 for more details (a copy is included in the LICENSE file that 1.18 + * accompanied this code). 1.19 + * 1.20 + * You should have received a copy of the GNU General Public License version 1.21 + * 2 along with this work; if not, write to the Free Software Foundation, 1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.23 + * 1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.25 + * or visit www.oracle.com if you need additional information or have any 1.26 + * questions. 1.27 + */ 1.28 + 1.29 +package com.sun.tools.javac.parser; 1.30 + 1.31 +import java.text.BreakIterator; 1.32 +import java.util.Arrays; 1.33 +import java.util.HashMap; 1.34 +import java.util.HashSet; 1.35 +import java.util.Locale; 1.36 +import java.util.Map; 1.37 +import java.util.Set; 1.38 + 1.39 +import com.sun.source.doctree.AttributeTree.ValueKind; 1.40 +import com.sun.tools.javac.parser.DocCommentParser.TagParser.Kind; 1.41 +import com.sun.tools.javac.parser.Tokens.Comment; 1.42 +import com.sun.tools.javac.parser.Tokens.TokenKind; 1.43 +import com.sun.tools.javac.tree.DCTree; 1.44 +import com.sun.tools.javac.tree.DCTree.DCAttribute; 1.45 +import com.sun.tools.javac.tree.DCTree.DCDocComment; 1.46 +import com.sun.tools.javac.tree.DCTree.DCEndElement; 1.47 +import com.sun.tools.javac.tree.DCTree.DCEndPosTree; 1.48 +import com.sun.tools.javac.tree.DCTree.DCErroneous; 1.49 +import com.sun.tools.javac.tree.DCTree.DCIdentifier; 1.50 +import com.sun.tools.javac.tree.DCTree.DCReference; 1.51 +import com.sun.tools.javac.tree.DCTree.DCStartElement; 1.52 +import com.sun.tools.javac.tree.DCTree.DCText; 1.53 +import com.sun.tools.javac.tree.DocTreeMaker; 1.54 +import com.sun.tools.javac.tree.JCTree; 1.55 +import com.sun.tools.javac.util.DiagnosticSource; 1.56 +import com.sun.tools.javac.util.List; 1.57 +import com.sun.tools.javac.util.ListBuffer; 1.58 +import com.sun.tools.javac.util.Log; 1.59 +import com.sun.tools.javac.util.Name; 1.60 +import com.sun.tools.javac.util.Names; 1.61 +import com.sun.tools.javac.util.Options; 1.62 +import com.sun.tools.javac.util.Position; 1.63 +import com.sun.tools.javac.util.StringUtils; 1.64 +import static com.sun.tools.javac.util.LayoutCharacters.*; 1.65 + 1.66 +/** 1.67 + * 1.68 + * <p><b>This is NOT part of any supported API. 1.69 + * If you write code that depends on this, you do so at your own risk. 1.70 + * This code and its internal interfaces are subject to change or 1.71 + * deletion without notice.</b> 1.72 + */ 1.73 +public class DocCommentParser { 1.74 + static class ParseException extends Exception { 1.75 + private static final long serialVersionUID = 0; 1.76 + ParseException(String key) { 1.77 + super(key); 1.78 + } 1.79 + } 1.80 + 1.81 + final ParserFactory fac; 1.82 + final DiagnosticSource diagSource; 1.83 + final Comment comment; 1.84 + final DocTreeMaker m; 1.85 + final Names names; 1.86 + 1.87 + BreakIterator sentenceBreaker; 1.88 + 1.89 + /** The input buffer, index of most recent character read, 1.90 + * index of one past last character in buffer. 1.91 + */ 1.92 + protected char[] buf; 1.93 + protected int bp; 1.94 + protected int buflen; 1.95 + 1.96 + /** The current character. 1.97 + */ 1.98 + protected char ch; 1.99 + 1.100 + int textStart = -1; 1.101 + int lastNonWhite = -1; 1.102 + boolean newline = true; 1.103 + 1.104 + Map<Name, TagParser> tagParsers; 1.105 + 1.106 + DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) { 1.107 + this.fac = fac; 1.108 + this.diagSource = diagSource; 1.109 + this.comment = comment; 1.110 + names = fac.names; 1.111 + m = fac.docTreeMaker; 1.112 + 1.113 + Locale locale = (fac.locale == null) ? Locale.getDefault() : fac.locale; 1.114 + 1.115 + Options options = fac.options; 1.116 + boolean useBreakIterator = options.isSet("breakIterator"); 1.117 + if (useBreakIterator || !locale.getLanguage().equals(Locale.ENGLISH.getLanguage())) 1.118 + sentenceBreaker = BreakIterator.getSentenceInstance(locale); 1.119 + 1.120 + initTagParsers(); 1.121 + } 1.122 + 1.123 + DCDocComment parse() { 1.124 + String c = comment.getText(); 1.125 + buf = new char[c.length() + 1]; 1.126 + c.getChars(0, c.length(), buf, 0); 1.127 + buf[buf.length - 1] = EOI; 1.128 + buflen = buf.length - 1; 1.129 + bp = -1; 1.130 + nextChar(); 1.131 + 1.132 + List<DCTree> body = blockContent(); 1.133 + List<DCTree> tags = blockTags(); 1.134 + 1.135 + // split body into first sentence and body 1.136 + ListBuffer<DCTree> fs = new ListBuffer<DCTree>(); 1.137 + loop: 1.138 + for (; body.nonEmpty(); body = body.tail) { 1.139 + DCTree t = body.head; 1.140 + switch (t.getKind()) { 1.141 + case TEXT: 1.142 + String s = ((DCText) t).getBody(); 1.143 + int i = getSentenceBreak(s); 1.144 + if (i > 0) { 1.145 + int i0 = i; 1.146 + while (i0 > 0 && isWhitespace(s.charAt(i0 - 1))) 1.147 + i0--; 1.148 + fs.add(m.at(t.pos).Text(s.substring(0, i0))); 1.149 + int i1 = i; 1.150 + while (i1 < s.length() && isWhitespace(s.charAt(i1))) 1.151 + i1++; 1.152 + body = body.tail; 1.153 + if (i1 < s.length()) 1.154 + body = body.prepend(m.at(t.pos + i1).Text(s.substring(i1))); 1.155 + break loop; 1.156 + } else if (body.tail.nonEmpty()) { 1.157 + if (isSentenceBreak(body.tail.head)) { 1.158 + int i0 = s.length() - 1; 1.159 + while (i0 > 0 && isWhitespace(s.charAt(i0))) 1.160 + i0--; 1.161 + fs.add(m.at(t.pos).Text(s.substring(0, i0 + 1))); 1.162 + body = body.tail; 1.163 + break loop; 1.164 + } 1.165 + } 1.166 + break; 1.167 + 1.168 + case START_ELEMENT: 1.169 + case END_ELEMENT: 1.170 + if (isSentenceBreak(t)) 1.171 + break loop; 1.172 + break; 1.173 + } 1.174 + fs.add(t); 1.175 + } 1.176 + 1.177 + @SuppressWarnings("unchecked") 1.178 + DCTree first = getFirst(fs.toList(), body, tags); 1.179 + int pos = (first == null) ? Position.NOPOS : first.pos; 1.180 + 1.181 + DCDocComment dc = m.at(pos).DocComment(comment, fs.toList(), body, tags); 1.182 + return dc; 1.183 + } 1.184 + 1.185 + void nextChar() { 1.186 + ch = buf[bp < buflen ? ++bp : buflen]; 1.187 + switch (ch) { 1.188 + case '\f': case '\n': case '\r': 1.189 + newline = true; 1.190 + } 1.191 + } 1.192 + 1.193 + /** 1.194 + * Read block content, consisting of text, html and inline tags. 1.195 + * Terminated by the end of input, or the beginning of the next block tag: 1.196 + * i.e. @ as the first non-whitespace character on a line. 1.197 + */ 1.198 + @SuppressWarnings("fallthrough") 1.199 + protected List<DCTree> blockContent() { 1.200 + ListBuffer<DCTree> trees = new ListBuffer<DCTree>(); 1.201 + textStart = -1; 1.202 + 1.203 + loop: 1.204 + while (bp < buflen) { 1.205 + switch (ch) { 1.206 + case '\n': case '\r': case '\f': 1.207 + newline = true; 1.208 + // fallthrough 1.209 + 1.210 + case ' ': case '\t': 1.211 + nextChar(); 1.212 + break; 1.213 + 1.214 + case '&': 1.215 + entity(trees); 1.216 + break; 1.217 + 1.218 + case '<': 1.219 + newline = false; 1.220 + addPendingText(trees, bp - 1); 1.221 + trees.add(html()); 1.222 + if (textStart == -1) { 1.223 + textStart = bp; 1.224 + lastNonWhite = -1; 1.225 + } 1.226 + break; 1.227 + 1.228 + case '>': 1.229 + newline = false; 1.230 + addPendingText(trees, bp - 1); 1.231 + trees.add(m.at(bp).Erroneous(newString(bp, bp+1), diagSource, "dc.bad.gt")); 1.232 + nextChar(); 1.233 + if (textStart == -1) { 1.234 + textStart = bp; 1.235 + lastNonWhite = -1; 1.236 + } 1.237 + break; 1.238 + 1.239 + case '{': 1.240 + inlineTag(trees); 1.241 + break; 1.242 + 1.243 + case '@': 1.244 + if (newline) { 1.245 + addPendingText(trees, lastNonWhite); 1.246 + break loop; 1.247 + } 1.248 + // fallthrough 1.249 + 1.250 + default: 1.251 + newline = false; 1.252 + if (textStart == -1) 1.253 + textStart = bp; 1.254 + lastNonWhite = bp; 1.255 + nextChar(); 1.256 + } 1.257 + } 1.258 + 1.259 + if (lastNonWhite != -1) 1.260 + addPendingText(trees, lastNonWhite); 1.261 + 1.262 + return trees.toList(); 1.263 + } 1.264 + 1.265 + /** 1.266 + * Read a series of block tags, including their content. 1.267 + * Standard tags parse their content appropriately. 1.268 + * Non-standard tags are represented by {@link UnknownBlockTag}. 1.269 + */ 1.270 + protected List<DCTree> blockTags() { 1.271 + ListBuffer<DCTree> tags = new ListBuffer<DCTree>(); 1.272 + while (ch == '@') 1.273 + tags.add(blockTag()); 1.274 + return tags.toList(); 1.275 + } 1.276 + 1.277 + /** 1.278 + * Read a single block tag, including its content. 1.279 + * Standard tags parse their content appropriately. 1.280 + * Non-standard tags are represented by {@link UnknownBlockTag}. 1.281 + */ 1.282 + protected DCTree blockTag() { 1.283 + int p = bp; 1.284 + try { 1.285 + nextChar(); 1.286 + if (isIdentifierStart(ch)) { 1.287 + Name name = readTagName(); 1.288 + TagParser tp = tagParsers.get(name); 1.289 + if (tp == null) { 1.290 + List<DCTree> content = blockContent(); 1.291 + return m.at(p).UnknownBlockTag(name, content); 1.292 + } else { 1.293 + switch (tp.getKind()) { 1.294 + case BLOCK: 1.295 + return tp.parse(p); 1.296 + case INLINE: 1.297 + return erroneous("dc.bad.inline.tag", p); 1.298 + } 1.299 + } 1.300 + } 1.301 + blockContent(); 1.302 + 1.303 + return erroneous("dc.no.tag.name", p); 1.304 + } catch (ParseException e) { 1.305 + blockContent(); 1.306 + return erroneous(e.getMessage(), p); 1.307 + } 1.308 + } 1.309 + 1.310 + protected void inlineTag(ListBuffer<DCTree> list) { 1.311 + newline = false; 1.312 + nextChar(); 1.313 + if (ch == '@') { 1.314 + addPendingText(list, bp - 2); 1.315 + list.add(inlineTag()); 1.316 + textStart = bp; 1.317 + lastNonWhite = -1; 1.318 + } else { 1.319 + if (textStart == -1) 1.320 + textStart = bp - 1; 1.321 + lastNonWhite = bp; 1.322 + } 1.323 + } 1.324 + 1.325 + /** 1.326 + * Read a single inline tag, including its content. 1.327 + * Standard tags parse their content appropriately. 1.328 + * Non-standard tags are represented by {@link UnknownBlockTag}. 1.329 + * Malformed tags may be returned as {@link Erroneous}. 1.330 + */ 1.331 + protected DCTree inlineTag() { 1.332 + int p = bp - 1; 1.333 + try { 1.334 + nextChar(); 1.335 + if (isIdentifierStart(ch)) { 1.336 + Name name = readTagName(); 1.337 + skipWhitespace(); 1.338 + 1.339 + TagParser tp = tagParsers.get(name); 1.340 + if (tp == null) { 1.341 + DCTree text = inlineText(); 1.342 + if (text != null) { 1.343 + nextChar(); 1.344 + return m.at(p).UnknownInlineTag(name, List.of(text)).setEndPos(bp); 1.345 + } 1.346 + } else if (tp.getKind() == TagParser.Kind.INLINE) { 1.347 + DCEndPosTree<?> tree = (DCEndPosTree<?>) tp.parse(p); 1.348 + if (tree != null) { 1.349 + return tree.setEndPos(bp); 1.350 + } 1.351 + } else { 1.352 + inlineText(); // skip content 1.353 + nextChar(); 1.354 + } 1.355 + } 1.356 + return erroneous("dc.no.tag.name", p); 1.357 + } catch (ParseException e) { 1.358 + return erroneous(e.getMessage(), p); 1.359 + } 1.360 + } 1.361 + 1.362 + /** 1.363 + * Read plain text content of an inline tag. 1.364 + * Matching pairs of { } are skipped; the text is terminated by the first 1.365 + * unmatched }. It is an error if the beginning of the next tag is detected. 1.366 + */ 1.367 + protected DCTree inlineText() throws ParseException { 1.368 + skipWhitespace(); 1.369 + int pos = bp; 1.370 + int depth = 1; 1.371 + 1.372 + loop: 1.373 + while (bp < buflen) { 1.374 + switch (ch) { 1.375 + case '\n': case '\r': case '\f': 1.376 + newline = true; 1.377 + break; 1.378 + 1.379 + case ' ': case '\t': 1.380 + break; 1.381 + 1.382 + case '{': 1.383 + newline = false; 1.384 + lastNonWhite = bp; 1.385 + depth++; 1.386 + break; 1.387 + 1.388 + case '}': 1.389 + if (--depth == 0) { 1.390 + return m.at(pos).Text(newString(pos, bp)); 1.391 + } 1.392 + newline = false; 1.393 + lastNonWhite = bp; 1.394 + break; 1.395 + 1.396 + case '@': 1.397 + if (newline) 1.398 + break loop; 1.399 + newline = false; 1.400 + lastNonWhite = bp; 1.401 + break; 1.402 + 1.403 + default: 1.404 + newline = false; 1.405 + lastNonWhite = bp; 1.406 + break; 1.407 + } 1.408 + nextChar(); 1.409 + } 1.410 + throw new ParseException("dc.unterminated.inline.tag"); 1.411 + } 1.412 + 1.413 + /** 1.414 + * Read Java class name, possibly followed by member 1.415 + * Matching pairs of < > are skipped. The text is terminated by the first 1.416 + * unmatched }. It is an error if the beginning of the next tag is detected. 1.417 + */ 1.418 + // TODO: boolean allowMember should be enum FORBID, ALLOW, REQUIRE 1.419 + // TODO: improve quality of parse to forbid bad constructions. 1.420 + @SuppressWarnings("fallthrough") 1.421 + protected DCReference reference(boolean allowMember) throws ParseException { 1.422 + int pos = bp; 1.423 + int depth = 0; 1.424 + 1.425 + // scan to find the end of the signature, by looking for the first 1.426 + // whitespace not enclosed in () or <>, or the end of the tag 1.427 + loop: 1.428 + while (bp < buflen) { 1.429 + switch (ch) { 1.430 + case '\n': case '\r': case '\f': 1.431 + newline = true; 1.432 + // fallthrough 1.433 + 1.434 + case ' ': case '\t': 1.435 + if (depth == 0) 1.436 + break loop; 1.437 + break; 1.438 + 1.439 + case '(': 1.440 + case '<': 1.441 + newline = false; 1.442 + depth++; 1.443 + break; 1.444 + 1.445 + case ')': 1.446 + case '>': 1.447 + newline = false; 1.448 + --depth; 1.449 + break; 1.450 + 1.451 + case '}': 1.452 + if (bp == pos) 1.453 + return null; 1.454 + newline = false; 1.455 + break loop; 1.456 + 1.457 + case '@': 1.458 + if (newline) 1.459 + break loop; 1.460 + // fallthrough 1.461 + 1.462 + default: 1.463 + newline = false; 1.464 + 1.465 + } 1.466 + nextChar(); 1.467 + } 1.468 + 1.469 + if (depth != 0) 1.470 + throw new ParseException("dc.unterminated.signature"); 1.471 + 1.472 + String sig = newString(pos, bp); 1.473 + 1.474 + // Break sig apart into qualifiedExpr member paramTypes. 1.475 + JCTree qualExpr; 1.476 + Name member; 1.477 + List<JCTree> paramTypes; 1.478 + 1.479 + Log.DeferredDiagnosticHandler deferredDiagnosticHandler 1.480 + = new Log.DeferredDiagnosticHandler(fac.log); 1.481 + 1.482 + try { 1.483 + int hash = sig.indexOf("#"); 1.484 + int lparen = sig.indexOf("(", hash + 1); 1.485 + if (hash == -1) { 1.486 + if (lparen == -1) { 1.487 + qualExpr = parseType(sig); 1.488 + member = null; 1.489 + } else { 1.490 + qualExpr = null; 1.491 + member = parseMember(sig.substring(0, lparen)); 1.492 + } 1.493 + } else { 1.494 + qualExpr = (hash == 0) ? null : parseType(sig.substring(0, hash)); 1.495 + if (lparen == -1) 1.496 + member = parseMember(sig.substring(hash + 1)); 1.497 + else 1.498 + member = parseMember(sig.substring(hash + 1, lparen)); 1.499 + } 1.500 + 1.501 + if (lparen < 0) { 1.502 + paramTypes = null; 1.503 + } else { 1.504 + int rparen = sig.indexOf(")", lparen); 1.505 + if (rparen != sig.length() - 1) 1.506 + throw new ParseException("dc.ref.bad.parens"); 1.507 + paramTypes = parseParams(sig.substring(lparen + 1, rparen)); 1.508 + } 1.509 + 1.510 + if (!deferredDiagnosticHandler.getDiagnostics().isEmpty()) 1.511 + throw new ParseException("dc.ref.syntax.error"); 1.512 + 1.513 + } finally { 1.514 + fac.log.popDiagnosticHandler(deferredDiagnosticHandler); 1.515 + } 1.516 + 1.517 + return m.at(pos).Reference(sig, qualExpr, member, paramTypes).setEndPos(bp); 1.518 + } 1.519 + 1.520 + JCTree parseType(String s) throws ParseException { 1.521 + JavacParser p = fac.newParser(s, false, false, false); 1.522 + JCTree tree = p.parseType(); 1.523 + if (p.token().kind != TokenKind.EOF) 1.524 + throw new ParseException("dc.ref.unexpected.input"); 1.525 + return tree; 1.526 + } 1.527 + 1.528 + Name parseMember(String s) throws ParseException { 1.529 + JavacParser p = fac.newParser(s, false, false, false); 1.530 + Name name = p.ident(); 1.531 + if (p.token().kind != TokenKind.EOF) 1.532 + throw new ParseException("dc.ref.unexpected.input"); 1.533 + return name; 1.534 + } 1.535 + 1.536 + List<JCTree> parseParams(String s) throws ParseException { 1.537 + if (s.trim().isEmpty()) 1.538 + return List.nil(); 1.539 + 1.540 + JavacParser p = fac.newParser(s.replace("...", "[]"), false, false, false); 1.541 + ListBuffer<JCTree> paramTypes = new ListBuffer<JCTree>(); 1.542 + paramTypes.add(p.parseType()); 1.543 + 1.544 + if (p.token().kind == TokenKind.IDENTIFIER) 1.545 + p.nextToken(); 1.546 + 1.547 + while (p.token().kind == TokenKind.COMMA) { 1.548 + p.nextToken(); 1.549 + paramTypes.add(p.parseType()); 1.550 + 1.551 + if (p.token().kind == TokenKind.IDENTIFIER) 1.552 + p.nextToken(); 1.553 + } 1.554 + 1.555 + if (p.token().kind != TokenKind.EOF) 1.556 + throw new ParseException("dc.ref.unexpected.input"); 1.557 + 1.558 + return paramTypes.toList(); 1.559 + } 1.560 + 1.561 + /** 1.562 + * Read Java identifier 1.563 + * Matching pairs of { } are skipped; the text is terminated by the first 1.564 + * unmatched }. It is an error if the beginning of the next tag is detected. 1.565 + */ 1.566 + @SuppressWarnings("fallthrough") 1.567 + protected DCIdentifier identifier() throws ParseException { 1.568 + skipWhitespace(); 1.569 + int pos = bp; 1.570 + 1.571 + if (isJavaIdentifierStart(ch)) { 1.572 + Name name = readJavaIdentifier(); 1.573 + return m.at(pos).Identifier(name); 1.574 + } 1.575 + 1.576 + throw new ParseException("dc.identifier.expected"); 1.577 + } 1.578 + 1.579 + /** 1.580 + * Read a quoted string. 1.581 + * It is an error if the beginning of the next tag is detected. 1.582 + */ 1.583 + @SuppressWarnings("fallthrough") 1.584 + protected DCText quotedString() { 1.585 + int pos = bp; 1.586 + nextChar(); 1.587 + 1.588 + loop: 1.589 + while (bp < buflen) { 1.590 + switch (ch) { 1.591 + case '\n': case '\r': case '\f': 1.592 + newline = true; 1.593 + break; 1.594 + 1.595 + case ' ': case '\t': 1.596 + break; 1.597 + 1.598 + case '"': 1.599 + nextChar(); 1.600 + // trim trailing white-space? 1.601 + return m.at(pos).Text(newString(pos, bp)); 1.602 + 1.603 + case '@': 1.604 + if (newline) 1.605 + break loop; 1.606 + 1.607 + } 1.608 + nextChar(); 1.609 + } 1.610 + return null; 1.611 + } 1.612 + 1.613 + /** 1.614 + * Read general text content of an inline tag, including HTML entities and elements. 1.615 + * Matching pairs of { } are skipped; the text is terminated by the first 1.616 + * unmatched }. It is an error if the beginning of the next tag is detected. 1.617 + */ 1.618 + @SuppressWarnings("fallthrough") 1.619 + protected List<DCTree> inlineContent() { 1.620 + ListBuffer<DCTree> trees = new ListBuffer<DCTree>(); 1.621 + 1.622 + skipWhitespace(); 1.623 + int pos = bp; 1.624 + int depth = 1; 1.625 + textStart = -1; 1.626 + 1.627 + loop: 1.628 + while (bp < buflen) { 1.629 + 1.630 + switch (ch) { 1.631 + case '\n': case '\r': case '\f': 1.632 + newline = true; 1.633 + // fall through 1.634 + 1.635 + case ' ': case '\t': 1.636 + nextChar(); 1.637 + break; 1.638 + 1.639 + case '&': 1.640 + entity(trees); 1.641 + break; 1.642 + 1.643 + case '<': 1.644 + newline = false; 1.645 + addPendingText(trees, bp - 1); 1.646 + trees.add(html()); 1.647 + break; 1.648 + 1.649 + case '{': 1.650 + newline = false; 1.651 + depth++; 1.652 + nextChar(); 1.653 + break; 1.654 + 1.655 + case '}': 1.656 + newline = false; 1.657 + if (--depth == 0) { 1.658 + addPendingText(trees, bp - 1); 1.659 + nextChar(); 1.660 + return trees.toList(); 1.661 + } 1.662 + nextChar(); 1.663 + break; 1.664 + 1.665 + case '@': 1.666 + if (newline) 1.667 + break loop; 1.668 + // fallthrough 1.669 + 1.670 + default: 1.671 + if (textStart == -1) 1.672 + textStart = bp; 1.673 + nextChar(); 1.674 + break; 1.675 + } 1.676 + } 1.677 + 1.678 + return List.<DCTree>of(erroneous("dc.unterminated.inline.tag", pos)); 1.679 + } 1.680 + 1.681 + protected void entity(ListBuffer<DCTree> list) { 1.682 + newline = false; 1.683 + addPendingText(list, bp - 1); 1.684 + list.add(entity()); 1.685 + if (textStart == -1) { 1.686 + textStart = bp; 1.687 + lastNonWhite = -1; 1.688 + } 1.689 + } 1.690 + 1.691 + /** 1.692 + * Read an HTML entity. 1.693 + * {@literal &identifier; } or {@literal &#digits; } or {@literal &#xhex-digits; } 1.694 + */ 1.695 + protected DCTree entity() { 1.696 + int p = bp; 1.697 + nextChar(); 1.698 + Name name = null; 1.699 + boolean checkSemi = false; 1.700 + if (ch == '#') { 1.701 + int namep = bp; 1.702 + nextChar(); 1.703 + if (isDecimalDigit(ch)) { 1.704 + nextChar(); 1.705 + while (isDecimalDigit(ch)) 1.706 + nextChar(); 1.707 + name = names.fromChars(buf, namep, bp - namep); 1.708 + } else if (ch == 'x' || ch == 'X') { 1.709 + nextChar(); 1.710 + if (isHexDigit(ch)) { 1.711 + nextChar(); 1.712 + while (isHexDigit(ch)) 1.713 + nextChar(); 1.714 + name = names.fromChars(buf, namep, bp - namep); 1.715 + } 1.716 + } 1.717 + } else if (isIdentifierStart(ch)) { 1.718 + name = readIdentifier(); 1.719 + } 1.720 + 1.721 + if (name == null) 1.722 + return erroneous("dc.bad.entity", p); 1.723 + else { 1.724 + if (ch != ';') 1.725 + return erroneous("dc.missing.semicolon", p); 1.726 + nextChar(); 1.727 + return m.at(p).Entity(name); 1.728 + } 1.729 + } 1.730 + 1.731 + /** 1.732 + * Read the start or end of an HTML tag, or an HTML comment 1.733 + * {@literal <identifier attrs> } or {@literal </identifier> } 1.734 + */ 1.735 + protected DCTree html() { 1.736 + int p = bp; 1.737 + nextChar(); 1.738 + if (isIdentifierStart(ch)) { 1.739 + Name name = readIdentifier(); 1.740 + List<DCTree> attrs = htmlAttrs(); 1.741 + if (attrs != null) { 1.742 + boolean selfClosing = false; 1.743 + if (ch == '/') { 1.744 + nextChar(); 1.745 + selfClosing = true; 1.746 + } 1.747 + if (ch == '>') { 1.748 + nextChar(); 1.749 + return m.at(p).StartElement(name, attrs, selfClosing).setEndPos(bp); 1.750 + } 1.751 + } 1.752 + } else if (ch == '/') { 1.753 + nextChar(); 1.754 + if (isIdentifierStart(ch)) { 1.755 + Name name = readIdentifier(); 1.756 + skipWhitespace(); 1.757 + if (ch == '>') { 1.758 + nextChar(); 1.759 + return m.at(p).EndElement(name); 1.760 + } 1.761 + } 1.762 + } else if (ch == '!') { 1.763 + nextChar(); 1.764 + if (ch == '-') { 1.765 + nextChar(); 1.766 + if (ch == '-') { 1.767 + nextChar(); 1.768 + while (bp < buflen) { 1.769 + int dash = 0; 1.770 + while (ch == '-') { 1.771 + dash++; 1.772 + nextChar(); 1.773 + } 1.774 + // strictly speaking, a comment should not contain "--" 1.775 + // so dash > 2 is an error, dash == 2 implies ch == '>' 1.776 + if (dash >= 2 && ch == '>') { 1.777 + nextChar(); 1.778 + return m.at(p).Comment(newString(p, bp)); 1.779 + } 1.780 + 1.781 + nextChar(); 1.782 + } 1.783 + } 1.784 + } 1.785 + } 1.786 + 1.787 + bp = p + 1; 1.788 + ch = buf[bp]; 1.789 + return erroneous("dc.malformed.html", p); 1.790 + } 1.791 + 1.792 + /** 1.793 + * Read a series of HTML attributes, terminated by {@literal > }. 1.794 + * Each attribute is of the form {@literal identifier[=value] }. 1.795 + * "value" may be unquoted, single-quoted, or double-quoted. 1.796 + */ 1.797 + protected List<DCTree> htmlAttrs() { 1.798 + ListBuffer<DCTree> attrs = new ListBuffer<DCTree>(); 1.799 + skipWhitespace(); 1.800 + 1.801 + loop: 1.802 + while (isIdentifierStart(ch)) { 1.803 + int namePos = bp; 1.804 + Name name = readIdentifier(); 1.805 + skipWhitespace(); 1.806 + List<DCTree> value = null; 1.807 + ValueKind vkind = ValueKind.EMPTY; 1.808 + if (ch == '=') { 1.809 + ListBuffer<DCTree> v = new ListBuffer<DCTree>(); 1.810 + nextChar(); 1.811 + skipWhitespace(); 1.812 + if (ch == '\'' || ch == '"') { 1.813 + vkind = (ch == '\'') ? ValueKind.SINGLE : ValueKind.DOUBLE; 1.814 + char quote = ch; 1.815 + nextChar(); 1.816 + textStart = bp; 1.817 + while (bp < buflen && ch != quote) { 1.818 + if (newline && ch == '@') { 1.819 + attrs.add(erroneous("dc.unterminated.string", namePos)); 1.820 + // No point trying to read more. 1.821 + // In fact, all attrs get discarded by the caller 1.822 + // and superseded by a malformed.html node because 1.823 + // the html tag itself is not terminated correctly. 1.824 + break loop; 1.825 + } 1.826 + attrValueChar(v); 1.827 + } 1.828 + addPendingText(v, bp - 1); 1.829 + nextChar(); 1.830 + } else { 1.831 + vkind = ValueKind.UNQUOTED; 1.832 + textStart = bp; 1.833 + while (bp < buflen && !isUnquotedAttrValueTerminator(ch)) { 1.834 + attrValueChar(v); 1.835 + } 1.836 + addPendingText(v, bp - 1); 1.837 + } 1.838 + skipWhitespace(); 1.839 + value = v.toList(); 1.840 + } 1.841 + DCAttribute attr = m.at(namePos).Attribute(name, vkind, value); 1.842 + attrs.add(attr); 1.843 + } 1.844 + 1.845 + return attrs.toList(); 1.846 + } 1.847 + 1.848 + protected void attrValueChar(ListBuffer<DCTree> list) { 1.849 + switch (ch) { 1.850 + case '&': 1.851 + entity(list); 1.852 + break; 1.853 + 1.854 + case '{': 1.855 + inlineTag(list); 1.856 + break; 1.857 + 1.858 + default: 1.859 + nextChar(); 1.860 + } 1.861 + } 1.862 + 1.863 + protected void addPendingText(ListBuffer<DCTree> list, int textEnd) { 1.864 + if (textStart != -1) { 1.865 + if (textStart <= textEnd) { 1.866 + list.add(m.at(textStart).Text(newString(textStart, textEnd + 1))); 1.867 + } 1.868 + textStart = -1; 1.869 + } 1.870 + } 1.871 + 1.872 + protected DCErroneous erroneous(String code, int pos) { 1.873 + int i = bp - 1; 1.874 + loop: 1.875 + while (i > pos) { 1.876 + switch (buf[i]) { 1.877 + case '\f': case '\n': case '\r': 1.878 + newline = true; 1.879 + break; 1.880 + case '\t': case ' ': 1.881 + break; 1.882 + default: 1.883 + break loop; 1.884 + } 1.885 + i--; 1.886 + } 1.887 + textStart = -1; 1.888 + return m.at(pos).Erroneous(newString(pos, i + 1), diagSource, code); 1.889 + } 1.890 + 1.891 + @SuppressWarnings("unchecked") 1.892 + <T> T getFirst(List<T>... lists) { 1.893 + for (List<T> list: lists) { 1.894 + if (list.nonEmpty()) 1.895 + return list.head; 1.896 + } 1.897 + return null; 1.898 + } 1.899 + 1.900 + protected boolean isIdentifierStart(char ch) { 1.901 + return Character.isUnicodeIdentifierStart(ch); 1.902 + } 1.903 + 1.904 + protected Name readIdentifier() { 1.905 + int start = bp; 1.906 + nextChar(); 1.907 + while (bp < buflen && Character.isUnicodeIdentifierPart(ch)) 1.908 + nextChar(); 1.909 + return names.fromChars(buf, start, bp - start); 1.910 + } 1.911 + 1.912 + protected Name readTagName() { 1.913 + int start = bp; 1.914 + nextChar(); 1.915 + while (bp < buflen && (Character.isUnicodeIdentifierPart(ch) || ch == '.')) 1.916 + nextChar(); 1.917 + return names.fromChars(buf, start, bp - start); 1.918 + } 1.919 + 1.920 + protected boolean isJavaIdentifierStart(char ch) { 1.921 + return Character.isJavaIdentifierStart(ch); 1.922 + } 1.923 + 1.924 + protected Name readJavaIdentifier() { 1.925 + int start = bp; 1.926 + nextChar(); 1.927 + while (bp < buflen && Character.isJavaIdentifierPart(ch)) 1.928 + nextChar(); 1.929 + return names.fromChars(buf, start, bp - start); 1.930 + } 1.931 + 1.932 + protected boolean isDecimalDigit(char ch) { 1.933 + return ('0' <= ch && ch <= '9'); 1.934 + } 1.935 + 1.936 + protected boolean isHexDigit(char ch) { 1.937 + return ('0' <= ch && ch <= '9') 1.938 + || ('a' <= ch && ch <= 'f') 1.939 + || ('A' <= ch && ch <= 'F'); 1.940 + } 1.941 + 1.942 + protected boolean isUnquotedAttrValueTerminator(char ch) { 1.943 + switch (ch) { 1.944 + case '\f': case '\n': case '\r': case '\t': 1.945 + case ' ': 1.946 + case '"': case '\'': case '`': 1.947 + case '=': case '<': case '>': 1.948 + return true; 1.949 + default: 1.950 + return false; 1.951 + } 1.952 + } 1.953 + 1.954 + protected boolean isWhitespace(char ch) { 1.955 + return Character.isWhitespace(ch); 1.956 + } 1.957 + 1.958 + protected void skipWhitespace() { 1.959 + while (isWhitespace(ch)) 1.960 + nextChar(); 1.961 + } 1.962 + 1.963 + protected int getSentenceBreak(String s) { 1.964 + if (sentenceBreaker != null) { 1.965 + sentenceBreaker.setText(s); 1.966 + int i = sentenceBreaker.next(); 1.967 + return (i == s.length()) ? -1 : i; 1.968 + } 1.969 + 1.970 + // scan for period followed by whitespace 1.971 + boolean period = false; 1.972 + for (int i = 0; i < s.length(); i++) { 1.973 + switch (s.charAt(i)) { 1.974 + case '.': 1.975 + period = true; 1.976 + break; 1.977 + 1.978 + case ' ': 1.979 + case '\f': 1.980 + case '\n': 1.981 + case '\r': 1.982 + case '\t': 1.983 + if (period) 1.984 + return i; 1.985 + break; 1.986 + 1.987 + default: 1.988 + period = false; 1.989 + break; 1.990 + } 1.991 + } 1.992 + return -1; 1.993 + } 1.994 + 1.995 + 1.996 + Set<String> htmlBlockTags = new HashSet<String>(Arrays.asList( 1.997 + "h1", "h2", "h3", "h4", "h5", "h6", "p", "pre")); 1.998 + 1.999 + protected boolean isSentenceBreak(Name n) { 1.1000 + return htmlBlockTags.contains(StringUtils.toLowerCase(n.toString())); 1.1001 + } 1.1002 + 1.1003 + protected boolean isSentenceBreak(DCTree t) { 1.1004 + switch (t.getKind()) { 1.1005 + case START_ELEMENT: 1.1006 + return isSentenceBreak(((DCStartElement) t).getName()); 1.1007 + 1.1008 + case END_ELEMENT: 1.1009 + return isSentenceBreak(((DCEndElement) t).getName()); 1.1010 + } 1.1011 + return false; 1.1012 + } 1.1013 + 1.1014 + /** 1.1015 + * @param start position of first character of string 1.1016 + * @param end position of character beyond last character to be included 1.1017 + */ 1.1018 + String newString(int start, int end) { 1.1019 + return new String(buf, start, end - start); 1.1020 + } 1.1021 + 1.1022 + static abstract class TagParser { 1.1023 + enum Kind { INLINE, BLOCK } 1.1024 + 1.1025 + Kind kind; 1.1026 + DCTree.Kind treeKind; 1.1027 + 1.1028 + TagParser(Kind k, DCTree.Kind tk) { 1.1029 + kind = k; 1.1030 + treeKind = tk; 1.1031 + } 1.1032 + 1.1033 + Kind getKind() { 1.1034 + return kind; 1.1035 + } 1.1036 + 1.1037 + DCTree.Kind getTreeKind() { 1.1038 + return treeKind; 1.1039 + } 1.1040 + 1.1041 + abstract DCTree parse(int pos) throws ParseException; 1.1042 + } 1.1043 + 1.1044 + /** 1.1045 + * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/tools/solaris/javadoc.html#javadoctags">Javadoc Tags</a> 1.1046 + */ 1.1047 + private void initTagParsers() { 1.1048 + TagParser[] parsers = { 1.1049 + // @author name-text 1.1050 + new TagParser(Kind.BLOCK, DCTree.Kind.AUTHOR) { 1.1051 + public DCTree parse(int pos) { 1.1052 + List<DCTree> name = blockContent(); 1.1053 + return m.at(pos).Author(name); 1.1054 + } 1.1055 + }, 1.1056 + 1.1057 + // {@code text} 1.1058 + new TagParser(Kind.INLINE, DCTree.Kind.CODE) { 1.1059 + public DCTree parse(int pos) throws ParseException { 1.1060 + DCTree text = inlineText(); 1.1061 + nextChar(); 1.1062 + return m.at(pos).Code((DCText) text); 1.1063 + } 1.1064 + }, 1.1065 + 1.1066 + // @deprecated deprecated-text 1.1067 + new TagParser(Kind.BLOCK, DCTree.Kind.DEPRECATED) { 1.1068 + public DCTree parse(int pos) { 1.1069 + List<DCTree> reason = blockContent(); 1.1070 + return m.at(pos).Deprecated(reason); 1.1071 + } 1.1072 + }, 1.1073 + 1.1074 + // {@docRoot} 1.1075 + new TagParser(Kind.INLINE, DCTree.Kind.DOC_ROOT) { 1.1076 + public DCTree parse(int pos) throws ParseException { 1.1077 + if (ch == '}') { 1.1078 + nextChar(); 1.1079 + return m.at(pos).DocRoot(); 1.1080 + } 1.1081 + inlineText(); // skip unexpected content 1.1082 + nextChar(); 1.1083 + throw new ParseException("dc.unexpected.content"); 1.1084 + } 1.1085 + }, 1.1086 + 1.1087 + // @exception class-name description 1.1088 + new TagParser(Kind.BLOCK, DCTree.Kind.EXCEPTION) { 1.1089 + public DCTree parse(int pos) throws ParseException { 1.1090 + skipWhitespace(); 1.1091 + DCReference ref = reference(false); 1.1092 + List<DCTree> description = blockContent(); 1.1093 + return m.at(pos).Exception(ref, description); 1.1094 + } 1.1095 + }, 1.1096 + 1.1097 + // {@inheritDoc} 1.1098 + new TagParser(Kind.INLINE, DCTree.Kind.INHERIT_DOC) { 1.1099 + public DCTree parse(int pos) throws ParseException { 1.1100 + if (ch == '}') { 1.1101 + nextChar(); 1.1102 + return m.at(pos).InheritDoc(); 1.1103 + } 1.1104 + inlineText(); // skip unexpected content 1.1105 + nextChar(); 1.1106 + throw new ParseException("dc.unexpected.content"); 1.1107 + } 1.1108 + }, 1.1109 + 1.1110 + // {@link package.class#member label} 1.1111 + new TagParser(Kind.INLINE, DCTree.Kind.LINK) { 1.1112 + public DCTree parse(int pos) throws ParseException { 1.1113 + DCReference ref = reference(true); 1.1114 + List<DCTree> label = inlineContent(); 1.1115 + return m.at(pos).Link(ref, label); 1.1116 + } 1.1117 + }, 1.1118 + 1.1119 + // {@linkplain package.class#member label} 1.1120 + new TagParser(Kind.INLINE, DCTree.Kind.LINK_PLAIN) { 1.1121 + public DCTree parse(int pos) throws ParseException { 1.1122 + DCReference ref = reference(true); 1.1123 + List<DCTree> label = inlineContent(); 1.1124 + return m.at(pos).LinkPlain(ref, label); 1.1125 + } 1.1126 + }, 1.1127 + 1.1128 + // {@literal text} 1.1129 + new TagParser(Kind.INLINE, DCTree.Kind.LITERAL) { 1.1130 + public DCTree parse(int pos) throws ParseException { 1.1131 + DCTree text = inlineText(); 1.1132 + nextChar(); 1.1133 + return m.at(pos).Literal((DCText) text); 1.1134 + } 1.1135 + }, 1.1136 + 1.1137 + // @param parameter-name description 1.1138 + new TagParser(Kind.BLOCK, DCTree.Kind.PARAM) { 1.1139 + public DCTree parse(int pos) throws ParseException { 1.1140 + skipWhitespace(); 1.1141 + 1.1142 + boolean typaram = false; 1.1143 + if (ch == '<') { 1.1144 + typaram = true; 1.1145 + nextChar(); 1.1146 + } 1.1147 + 1.1148 + DCIdentifier id = identifier(); 1.1149 + 1.1150 + if (typaram) { 1.1151 + if (ch != '>') 1.1152 + throw new ParseException("dc.gt.expected"); 1.1153 + nextChar(); 1.1154 + } 1.1155 + 1.1156 + skipWhitespace(); 1.1157 + List<DCTree> desc = blockContent(); 1.1158 + return m.at(pos).Param(typaram, id, desc); 1.1159 + } 1.1160 + }, 1.1161 + 1.1162 + // @return description 1.1163 + new TagParser(Kind.BLOCK, DCTree.Kind.RETURN) { 1.1164 + public DCTree parse(int pos) { 1.1165 + List<DCTree> description = blockContent(); 1.1166 + return m.at(pos).Return(description); 1.1167 + } 1.1168 + }, 1.1169 + 1.1170 + // @see reference | quoted-string | HTML 1.1171 + new TagParser(Kind.BLOCK, DCTree.Kind.SEE) { 1.1172 + public DCTree parse(int pos) throws ParseException { 1.1173 + skipWhitespace(); 1.1174 + switch (ch) { 1.1175 + case '"': 1.1176 + DCText string = quotedString(); 1.1177 + if (string != null) { 1.1178 + skipWhitespace(); 1.1179 + if (ch == '@' 1.1180 + || ch == EOI && bp == buf.length - 1) { 1.1181 + return m.at(pos).See(List.<DCTree>of(string)); 1.1182 + } 1.1183 + } 1.1184 + break; 1.1185 + 1.1186 + case '<': 1.1187 + List<DCTree> html = blockContent(); 1.1188 + if (html != null) 1.1189 + return m.at(pos).See(html); 1.1190 + break; 1.1191 + 1.1192 + case '@': 1.1193 + if (newline) 1.1194 + throw new ParseException("dc.no.content"); 1.1195 + break; 1.1196 + 1.1197 + case EOI: 1.1198 + if (bp == buf.length - 1) 1.1199 + throw new ParseException("dc.no.content"); 1.1200 + break; 1.1201 + 1.1202 + default: 1.1203 + if (isJavaIdentifierStart(ch) || ch == '#') { 1.1204 + DCReference ref = reference(true); 1.1205 + List<DCTree> description = blockContent(); 1.1206 + return m.at(pos).See(description.prepend(ref)); 1.1207 + } 1.1208 + } 1.1209 + throw new ParseException("dc.unexpected.content"); 1.1210 + } 1.1211 + }, 1.1212 + 1.1213 + // @serialData data-description 1.1214 + new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_DATA) { 1.1215 + public DCTree parse(int pos) { 1.1216 + List<DCTree> description = blockContent(); 1.1217 + return m.at(pos).SerialData(description); 1.1218 + } 1.1219 + }, 1.1220 + 1.1221 + // @serialField field-name field-type description 1.1222 + new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL_FIELD) { 1.1223 + public DCTree parse(int pos) throws ParseException { 1.1224 + skipWhitespace(); 1.1225 + DCIdentifier name = identifier(); 1.1226 + skipWhitespace(); 1.1227 + DCReference type = reference(false); 1.1228 + List<DCTree> description = null; 1.1229 + if (isWhitespace(ch)) { 1.1230 + skipWhitespace(); 1.1231 + description = blockContent(); 1.1232 + } 1.1233 + return m.at(pos).SerialField(name, type, description); 1.1234 + } 1.1235 + }, 1.1236 + 1.1237 + // @serial field-description | include | exclude 1.1238 + new TagParser(Kind.BLOCK, DCTree.Kind.SERIAL) { 1.1239 + public DCTree parse(int pos) { 1.1240 + List<DCTree> description = blockContent(); 1.1241 + return m.at(pos).Serial(description); 1.1242 + } 1.1243 + }, 1.1244 + 1.1245 + // @since since-text 1.1246 + new TagParser(Kind.BLOCK, DCTree.Kind.SINCE) { 1.1247 + public DCTree parse(int pos) { 1.1248 + List<DCTree> description = blockContent(); 1.1249 + return m.at(pos).Since(description); 1.1250 + } 1.1251 + }, 1.1252 + 1.1253 + // @throws class-name description 1.1254 + new TagParser(Kind.BLOCK, DCTree.Kind.THROWS) { 1.1255 + public DCTree parse(int pos) throws ParseException { 1.1256 + skipWhitespace(); 1.1257 + DCReference ref = reference(false); 1.1258 + List<DCTree> description = blockContent(); 1.1259 + return m.at(pos).Throws(ref, description); 1.1260 + } 1.1261 + }, 1.1262 + 1.1263 + // {@value package.class#field} 1.1264 + new TagParser(Kind.INLINE, DCTree.Kind.VALUE) { 1.1265 + public DCTree parse(int pos) throws ParseException { 1.1266 + DCReference ref = reference(true); 1.1267 + skipWhitespace(); 1.1268 + if (ch == '}') { 1.1269 + nextChar(); 1.1270 + return m.at(pos).Value(ref); 1.1271 + } 1.1272 + nextChar(); 1.1273 + throw new ParseException("dc.unexpected.content"); 1.1274 + } 1.1275 + }, 1.1276 + 1.1277 + // @version version-text 1.1278 + new TagParser(Kind.BLOCK, DCTree.Kind.VERSION) { 1.1279 + public DCTree parse(int pos) { 1.1280 + List<DCTree> description = blockContent(); 1.1281 + return m.at(pos).Version(description); 1.1282 + } 1.1283 + }, 1.1284 + }; 1.1285 + 1.1286 + tagParsers = new HashMap<Name,TagParser>(); 1.1287 + for (TagParser p: parsers) 1.1288 + tagParsers.put(names.fromString(p.getTreeKind().tagName), p); 1.1289 + 1.1290 + } 1.1291 +}