src/share/jaxws_classes/com/sun/xml/internal/dtdparser/DTDParser.java

changeset 0
373ffda63c9a
child 637
9c07ef4934dd
equal deleted inserted replaced
-1:000000000000 0:373ffda63c9a
1 /*
2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.xml.internal.dtdparser;
27
28 import org.xml.sax.EntityResolver;
29 import org.xml.sax.InputSource;
30 import org.xml.sax.Locator;
31 import org.xml.sax.SAXException;
32 import org.xml.sax.SAXParseException;
33
34 import java.io.IOException;
35 import java.util.ArrayList;
36 import java.util.Enumeration;
37 import java.util.Hashtable;
38 import java.util.Locale;
39 import java.util.Set;
40 import java.util.Vector;
41
42 /**
43 * This implements parsing of XML 1.0 DTDs.
44 * <p/>
45 * This conforms to the portion of the XML 1.0 specification related
46 * to the external DTD subset.
47 * <p/>
48 * For multi-language applications (such as web servers using XML
49 * processing to create dynamic content), a method supports choosing
50 * a locale for parser diagnostics which is both understood by the
51 * message recipient and supported by the parser.
52 * <p/>
53 * This parser produces a stream of parse events. It supports some
54 * features (exposing comments, CDATA sections, and entity references)
55 * which are not required to be reported by conformant XML processors.
56 *
57 * @author David Brownell
58 * @author Janet Koenig
59 * @author Kohsuke KAWAGUCHI
60 * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $
61 */
62 public class DTDParser {
63 public final static String TYPE_CDATA = "CDATA";
64 public final static String TYPE_ID = "ID";
65 public final static String TYPE_IDREF = "IDREF";
66 public final static String TYPE_IDREFS = "IDREFS";
67 public final static String TYPE_ENTITY = "ENTITY";
68 public final static String TYPE_ENTITIES = "ENTITIES";
69 public final static String TYPE_NMTOKEN = "NMTOKEN";
70 public final static String TYPE_NMTOKENS = "NMTOKENS";
71 public final static String TYPE_NOTATION = "NOTATION";
72 public final static String TYPE_ENUMERATION = "ENUMERATION";
73
74
75 // stack of input entities being merged
76 private InputEntity in;
77
78 // temporaries reused during parsing
79 private StringBuffer strTmp;
80 private char nameTmp [];
81 private NameCache nameCache;
82 private char charTmp [] = new char[2];
83
84 // temporary DTD parsing state
85 private boolean doLexicalPE;
86
87 // DTD state, used during parsing
88 // private SimpleHashtable elements = new SimpleHashtable (47);
89 protected final Set declaredElements = new java.util.HashSet();
90 private SimpleHashtable params = new SimpleHashtable(7);
91
92 // exposed to package-private subclass
93 Hashtable notations = new Hashtable(7);
94 SimpleHashtable entities = new SimpleHashtable(17);
95
96 private SimpleHashtable ids = new SimpleHashtable();
97
98 // listeners for DTD parsing events
99 private DTDEventListener dtdHandler;
100
101 private EntityResolver resolver;
102 private Locale locale;
103
104 // string constants -- use these copies so "==" works
105 // package private
106 static final String strANY = "ANY";
107 static final String strEMPTY = "EMPTY";
108
109 /**
110 * Used by applications to request locale for diagnostics.
111 *
112 * @param l The locale to use, or null to use system defaults
113 * (which may include only message IDs).
114 */
115 public void setLocale(Locale l) throws SAXException {
116
117 if (l != null && !messages.isLocaleSupported(l.toString())) {
118 throw new SAXException(messages.getMessage(locale,
119 "P-078", new Object[]{l}));
120 }
121 locale = l;
122 }
123
124 /**
125 * Returns the diagnostic locale.
126 */
127 public Locale getLocale() {
128 return locale;
129 }
130
131 /**
132 * Chooses a client locale to use for diagnostics, using the first
133 * language specified in the list that is supported by this parser.
134 * That locale is then set using <a href="#setLocale(java.util.Locale)">
135 * setLocale()</a>. Such a list could be provided by a variety of user
136 * preference mechanisms, including the HTTP <em>Accept-Language</em>
137 * header field.
138 *
139 * @param languages Array of language specifiers, ordered with the most
140 * preferable one at the front. For example, "en-ca" then "fr-ca",
141 * followed by "zh_CN". Both RFC 1766 and Java styles are supported.
142 * @return The chosen locale, or null.
143 * @see MessageCatalog
144 */
145 public Locale chooseLocale(String languages [])
146 throws SAXException {
147
148 Locale l = messages.chooseLocale(languages);
149
150 if (l != null) {
151 setLocale(l);
152 }
153 return l;
154 }
155
156 /**
157 * Lets applications control entity resolution.
158 */
159 public void setEntityResolver(EntityResolver r) {
160
161 resolver = r;
162 }
163
164 /**
165 * Returns the object used to resolve entities
166 */
167 public EntityResolver getEntityResolver() {
168
169 return resolver;
170 }
171
172 /**
173 * Used by applications to set handling of DTD parsing events.
174 */
175 public void setDtdHandler(DTDEventListener handler) {
176 dtdHandler = handler;
177 if (handler != null)
178 handler.setDocumentLocator(new Locator() {
179 public String getPublicId() {
180 return DTDParser.this.getPublicId();
181 }
182
183 public String getSystemId() {
184 return DTDParser.this.getSystemId();
185 }
186
187 public int getLineNumber() {
188 return DTDParser.this.getLineNumber();
189 }
190
191 public int getColumnNumber() {
192 return DTDParser.this.getColumnNumber();
193 }
194 });
195 }
196
197 /**
198 * Returns the handler used for DTD parsing events.
199 */
200 public DTDEventListener getDtdHandler() {
201 return dtdHandler;
202 }
203
204 /**
205 * Parse a DTD.
206 */
207 public void parse(InputSource in)
208 throws IOException, SAXException {
209 init();
210 parseInternal(in);
211 }
212
213 /**
214 * Parse a DTD.
215 */
216 public void parse(String uri)
217 throws IOException, SAXException {
218 InputSource in;
219
220 init();
221 // System.out.println ("parse (\"" + uri + "\")");
222 in = resolver.resolveEntity(null, uri);
223
224 // If custom resolver punts resolution to parser, handle it ...
225 if (in == null) {
226 in = Resolver.createInputSource(new java.net.URL(uri), false);
227
228 // ... or if custom resolver doesn't correctly construct the
229 // input entity, patch it up enough so relative URIs work, and
230 // issue a warning to minimize later confusion.
231 } else if (in.getSystemId() == null) {
232 warning("P-065", null);
233 in.setSystemId(uri);
234 }
235
236 parseInternal(in);
237 }
238
239 // makes sure the parser is reset to "before a document"
240 private void init() {
241 in = null;
242
243 // alloc temporary data used in parsing
244 strTmp = new StringBuffer();
245 nameTmp = new char[20];
246 nameCache = new NameCache();
247
248 // reset doc info
249 // isInAttribute = false;
250
251 doLexicalPE = false;
252
253 entities.clear();
254 notations.clear();
255 params.clear();
256 // elements.clear ();
257 declaredElements.clear();
258
259 // initialize predefined references ... re-interpreted later
260 builtin("amp", "&#38;");
261 builtin("lt", "&#60;");
262 builtin("gt", ">");
263 builtin("quot", "\"");
264 builtin("apos", "'");
265
266 if (locale == null)
267 locale = Locale.getDefault();
268 if (resolver == null)
269 resolver = new Resolver();
270 if (dtdHandler == null)
271 dtdHandler = new DTDHandlerBase();
272 }
273
274 private void builtin(String entityName, String entityValue) {
275 InternalEntity entity;
276 entity = new InternalEntity(entityName, entityValue.toCharArray());
277 entities.put(entityName, entity);
278 }
279
280
281 ////////////////////////////////////////////////////////////////
282 //
283 // parsing is by recursive descent, code roughly
284 // following the BNF rules except tweaked for simple
285 // lookahead. rules are more or less in numeric order,
286 // except where code sharing suggests other structures.
287 //
288 // a classic benefit of recursive descent parsers: it's
289 // relatively easy to get diagnostics that make sense.
290 //
291 ////////////////////////////////////////////////////////////////
292
293
294 private void parseInternal(InputSource input)
295 throws IOException, SAXException {
296
297 if (input == null)
298 fatal("P-000");
299
300 try {
301 in = InputEntity.getInputEntity(dtdHandler, locale);
302 in.init(input, null, null, false);
303
304 dtdHandler.startDTD(in);
305
306 // [30] extSubset ::= TextDecl? extSubsetDecl
307 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
308 // | PEReference | S )*
309 // ... same as [79] extPE, which is where the code is
310
311 ExternalEntity externalSubset = new ExternalEntity(in);
312 externalParameterEntity(externalSubset);
313
314 if (!in.isEOF()) {
315 fatal("P-001", new Object[]
316 {Integer.toHexString(((int) getc()))});
317 }
318 afterRoot();
319 dtdHandler.endDTD();
320
321 } catch (EndOfInputException e) {
322 if (!in.isDocument()) {
323 String name = in.getName();
324 do { // force a relevant URI and line number
325 in = in.pop();
326 } while (in.isInternal());
327 fatal("P-002", new Object[]{name});
328 } else {
329 fatal("P-003", null);
330 }
331 } catch (RuntimeException e) {
332 // Don't discard location that triggered the exception
333 // ## Should properly wrap exception
334 System.err.print("Internal DTD parser error: "); // ##
335 e.printStackTrace();
336 throw new SAXParseException(e.getMessage() != null
337 ? e.getMessage() : e.getClass().getName(),
338 getPublicId(), getSystemId(),
339 getLineNumber(), getColumnNumber());
340
341 } finally {
342 // recycle temporary data used during parsing
343 strTmp = null;
344 nameTmp = null;
345 nameCache = null;
346
347 // ditto input sources etc
348 if (in != null) {
349 in.close();
350 in = null;
351 }
352
353 // get rid of all DTD info ... some of it would be
354 // useful for editors etc, investigate later.
355
356 params.clear();
357 entities.clear();
358 notations.clear();
359 declaredElements.clear();
360 // elements.clear();
361 ids.clear();
362 }
363 }
364
365 void afterRoot() throws SAXException {
366 // Make sure all IDREFs match declared ID attributes. We scan
367 // after the document element is parsed, since XML allows forward
368 // references, and only now can we know if they're all resolved.
369
370 for (Enumeration e = ids.keys();
371 e.hasMoreElements();
372 ) {
373 String id = (String) e.nextElement();
374 Boolean value = (Boolean) ids.get(id);
375 if (Boolean.FALSE == value)
376 error("V-024", new Object[]{id});
377 }
378 }
379
380
381 // role is for diagnostics
382 private void whitespace(String roleId)
383 throws IOException, SAXException {
384
385 // [3] S ::= (#x20 | #x9 | #xd | #xa)+
386 if (!maybeWhitespace()) {
387 fatal("P-004", new Object[]
388 {messages.getMessage(locale, roleId)});
389 }
390 }
391
392 // S?
393 private boolean maybeWhitespace()
394 throws IOException, SAXException {
395
396 if (!doLexicalPE)
397 return in.maybeWhitespace();
398
399 // see getc() for the PE logic -- this lets us splice
400 // expansions of PEs in "anywhere". getc() has smarts,
401 // so for external PEs we don't bypass it.
402
403 // XXX we can marginally speed PE handling, and certainly
404 // be cleaner (hence potentially more correct), by using
405 // the observations that expanded PEs only start and stop
406 // where whitespace is allowed. getc wouldn't need any
407 // "lexical" PE expansion logic, and no other method needs
408 // to handle termination of PEs. (parsing of literals would
409 // still need to pop entities, but not parsing of references
410 // in content.)
411
412 char c = getc();
413 boolean saw = false;
414
415 while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
416 saw = true;
417
418 // this gracefully ends things when we stop playing
419 // with internal parameters. caller should have a
420 // grammar rule allowing whitespace at end of entity.
421 if (in.isEOF() && !in.isInternal())
422 return saw;
423 c = getc();
424 }
425 ungetc();
426 return saw;
427 }
428
429 private String maybeGetName()
430 throws IOException, SAXException {
431
432 NameCacheEntry entry = maybeGetNameCacheEntry();
433 return (entry == null) ? null : entry.name;
434 }
435
436 private NameCacheEntry maybeGetNameCacheEntry()
437 throws IOException, SAXException {
438
439 // [5] Name ::= (Letter|'_'|':') (Namechar)*
440 char c = getc();
441
442 if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
443 ungetc();
444 return null;
445 }
446 return nameCharString(c);
447 }
448
449 // Used when parsing enumerations
450 private String getNmtoken()
451 throws IOException, SAXException {
452
453 // [7] Nmtoken ::= (Namechar)+
454 char c = getc();
455 if (!XmlChars.isNameChar(c))
456 fatal("P-006", new Object[]{new Character(c)});
457 return nameCharString(c).name;
458 }
459
460 // n.b. this gets used when parsing attribute values (for
461 // internal references) so we can't use strTmp; it's also
462 // a hotspot for CPU and memory in the parser (called at least
463 // once for each element) so this has been optimized a bit.
464
465 private NameCacheEntry nameCharString(char c)
466 throws IOException, SAXException {
467
468 int i = 1;
469
470 nameTmp[0] = c;
471 for (; ;) {
472 if ((c = in.getNameChar()) == 0)
473 break;
474 if (i >= nameTmp.length) {
475 char tmp [] = new char[nameTmp.length + 10];
476 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
477 nameTmp = tmp;
478 }
479 nameTmp[i++] = c;
480 }
481 return nameCache.lookupEntry(nameTmp, i);
482 }
483
484 //
485 // much similarity between parsing entity values in DTD
486 // and attribute values (in DTD or content) ... both follow
487 // literal parsing rules, newline canonicalization, etc
488 //
489 // leaves value in 'strTmp' ... either a "replacement text" (4.5),
490 // or else partially normalized attribute value (the first bit
491 // of 3.3.3's spec, without the "if not CDATA" bits).
492 //
493 private void parseLiteral(boolean isEntityValue)
494 throws IOException, SAXException {
495
496 // [9] EntityValue ::=
497 // '"' ([^"&%] | Reference | PEReference)* '"'
498 // | "'" ([^'&%] | Reference | PEReference)* "'"
499 // [10] AttValue ::=
500 // '"' ([^"&] | Reference )* '"'
501 // | "'" ([^'&] | Reference )* "'"
502 char quote = getc();
503 char c;
504 InputEntity source = in;
505
506 if (quote != '\'' && quote != '"') {
507 fatal("P-007");
508 }
509
510 // don't report entity expansions within attributes,
511 // they're reported "fully expanded" via SAX
512 // isInAttribute = !isEntityValue;
513
514 // get value into strTmp
515 strTmp = new StringBuffer();
516
517 // scan, allowing entity push/pop wherever ...
518 // expanded entities can't terminate the literal!
519 for (; ;) {
520 if (in != source && in.isEOF()) {
521 // we don't report end of parsed entities
522 // within attributes (no SAX hooks)
523 in = in.pop();
524 continue;
525 }
526 if ((c = getc()) == quote && in == source) {
527 break;
528 }
529
530 //
531 // Basically the "reference in attribute value"
532 // row of the chart in section 4.4 of the spec
533 //
534 if (c == '&') {
535 String entityName = maybeGetName();
536
537 if (entityName != null) {
538 nextChar(';', "F-020", entityName);
539
540 // 4.4 says: bypass these here ... we'll catch
541 // forbidden refs to unparsed entities on use
542 if (isEntityValue) {
543 strTmp.append('&');
544 strTmp.append(entityName);
545 strTmp.append(';');
546 continue;
547 }
548 expandEntityInLiteral(entityName, entities, isEntityValue);
549
550
551 // character references are always included immediately
552 } else if ((c = getc()) == '#') {
553 int tmp = parseCharNumber();
554
555 if (tmp > 0xffff) {
556 tmp = surrogatesToCharTmp(tmp);
557 strTmp.append(charTmp[0]);
558 if (tmp == 2)
559 strTmp.append(charTmp[1]);
560 } else
561 strTmp.append((char) tmp);
562 } else
563 fatal("P-009");
564 continue;
565
566 }
567
568 // expand parameter entities only within entity value literals
569 if (c == '%' && isEntityValue) {
570 String entityName = maybeGetName();
571
572 if (entityName != null) {
573 nextChar(';', "F-021", entityName);
574 expandEntityInLiteral(entityName, params, isEntityValue);
575 continue;
576 } else
577 fatal("P-011");
578 }
579
580 // For attribute values ...
581 if (!isEntityValue) {
582 // 3.3.3 says whitespace normalizes to space...
583 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
584 strTmp.append(' ');
585 continue;
586 }
587
588 // "<" not legal in parsed literals ...
589 if (c == '<')
590 fatal("P-012");
591 }
592
593 strTmp.append(c);
594 }
595 // isInAttribute = false;
596 }
597
598 // does a SINGLE expansion of the entity (often reparsed later)
599 private void expandEntityInLiteral(String name, SimpleHashtable table,
600 boolean isEntityValue)
601 throws IOException, SAXException {
602
603 Object entity = table.get(name);
604
605 if (entity instanceof InternalEntity) {
606 InternalEntity value = (InternalEntity) entity;
607 pushReader(value.buf, name, !value.isPE);
608
609 } else if (entity instanceof ExternalEntity) {
610 if (!isEntityValue) // must be a PE ...
611 fatal("P-013", new Object[]{name});
612 // XXX if this returns false ...
613 pushReader((ExternalEntity) entity);
614
615 } else if (entity == null) {
616 //
617 // Note: much confusion about whether spec requires such
618 // errors to be fatal in many cases, but none about whether
619 // it allows "normal" errors to be unrecoverable!
620 //
621 fatal((table == params) ? "V-022" : "P-014",
622 new Object[]{name});
623 }
624 }
625
626 // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
627 // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
628
629 // NOTE: XML spec should explicitly say that PE ref syntax is
630 // ignored in PIs, comments, SystemLiterals, and Pubid Literal
631 // values ... can't process the XML spec's own DTD without doing
632 // that for comments.
633
634 private String getQuotedString(String type, String extra)
635 throws IOException, SAXException {
636
637 // use in.getc to bypass PE processing
638 char quote = in.getc();
639
640 if (quote != '\'' && quote != '"')
641 fatal("P-015", new Object[]{
642 messages.getMessage(locale, type, new Object[]{extra})
643 });
644
645 char c;
646
647 strTmp = new StringBuffer();
648 while ((c = in.getc()) != quote)
649 strTmp.append((char) c);
650 return strTmp.toString();
651 }
652
653
654 private String parsePublicId() throws IOException, SAXException {
655
656 // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
657 // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
658 String retval = getQuotedString("F-033", null);
659 for (int i = 0; i < retval.length(); i++) {
660 char c = retval.charAt(i);
661 if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
662 && !(c >= 'A' && c <= 'Z')
663 && !(c >= 'a' && c <= 'z'))
664 fatal("P-016", new Object[]{new Character(c)});
665 }
666 strTmp = new StringBuffer();
667 strTmp.append(retval);
668 return normalize(false);
669 }
670
671 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
672 // handled by: InputEntity.parsedContent()
673
674 private boolean maybeComment(boolean skipStart)
675 throws IOException, SAXException {
676
677 // [15] Comment ::= '<!--'
678 // ( (Char - '-') | ('-' (Char - '-'))*
679 // '-->'
680 if (!in.peek(skipStart ? "!--" : "<!--", null))
681 return false;
682
683 boolean savedLexicalPE = doLexicalPE;
684 boolean saveCommentText;
685
686 doLexicalPE = false;
687 saveCommentText = false;
688 if (saveCommentText)
689 strTmp = new StringBuffer();
690
691 oneComment:
692 for (; ;) {
693 try {
694 // bypass PE expansion, but permit PEs
695 // to complete ... valid docs won't care.
696 for (; ;) {
697 int c = getc();
698 if (c == '-') {
699 c = getc();
700 if (c != '-') {
701 if (saveCommentText)
702 strTmp.append('-');
703 ungetc();
704 continue;
705 }
706 nextChar('>', "F-022", null);
707 break oneComment;
708 }
709 if (saveCommentText)
710 strTmp.append((char) c);
711 }
712 } catch (EndOfInputException e) {
713 //
714 // This is fatal EXCEPT when we're processing a PE...
715 // in which case a validating processor reports an error.
716 // External PEs are easy to detect; internal ones we
717 // infer by being an internal entity outside an element.
718 //
719 if (in.isInternal()) {
720 error("V-021", null);
721 }
722 fatal("P-017");
723 }
724 }
725 doLexicalPE = savedLexicalPE;
726 if (saveCommentText)
727 dtdHandler.comment(strTmp.toString());
728 return true;
729 }
730
731 private boolean maybePI(boolean skipStart)
732 throws IOException, SAXException {
733
734 // [16] PI ::= '<?' PITarget
735 // (S (Char* - (Char* '?>' Char*)))?
736 // '?>'
737 // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
738 boolean savedLexicalPE = doLexicalPE;
739
740 if (!in.peek(skipStart ? "?" : "<?", null))
741 return false;
742 doLexicalPE = false;
743
744 String target = maybeGetName();
745
746 if (target == null) {
747 fatal("P-018");
748 }
749 if ("xml".equals(target)) {
750 fatal("P-019");
751 }
752 if ("xml".equalsIgnoreCase(target)) {
753 fatal("P-020", new Object[]{target});
754 }
755
756 if (maybeWhitespace()) {
757 strTmp = new StringBuffer();
758 try {
759 for (; ;) {
760 // use in.getc to bypass PE processing
761 char c = in.getc();
762 //Reached the end of PI.
763 if (c == '?' && in.peekc('>'))
764 break;
765 strTmp.append(c);
766 }
767 } catch (EndOfInputException e) {
768 fatal("P-021");
769 }
770 dtdHandler.processingInstruction(target, strTmp.toString());
771 } else {
772 if (!in.peek("?>", null)) {
773 fatal("P-022");
774 }
775 dtdHandler.processingInstruction(target, "");
776 }
777
778 doLexicalPE = savedLexicalPE;
779 return true;
780 }
781
782 // [18] CDSect ::= CDStart CData CDEnd
783 // [19] CDStart ::= '<![CDATA['
784 // [20] CData ::= (Char* - (Char* ']]>' Char*))
785 // [21] CDEnd ::= ']]>'
786 //
787 // ... handled by InputEntity.unparsedContent()
788
789 // collapsing several rules together ...
790 // simpler than attribute literals -- no reference parsing!
791 private String maybeReadAttribute(String name, boolean must)
792 throws IOException, SAXException {
793
794 // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
795 // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
796 // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\"
797 if (!maybeWhitespace()) {
798 if (!must) {
799 return null;
800 }
801 fatal("P-024", new Object[]{name});
802 // NOTREACHED
803 }
804
805 if (!peek(name)) {
806 if (must) {
807 fatal("P-024", new Object[]{name});
808 } else {
809 // To ensure that the whitespace is there so that when we
810 // check for the next attribute we assure that the
811 // whitespace still exists.
812 ungetc();
813 return null;
814 }
815 }
816
817 // [25] Eq ::= S? '=' S?
818 maybeWhitespace();
819 nextChar('=', "F-023", null);
820 maybeWhitespace();
821
822 return getQuotedString("F-035", name);
823 }
824
825 private void readVersion(boolean must, String versionNum)
826 throws IOException, SAXException {
827
828 String value = maybeReadAttribute("version", must);
829
830 // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
831
832 if (must && value == null)
833 fatal("P-025", new Object[]{versionNum});
834 if (value != null) {
835 int length = value.length();
836 for (int i = 0; i < length; i++) {
837 char c = value.charAt(i);
838 if (!((c >= '0' && c <= '9')
839 || c == '_' || c == '.'
840 || (c >= 'a' && c <= 'z')
841 || (c >= 'A' && c <= 'Z')
842 || c == ':' || c == '-')
843 )
844 fatal("P-026", new Object[]{value});
845 }
846 }
847 if (value != null && !value.equals(versionNum))
848 error("P-027", new Object[]{versionNum, value});
849 }
850
851 // common code used by most markup declarations
852 // ... S (Q)Name ...
853 private String getMarkupDeclname(String roleId, boolean qname)
854 throws IOException, SAXException {
855
856 String name;
857
858 whitespace(roleId);
859 name = maybeGetName();
860 if (name == null)
861 fatal("P-005", new Object[]
862 {messages.getMessage(locale, roleId)});
863 return name;
864 }
865
866 private boolean maybeMarkupDecl()
867 throws IOException, SAXException {
868
869 // [29] markupdecl ::= elementdecl | Attlistdecl
870 // | EntityDecl | NotationDecl | PI | Comment
871 return maybeElementDecl()
872 || maybeAttlistDecl()
873 || maybeEntityDecl()
874 || maybeNotationDecl()
875 || maybePI(false)
876 || maybeComment(false);
877 }
878
879 private static final String XmlLang = "xml:lang";
880
881 private boolean isXmlLang(String value) {
882
883 // [33] LanguageId ::= Langcode ('-' Subcode)*
884 // [34] Langcode ::= ISO639Code | IanaCode | UserCode
885 // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
886 // [36] IanaCode ::= [iI] '-' SubCode
887 // [37] UserCode ::= [xX] '-' SubCode
888 // [38] SubCode ::= [a-zA-Z]+
889
890 // the ISO and IANA codes (and subcodes) are registered,
891 // but that's neither a WF nor a validity constraint.
892
893 int nextSuffix;
894 char c;
895
896 if (value.length() < 2)
897 return false;
898 c = value.charAt(1);
899 if (c == '-') { // IANA, or user, code
900 c = value.charAt(0);
901 if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
902 return false;
903 nextSuffix = 1;
904 } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
905 // 2 letter ISO code, or error
906 c = value.charAt(0);
907 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
908 return false;
909 nextSuffix = 2;
910 } else
911 return false;
912
913 // here "suffix" ::= '-' [a-zA-Z]+ suffix*
914 while (nextSuffix < value.length()) {
915 c = value.charAt(nextSuffix);
916 if (c != '-')
917 break;
918 while (++nextSuffix < value.length()) {
919 c = value.charAt(nextSuffix);
920 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
921 break;
922 }
923 }
924 return value.length() == nextSuffix && c != '-';
925 }
926
927
928 //
929 // CHAPTER 3: Logical Structures
930 //
931
932 /**
933 * To validate, subclassers should at this time make sure that
934 * values are of the declared types:<UL>
935 * <LI> ID and IDREF(S) values are Names
936 * <LI> NMTOKEN(S) are Nmtokens
937 * <LI> ENUMERATION values match one of the tokens
938 * <LI> NOTATION values match a notation name
939 * <LI> ENTITY(IES) values match an unparsed external entity
940 * </UL>
941 * <p/>
942 * <P> Separately, make sure IDREF values match some ID
943 * provided in the document (in the afterRoot method).
944 */
945 /* void validateAttributeSyntax (Attribute attr, String value)
946 throws DTDParseException {
947 // ID, IDREF(S) ... values are Names
948 if (Attribute.ID == attr.type()) {
949 if (!XmlNames.isName (value))
950 error ("V-025", new Object [] { value });
951
952 Boolean b = (Boolean) ids.getNonInterned (value);
953 if (b == null || b.equals (Boolean.FALSE))
954 ids.put (value.intern (), Boolean.TRUE);
955 else
956 error ("V-026", new Object [] { value });
957
958 } else if (Attribute.IDREF == attr.type()) {
959 if (!XmlNames.isName (value))
960 error ("V-027", new Object [] { value });
961
962 Boolean b = (Boolean) ids.getNonInterned (value);
963 if (b == null)
964 ids.put (value.intern (), Boolean.FALSE);
965
966 } else if (Attribute.IDREFS == attr.type()) {
967 StringTokenizer tokenizer = new StringTokenizer (value);
968 Boolean b;
969 boolean sawValue = false;
970
971 while (tokenizer.hasMoreTokens ()) {
972 value = tokenizer.nextToken ();
973 if (!XmlNames.isName (value))
974 error ("V-027", new Object [] { value });
975 b = (Boolean) ids.getNonInterned (value);
976 if (b == null)
977 ids.put (value.intern (), Boolean.FALSE);
978 sawValue = true;
979 }
980 if (!sawValue)
981 error ("V-039", null);
982
983
984 // NMTOKEN(S) ... values are Nmtoken(s)
985 } else if (Attribute.NMTOKEN == attr.type()) {
986 if (!XmlNames.isNmtoken (value))
987 error ("V-028", new Object [] { value });
988
989 } else if (Attribute.NMTOKENS == attr.type()) {
990 StringTokenizer tokenizer = new StringTokenizer (value);
991 boolean sawValue = false;
992
993 while (tokenizer.hasMoreTokens ()) {
994 value = tokenizer.nextToken ();
995 if (!XmlNames.isNmtoken (value))
996 error ("V-028", new Object [] { value });
997 sawValue = true;
998 }
999 if (!sawValue)
1000 error ("V-032", null);
1001
1002 // ENUMERATION ... values match one of the tokens
1003 } else if (Attribute.ENUMERATION == attr.type()) {
1004 for (int i = 0; i < attr.values().length; i++)
1005 if (value.equals (attr.values()[i]))
1006 return;
1007 error ("V-029", new Object [] { value });
1008
1009 // NOTATION values match a notation name
1010 } else if (Attribute.NOTATION == attr.type()) {
1011 //
1012 // XXX XML 1.0 spec should probably list references to
1013 // externally defined notations in standalone docs as
1014 // validity errors. Ditto externally defined unparsed
1015 // entities; neither should show up in attributes, else
1016 // one needs to read the external declarations in order
1017 // to make sense of the document (exactly what tagging
1018 // a doc as "standalone" intends you won't need to do).
1019 //
1020 for (int i = 0; i < attr.values().length; i++)
1021 if (value.equals (attr.values()[i]))
1022 return;
1023 error ("V-030", new Object [] { value });
1024
1025 // ENTITY(IES) values match an unparsed entity(ies)
1026 } else if (Attribute.ENTITY == attr.type()) {
1027 // see note above re standalone
1028 if (!isUnparsedEntity (value))
1029 error ("V-031", new Object [] { value });
1030
1031 } else if (Attribute.ENTITIES == attr.type()) {
1032 StringTokenizer tokenizer = new StringTokenizer (value);
1033 boolean sawValue = false;
1034
1035 while (tokenizer.hasMoreTokens ()) {
1036 value = tokenizer.nextToken ();
1037 // see note above re standalone
1038 if (!isUnparsedEntity (value))
1039 error ("V-031", new Object [] { value });
1040 sawValue = true;
1041 }
1042 if (!sawValue)
1043 error ("V-040", null);
1044
1045 } else if (Attribute.CDATA != attr.type())
1046 throw new InternalError (attr.type());
1047 }
1048 */
1049 /*
1050 private boolean isUnparsedEntity (String name)
1051 {
1052 Object e = entities.getNonInterned (name);
1053 if (e == null || !(e instanceof ExternalEntity))
1054 return false;
1055 return ((ExternalEntity)e).notation != null;
1056 }
1057 */
1058 private boolean maybeElementDecl()
1059 throws IOException, SAXException {
1060
1061 // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1062 // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1063 InputEntity start = peekDeclaration("!ELEMENT");
1064
1065 if (start == null)
1066 return false;
1067
1068 // n.b. for content models where inter-element whitespace is
1069 // ignorable, we mark that fact here.
1070 String name = getMarkupDeclname("F-015", true);
1071 // Element element = (Element) elements.get (name);
1072 // boolean declEffective = false;
1073
1074 /*
1075 if (element != null) {
1076 if (element.contentModel() != null) {
1077 error ("V-012", new Object [] { name });
1078 } // else <!ATTLIST name ...> came first
1079 } else {
1080 element = new Element(name);
1081 elements.put (element.name(), element);
1082 declEffective = true;
1083 }
1084 */
1085 if (declaredElements.contains(name))
1086 error("V-012", new Object[]{name});
1087 else {
1088 declaredElements.add(name);
1089 // declEffective = true;
1090 }
1091
1092 short modelType;
1093 whitespace("F-000");
1094 if (peek(strEMPTY)) {
1095 /// // leave element.contentModel as null for this case.
1096 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
1097 } else if (peek(strANY)) {
1098 /// element.setContentModel(new StringModel(StringModelType.ANY));
1099 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY);
1100 } else {
1101 modelType = getMixedOrChildren(name);
1102 }
1103
1104 dtdHandler.endContentModel(name, modelType);
1105
1106 maybeWhitespace();
1107 char c = getc();
1108 if (c != '>')
1109 fatal("P-036", new Object[]{name, new Character(c)});
1110 if (start != in)
1111 error("V-013", null);
1112
1113 /// dtdHandler.elementDecl(element);
1114
1115 return true;
1116 }
1117
1118 // We're leaving the content model as a regular expression;
1119 // it's an efficient natural way to express such things, and
1120 // libraries often interpret them. No whitespace in the
1121 // model we store, though!
1122
1123 /**
1124 * returns content model type.
1125 */
1126 private short getMixedOrChildren(String elementName/*Element element*/)
1127 throws IOException, SAXException {
1128
1129 InputEntity start;
1130
1131 // [47] children ::= (choice|seq) ('?'|'*'|'+')?
1132 strTmp = new StringBuffer();
1133
1134 nextChar('(', "F-028", elementName);
1135 start = in;
1136 maybeWhitespace();
1137 strTmp.append('(');
1138
1139 short modelType;
1140 if (peek("#PCDATA")) {
1141 strTmp.append("#PCDATA");
1142 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED);
1143 getMixed(elementName, start);
1144 } else {
1145 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
1146 getcps(elementName, start);
1147 }
1148
1149 return modelType;
1150 }
1151
/**
 * Parses the remainder of one model group of a {@code children} content
 * model and reports it to {@code dtdHandler}. The opening '(' and any
 * whitespace after it have already been consumed by the caller.
 * <pre>
 * [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
 * [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
 * [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
 * </pre>
 * Events are emitted as {@code startModelGroup}, then
 * {@code childElement}/nested groups interleaved with {@code connector},
 * then {@code endModelGroup} carrying the group's occurrence indicator.
 * Nested groups are handled by recursion.
 *
 * @param elementName name of the element being declared (used in messages)
 * @param start       the entity that held the opening '('; if the closing
 *                    ')' is found in a different entity, V-014 is reported
 */
private void getcps(String elementName, InputEntity start)
        throws IOException, SAXException {

    // "decided" becomes true once the first connector fixes whether this
    // group is a choice ('|') or a sequence (','); "type" holds that
    // connector character (0 until a separator candidate has been read).
    boolean decided = false;
    char type = 0;

    dtdHandler.startModelGroup();

    do {
        String tag;

        tag = maybeGetName();
        if (tag != null) {
            // plain element name, followed by an optional ?, * or +
            strTmp.append(tag);
            dtdHandler.childElement(tag, getFrequency());
        } else if (peek("(")) {
            // nested group; remember the entity holding its '('
            InputEntity next = in;
            strTmp.append('(');
            maybeWhitespace();
            getcps(elementName, next);
            // No getFrequency() call here: the recursive call already
            // reads the nested group's occurrence indicator when it
            // invokes endModelGroup(getFrequency()).
        } else
            // neither a name nor a group: pick the message by what kind
            // of group we are in
            fatal((type == 0) ? "P-039" :
                    ((type == ',') ? "P-037" : "P-038"),
                    new Object[]{new Character(getc())});

        maybeWhitespace();
        if (decided) {
            char c = getc();

            if (c == type) {
                // same connector as before: keep going
                strTmp.append(type);
                maybeWhitespace();
                reportConnector(type);
                // "continue" in a do/while jumps to the loop condition
                continue;
            } else if (c == '\u0029') { // rparen
                // push the ')' back so the loop condition's peek(")")
                // consumes it and ends the loop
                ungetc();
                continue;
            } else {
                // mixing connectors, or junk where a connector belongs
                fatal((type == 0) ? "P-041" : "P-040",
                        new Object[]{
                            new Character(c),
                            new Character(type)
                        });
            }
        } else {
            // first separator position: find out what kind of group it is
            type = getc();
            switch (type) {
            case '|':
            case ',':
                reportConnector(type);
                break;
            default:
                // not a connector (e.g. the closing paren); push it back
                // and let the loop condition look at it
                ungetc();
                continue;
            }
            decided = true;
            strTmp.append(type);
        }
        maybeWhitespace();
    } while (!peek(")"));

    // validity: group must open and close in the same entity
    if (in != start)
        error("V-014", new Object[]{elementName});
    strTmp.append(')');

    // the occurrence indicator following ')' belongs to this group
    dtdHandler.endModelGroup(getFrequency());
}
1243
1244 private void reportConnector(char type) throws SAXException {
1245 switch (type) {
1246 case '|':
1247 dtdHandler.connector(DTDEventListener.CHOICE); ///<-
1248 return;
1249 case ',':
1250 dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
1251 return;
1252 default:
1253 throw new Error(); //assertion failed.
1254 }
1255 }
1256
1257 private short getFrequency()
1258 throws IOException, SAXException {
1259
1260 final char c = getc();
1261
1262 if (c == '?') {
1263 strTmp.append(c);
1264 return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
1265 // original.setRepeat(Repeat.ZERO_OR_ONE);
1266 } else if (c == '+') {
1267 strTmp.append(c);
1268 return DTDEventListener.OCCURENCE_ONE_OR_MORE;
1269 // original.setRepeat(Repeat.ONE_OR_MORE);
1270 } else if (c == '*') {
1271 strTmp.append(c);
1272 return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
1273 // original.setRepeat(Repeat.ZERO_OR_MORE);
1274 } else {
1275 ungetc();
1276 return DTDEventListener.OCCURENCE_ONCE;
1277 }
1278 }
1279
1280 // '(' S? '#PCDATA' already consumed
1281 // matching ')' must be in "start" entity if validating
1282 private void getMixed(String elementName, /*Element element,*/ InputEntity start)
1283 throws IOException, SAXException {
1284
1285 // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
1286 // | '(' S? '#PCDATA' S? ')'
1287 maybeWhitespace();
1288 if (peek("\u0029*") || peek("\u0029")) {
1289 if (in != start)
1290 error("V-014", new Object[]{elementName});
1291 strTmp.append(')');
1292 // element.setContentModel(new StringModel(StringModelType.PCDATA));
1293 return;
1294 }
1295
1296 ArrayList l = new ArrayList();
1297 // l.add(new StringModel(StringModelType.PCDATA));
1298
1299
1300 while (peek("|")) {
1301 String name;
1302
1303 strTmp.append('|');
1304 maybeWhitespace();
1305
1306 doLexicalPE = true;
1307 name = maybeGetName();
1308 if (name == null)
1309 fatal("P-042", new Object[]
1310 {elementName, Integer.toHexString(getc())});
1311 if (l.contains(name)) {
1312 error("V-015", new Object[]{name});
1313 } else {
1314 l.add(name);
1315 dtdHandler.mixedElement(name);
1316 }
1317 strTmp.append(name);
1318 maybeWhitespace();
1319 }
1320
1321 if (!peek("\u0029*")) // right paren
1322 fatal("P-043", new Object[]
1323 {elementName, new Character(getc())});
1324 if (in != start)
1325 error("V-014", new Object[]{elementName});
1326 strTmp.append(')');
1327 // ChoiceModel cm = new ChoiceModel((Collection)l);
1328 // cm.setRepeat(Repeat.ZERO_OR_MORE);
1329 // element.setContentModel(cm);
1330 }
1331
1332 private boolean maybeAttlistDecl()
1333 throws IOException, SAXException {
1334
1335 // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1336 InputEntity start = peekDeclaration("!ATTLIST");
1337
1338 if (start == null)
1339 return false;
1340
1341 String elementName = getMarkupDeclname("F-016", true);
1342 // Element element = (Element) elements.get (name);
1343
1344 // if (element == null) {
1345 // // not yet declared -- no problem.
1346 // element = new Element(name);
1347 // elements.put(name, element);
1348 // }
1349
1350 while (!peek(">")) {
1351
1352 // [53] AttDef ::= S Name S AttType S DefaultDecl
1353 // [54] AttType ::= StringType | TokenizedType | EnumeratedType
1354
1355 // look for global attribute definitions, don't expand for now...
1356 maybeWhitespace();
1357 char c = getc();
1358 if (c == '%') {
1359 String entityName = maybeGetName();
1360 if (entityName != null) {
1361 nextChar(';', "F-021", entityName);
1362 whitespace("F-021");
1363 continue;
1364 } else
1365 fatal("P-011");
1366 }
1367
1368 ungetc();
1369 // look for attribute name otherwise
1370 String attName = maybeGetName();
1371 if (attName == null) {
1372 fatal("P-044", new Object[]{new Character(getc())});
1373 }
1374 whitespace("F-001");
1375
1376 /// Attribute a = new Attribute (name);
1377
1378 String typeName;
1379 Vector values = null; // notation/enumeration values
1380
1381 // Note: use the type constants from Attribute
1382 // so that "==" may be used (faster)
1383
1384 // [55] StringType ::= 'CDATA'
1385 if (peek(TYPE_CDATA))
1386 /// a.setType(Attribute.CDATA);
1387 typeName = TYPE_CDATA;
1388
1389 // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
1390 // | 'ENTITY' | 'ENTITIES'
1391 // | 'NMTOKEN' | 'NMTOKENS'
1392 // n.b. if "IDREFS" is there, both "ID" and "IDREF"
1393 // match peekahead ... so this order matters!
1394 else if (peek(TYPE_IDREFS))
1395 typeName = TYPE_IDREFS;
1396 else if (peek(TYPE_IDREF))
1397 typeName = TYPE_IDREF;
1398 else if (peek(TYPE_ID)) {
1399 typeName = TYPE_ID;
1400 // TODO: should implement this error check?
1401 /// if (element.id() != null) {
1402 /// error ("V-016", new Object [] { element.id() });
1403 /// } else
1404 /// element.setId(name);
1405 } else if (peek(TYPE_ENTITY))
1406 typeName = TYPE_ENTITY;
1407 else if (peek(TYPE_ENTITIES))
1408 typeName = TYPE_ENTITIES;
1409 else if (peek(TYPE_NMTOKENS))
1410 typeName = TYPE_NMTOKENS;
1411 else if (peek(TYPE_NMTOKEN))
1412 typeName = TYPE_NMTOKEN;
1413
1414 // [57] EnumeratedType ::= NotationType | Enumeration
1415 // [58] NotationType ::= 'NOTATION' S '(' S? Name
1416 // (S? '|' S? Name)* S? ')'
1417 else if (peek(TYPE_NOTATION)) {
1418 typeName = TYPE_NOTATION;
1419 whitespace("F-002");
1420 nextChar('(', "F-029", null);
1421 maybeWhitespace();
1422
1423 values = new Vector();
1424 do {
1425 String name;
1426 if ((name = maybeGetName()) == null)
1427 fatal("P-068");
1428 // permit deferred declarations
1429 if (notations.get(name) == null)
1430 notations.put(name, name);
1431 values.addElement(name);
1432 maybeWhitespace();
1433 if (peek("|"))
1434 maybeWhitespace();
1435 } while (!peek(")"));
1436 /// a.setValues(new String [v.size ()]);
1437 /// for (int i = 0; i < v.size (); i++)
1438 /// a.setValue(i, (String)v.elementAt(i));
1439
1440 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
1441 } else if (peek("(")) {
1442 /// a.setType(Attribute.ENUMERATION);
1443 typeName = TYPE_ENUMERATION;
1444
1445 maybeWhitespace();
1446
1447 /// Vector v = new Vector ();
1448 values = new Vector();
1449 do {
1450 String name = getNmtoken();
1451 /// v.addElement (name);
1452 values.addElement(name);
1453 maybeWhitespace();
1454 if (peek("|"))
1455 maybeWhitespace();
1456 } while (!peek(")"));
1457 /// a.setValues(new String [v.size ()]);
1458 /// for (int i = 0; i < v.size (); i++)
1459 /// a.setValue(i, (String)v.elementAt(i));
1460 } else {
1461 fatal("P-045",
1462 new Object[]{attName, new Character(getc())});
1463 typeName = null;
1464 }
1465
1466 short attributeUse;
1467 String defaultValue = null;
1468
1469 // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1470 // | (('#FIXED' S)? AttValue)
1471 whitespace("F-003");
1472 if (peek("#REQUIRED"))
1473 attributeUse = DTDEventListener.USE_REQUIRED;
1474 /// a.setIsRequired(true);
1475 else if (peek("#FIXED")) {
1476 /// if (a.type() == Attribute.ID)
1477 if (typeName == TYPE_ID)
1478 error("V-017", new Object[]{attName});
1479 /// a.setIsFixed(true);
1480 attributeUse = DTDEventListener.USE_FIXED;
1481 whitespace("F-004");
1482 parseLiteral(false);
1483 /// if (a.type() != Attribute.CDATA)
1484 /// a.setDefaultValue(normalize(false));
1485 /// else
1486 /// a.setDefaultValue(strTmp.toString());
1487
1488 if (typeName == TYPE_CDATA)
1489 defaultValue = normalize(false);
1490 else
1491 defaultValue = strTmp.toString();
1492
1493 // TODO: implement this check
1494 /// if (a.type() != Attribute.CDATA)
1495 /// validateAttributeSyntax (a, a.defaultValue());
1496 } else if (!peek("#IMPLIED")) {
1497 attributeUse = DTDEventListener.USE_IMPLIED;
1498
1499 /// if (a.type() == Attribute.ID)
1500 if (typeName == TYPE_ID)
1501 error("V-018", new Object[]{attName});
1502 parseLiteral(false);
1503 /// if (a.type() != Attribute.CDATA)
1504 /// a.setDefaultValue(normalize(false));
1505 /// else
1506 /// a.setDefaultValue(strTmp.toString());
1507 if (typeName == TYPE_CDATA)
1508 defaultValue = normalize(false);
1509 else
1510 defaultValue = strTmp.toString();
1511
1512 // TODO: implement this check
1513 /// if (a.type() != Attribute.CDATA)
1514 /// validateAttributeSyntax (a, a.defaultValue());
1515 } else {
1516 // TODO: this looks like an fatal error.
1517 attributeUse = DTDEventListener.USE_NORMAL;
1518 }
1519
1520 if (XmlLang.equals(attName)
1521 && defaultValue/* a.defaultValue()*/ != null
1522 && !isXmlLang(defaultValue/*a.defaultValue()*/))
1523 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/});
1524
1525 // TODO: isn't it an error to specify the same attribute twice?
1526 /// if (!element.attributes().contains(a)) {
1527 /// element.addAttribute(a);
1528 /// dtdHandler.attributeDecl(a);
1529 /// }
1530
1531 String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null;
1532 dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue);
1533 maybeWhitespace();
1534 }
1535 if (start != in)
1536 error("V-013", null);
1537 return true;
1538 }
1539
1540 // used when parsing literal attribute values,
1541 // or public identifiers.
1542 //
1543 // input in strTmp
1544 private String normalize(boolean invalidIfNeeded) {
1545
1546 // this can allocate an extra string...
1547
1548 String s = strTmp.toString();
1549 String s2 = s.trim();
1550 boolean didStrip = false;
1551
1552 if (s != s2) {
1553 s = s2;
1554 s2 = null;
1555 didStrip = true;
1556 }
1557 strTmp = new StringBuffer();
1558 for (int i = 0; i < s.length(); i++) {
1559 char c = s.charAt(i);
1560 if (!XmlChars.isSpace(c)) {
1561 strTmp.append(c);
1562 continue;
1563 }
1564 strTmp.append(' ');
1565 while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
1566 didStrip = true;
1567 i--;
1568 }
1569 if (didStrip)
1570 return strTmp.toString();
1571 else
1572 return s;
1573 }
1574
1575 private boolean maybeConditionalSect()
1576 throws IOException, SAXException {
1577
1578 // [61] conditionalSect ::= includeSect | ignoreSect
1579
1580 if (!peek("<!["))
1581 return false;
1582
1583 String keyword;
1584 InputEntity start = in;
1585
1586 maybeWhitespace();
1587
1588 if ((keyword = maybeGetName()) == null)
1589 fatal("P-046");
1590 maybeWhitespace();
1591 nextChar('[', "F-030", null);
1592
1593 // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
1594 // extSubsetDecl ']]>'
1595 if ("INCLUDE".equals(keyword)) {
1596 for (; ;) {
1597 while (in.isEOF() && in != start)
1598 in = in.pop();
1599 if (in.isEOF()) {
1600 error("V-020", null);
1601 }
1602 if (peek("]]>"))
1603 break;
1604
1605 doLexicalPE = false;
1606 if (maybeWhitespace())
1607 continue;
1608 if (maybePEReference())
1609 continue;
1610 doLexicalPE = true;
1611 if (maybeMarkupDecl() || maybeConditionalSect())
1612 continue;
1613
1614 fatal("P-047");
1615 }
1616
1617 // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
1618 // ignoreSectcontents ']]>'
1619 // [64] ignoreSectcontents ::= Ignore ('<!['
1620 // ignoreSectcontents ']]>' Ignore)*
1621 // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
1622 } else if ("IGNORE".equals(keyword)) {
1623 int nestlevel = 1;
1624 // ignoreSectcontents
1625 doLexicalPE = false;
1626 while (nestlevel > 0) {
1627 char c = getc(); // will pop input entities
1628 if (c == '<') {
1629 if (peek("!["))
1630 nestlevel++;
1631 } else if (c == ']') {
1632 if (peek("]>"))
1633 nestlevel--;
1634 } else
1635 continue;
1636 }
1637 } else
1638 fatal("P-048", new Object[]{keyword});
1639 return true;
1640 }
1641
1642
1643 //
1644 // CHAPTER 4: Physical Structures
1645 //
1646
1647 // parse decimal or hex numeric character reference
1648 private int parseCharNumber()
1649 throws IOException, SAXException {
1650
1651 char c;
1652 int retval = 0;
1653
1654 // n.b. we ignore overflow ...
1655 if (getc() != 'x') {
1656 ungetc();
1657 for (; ;) {
1658 c = getc();
1659 if (c >= '0' && c <= '9') {
1660 retval *= 10;
1661 retval += (c - '0');
1662 continue;
1663 }
1664 if (c == ';')
1665 return retval;
1666 fatal("P-049");
1667 }
1668 } else
1669 for (; ;) {
1670 c = getc();
1671 if (c >= '0' && c <= '9') {
1672 retval <<= 4;
1673 retval += (c - '0');
1674 continue;
1675 }
1676 if (c >= 'a' && c <= 'f') {
1677 retval <<= 4;
1678 retval += 10 + (c - 'a');
1679 continue;
1680 }
1681 if (c >= 'A' && c <= 'F') {
1682 retval <<= 4;
1683 retval += 10 + (c - 'A');
1684 continue;
1685 }
1686 if (c == ';')
1687 return retval;
1688 fatal("P-050");
1689 }
1690 }
1691
1692 // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
1693 // though still subject to the 'Char' construct in XML
1694 private int surrogatesToCharTmp(int ucs4)
1695 throws SAXException {
1696
1697 if (ucs4 <= 0xffff) {
1698 if (XmlChars.isChar(ucs4)) {
1699 charTmp[0] = (char) ucs4;
1700 return 1;
1701 }
1702 } else if (ucs4 <= 0x0010ffff) {
1703 // we represent these as UNICODE surrogate pairs
1704 ucs4 -= 0x10000;
1705 charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
1706 charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
1707 return 2;
1708 }
1709 fatal("P-051", new Object[]{Integer.toHexString(ucs4)});
1710 // NOTREACHED
1711 return -1;
1712 }
1713
1714 private boolean maybePEReference()
1715 throws IOException, SAXException {
1716
1717 // This is the SYNTACTIC version of this construct.
1718 // When processing external entities, there is also
1719 // a LEXICAL version; see getc() and doLexicalPE.
1720
1721 // [69] PEReference ::= '%' Name ';'
1722 if (!in.peekc('%'))
1723 return false;
1724
1725 String name = maybeGetName();
1726 Object entity;
1727
1728 if (name == null)
1729 fatal("P-011");
1730 nextChar(';', "F-021", name);
1731 entity = params.get(name);
1732
1733 if (entity instanceof InternalEntity) {
1734 InternalEntity value = (InternalEntity) entity;
1735 pushReader(value.buf, name, false);
1736
1737 } else if (entity instanceof ExternalEntity) {
1738 pushReader((ExternalEntity) entity);
1739 externalParameterEntity((ExternalEntity) entity);
1740
1741 } else if (entity == null) {
1742 error("V-022", new Object[]{name});
1743 }
1744 return true;
1745 }
1746
/**
 * Attempts to parse a general or parameter entity declaration at the
 * current position.
 * <pre>
 * [70] EntityDecl ::= GEDecl | PEDecl
 * [71] GEDecl ::= '&lt;!ENTITY' S Name S EntityDef S? '&gt;'
 * [72] PEDecl ::= '&lt;!ENTITY' S '%' S Name S PEDEF S? '&gt;'
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
 * [74] PEDef ::= EntityValue | ExternalID
 * </pre>
 * Only the first declaration of a name is stored and reported; a
 * redeclared general entity produces warning P-054. Stored general
 * entities are reported to {@code dtdHandler}; parameter entities are
 * stored in {@code params} without a handler callback.
 *
 * @return {@code false} if the input does not start with
 *         {@code <!ENTITY}; {@code true} once a declaration was consumed
 */
private boolean maybeEntityDecl()
        throws IOException, SAXException {

    InputEntity start = peekDeclaration("!ENTITY");

    if (start == null)
        return false;

    String entityName;
    SimpleHashtable defns; // table receiving the entity: params or entities
    ExternalEntity externalId;
    boolean doStore;

    // PE expansion gets selectively turned off several places:
    // in ENTITY declarations (here), in comments, in PIs.

    // Here, we allow PE entities to be declared, and allows
    // literals to include PE refs without the added spaces
    // required with their expansion in markup decls.

    doLexicalPE = false;
    whitespace("F-005");
    if (in.peekc('%')) {
        // '%' marks a parameter entity declaration
        whitespace("F-006");
        defns = params;
    } else
        defns = entities;

    ungetc(); // leave some whitespace
    doLexicalPE = true;
    entityName = getMarkupDeclname("F-017", false);
    whitespace("F-007");
    externalId = maybeExternalID();

    //
    // first definition sticks ... e.g. internal subset PEs are used
    // to override DTD defaults. It's also an "error" to incorrectly
    // redefine builtin internal entities, but since reporting such
    // errors is optional we only give warnings ("just in case") for
    // non-parameter entities.
    //
    doStore = (defns.get(entityName) == null);
    if (!doStore && defns == entities)
        warning("P-054", new Object[]{entityName});

    // internal entities
    if (externalId == null) {
        char value [];
        InternalEntity entity;

        // The literal is always parsed (to consume it), even when the
        // declaration is going to be discarded as a redefinition.
        doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd"
        parseLiteral(true);
        doLexicalPE = true;
        if (doStore) {
            // copy the literal's text out of strTmp
            value = new char[strTmp.length()];
            if (value.length != 0)
                strTmp.getChars(0, value.length, value, 0);
            entity = new InternalEntity(entityName, value);
            entity.isPE = (defns == params);
            entity.isFromInternalSubset = false;
            defns.put(entityName, entity);
            // only general entities are reported to the handler
            if (defns == entities)
                dtdHandler.internalGeneralEntityDecl(entityName,
                        new String(value));
        }

    // external entities (including unparsed)
    } else {
        // [76] NDataDecl ::= S 'NDATA' S Name
        // NDATA is only looked for on general entities
        if (defns == entities && maybeWhitespace()
                && peek("NDATA")) {
            externalId.notation = getMarkupDeclname("F-018", false);

            // flag undeclared notation for checking after
            // the DTD is fully processed
            if (notations.get(externalId.notation) == null)
                notations.put(externalId.notation, Boolean.TRUE);
        }
        externalId.name = entityName;
        externalId.isPE = (defns == params);
        externalId.isFromInternalSubset = false;
        if (doStore) {
            defns.put(entityName, externalId);
            // a notation means an unparsed entity; otherwise only
            // general (non-parameter) entities are reported
            if (externalId.notation != null)
                dtdHandler.unparsedEntityDecl(entityName,
                        externalId.publicId, externalId.systemId,
                        externalId.notation);
            else if (defns == entities)
                dtdHandler.externalGeneralEntityDecl(entityName,
                        externalId.publicId, externalId.systemId);
        }
    }
    maybeWhitespace();
    nextChar('>', "F-031", entityName);
    // The declaration must end in the entity in which it started (V-013).
    if (start != in)
        error("V-013", null);
    return true;
}
1851
1852 private ExternalEntity maybeExternalID()
1853 throws IOException, SAXException {
1854
1855 // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1856 // | 'PUBLIC' S' PubidLiteral S Systemliteral
1857 String temp = null;
1858 ExternalEntity retval;
1859
1860 if (peek("PUBLIC")) {
1861 whitespace("F-009");
1862 temp = parsePublicId();
1863 } else if (!peek("SYSTEM"))
1864 return null;
1865
1866 retval = new ExternalEntity(in);
1867 retval.publicId = temp;
1868 whitespace("F-008");
1869 retval.systemId = parseSystemId();
1870 return retval;
1871 }
1872
1873 private String parseSystemId()
1874 throws IOException, SAXException {
1875
1876 String uri = getQuotedString("F-034", null);
1877 int temp = uri.indexOf(':');
1878
1879 // resolve relative URIs ... must do it here since
1880 // it's relative to the source file holding the URI!
1881
1882 // "new java.net.URL (URL, string)" conforms to RFC 1630,
1883 // but we can't use that except when the URI is a URL.
1884 // The entity resolver is allowed to handle URIs that are
1885 // not URLs, so we pass URIs through with scheme intact
1886 if (temp == -1 || uri.indexOf('/') < temp) {
1887 String baseURI;
1888
1889 baseURI = in.getSystemId();
1890 if (baseURI == null)
1891 fatal("P-055", new Object[]{uri});
1892 if (uri.length() == 0)
1893 uri = ".";
1894 baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
1895 if (uri.charAt(0) != '/')
1896 uri = baseURI + uri;
1897 else {
1898 // XXX slashes at the beginning of a relative URI are
1899 // a special case we don't handle.
1900 throw new InternalError();
1901 }
1902
1903 // letting other code map any "/xxx/../" or "/./" to "/",
1904 // since all URIs must handle it the same.
1905 }
1906 // check for fragment ID in URI
1907 if (uri.indexOf('#') != -1)
1908 error("P-056", new Object[]{uri});
1909 return uri;
1910 }
1911
1912 private void maybeTextDecl()
1913 throws IOException, SAXException {
1914
1915 // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1916 if (peek("<?xml")) {
1917 readVersion(false, "1.0");
1918 readEncoding(true);
1919 maybeWhitespace();
1920 if (!peek("?>"))
1921 fatal("P-057");
1922 }
1923 }
1924
1925 private void externalParameterEntity(ExternalEntity next)
1926 throws IOException, SAXException {
1927
1928 //
1929 // Reap the intended benefits of standalone declarations:
1930 // don't deal with external parameter entities, except to
1931 // validate the standalone declaration.
1932 //
1933
1934 // n.b. "in external parameter entities" (and external
1935 // DTD subset, same grammar) parameter references can
1936 // occur "within" markup declarations ... expansions can
1937 // cross syntax rules. Flagged here; affects getc().
1938
1939 // [79] ExtPE ::= TextDecl? extSubsetDecl
1940 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
1941 // | PEReference | S )*
1942 InputEntity pe;
1943
1944 // XXX if this returns false ...
1945
1946 pe = in;
1947 maybeTextDecl();
1948 while (!pe.isEOF()) {
1949 // pop internal PEs (and whitespace before/after)
1950 if (in.isEOF()) {
1951 in = in.pop();
1952 continue;
1953 }
1954 doLexicalPE = false;
1955 if (maybeWhitespace())
1956 continue;
1957 if (maybePEReference())
1958 continue;
1959 doLexicalPE = true;
1960 if (maybeMarkupDecl() || maybeConditionalSect())
1961 continue;
1962 break;
1963 }
1964 // if (in != pe) throw new InternalError("who popped my PE?");
1965 if (!pe.isEOF())
1966 fatal("P-059", new Object[]{in.getName()});
1967 }
1968
1969 private void readEncoding(boolean must)
1970 throws IOException, SAXException {
1971
1972 // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1973 String name = maybeReadAttribute("encoding", must);
1974
1975 if (name == null)
1976 return;
1977 for (int i = 0; i < name.length(); i++) {
1978 char c = name.charAt(i);
1979 if ((c >= 'A' && c <= 'Z')
1980 || (c >= 'a' && c <= 'z'))
1981 continue;
1982 if (i != 0
1983 && ((c >= '0' && c <= '9')
1984 || c == '-'
1985 || c == '_'
1986 || c == '.'
1987 ))
1988 continue;
1989 fatal("P-060", new Object[]{new Character(c)});
1990 }
1991
1992 //
1993 // This should be the encoding in use, and it's even an error for
1994 // it to be anything else (in certain cases that are impractical to
1995 // to test, and may even be insufficient). So, we do the best we
1996 // can, and warn if things look suspicious. Note that Java doesn't
1997 // uniformly expose the encodings, and that the names it uses
1998 // internally are nonstandard. Also, that the XML spec allows
1999 // such "errors" not to be reported at all.
2000 //
2001 String currentEncoding = in.getEncoding();
2002
2003 if (currentEncoding != null
2004 && !name.equalsIgnoreCase(currentEncoding))
2005 warning("P-061", new Object[]{name, currentEncoding});
2006 }
2007
2008 private boolean maybeNotationDecl()
2009 throws IOException, SAXException {
2010
2011 // [82] NotationDecl ::= '<!NOTATION' S Name S
2012 // (ExternalID | PublicID) S? '>'
2013 // [83] PublicID ::= 'PUBLIC' S PubidLiteral
2014 InputEntity start = peekDeclaration("!NOTATION");
2015
2016 if (start == null)
2017 return false;
2018
2019 String name = getMarkupDeclname("F-019", false);
2020 ExternalEntity entity = new ExternalEntity(in);
2021
2022 whitespace("F-011");
2023 if (peek("PUBLIC")) {
2024 whitespace("F-009");
2025 entity.publicId = parsePublicId();
2026 if (maybeWhitespace()) {
2027 if (!peek(">"))
2028 entity.systemId = parseSystemId();
2029 else
2030 ungetc();
2031 }
2032 } else if (peek("SYSTEM")) {
2033 whitespace("F-008");
2034 entity.systemId = parseSystemId();
2035 } else
2036 fatal("P-062");
2037 maybeWhitespace();
2038 nextChar('>', "F-032", name);
2039 if (start != in)
2040 error("V-013", null);
2041 if (entity.systemId != null && entity.systemId.indexOf('#') != -1)
2042 error("P-056", new Object[]{entity.systemId});
2043
2044 Object value = notations.get(name);
2045 if (value != null && value instanceof ExternalEntity)
2046 warning("P-063", new Object[]{name});
2047
2048 else {
2049 notations.put(name, entity);
2050 dtdHandler.notationDecl(name, entity.publicId,
2051 entity.systemId);
2052 }
2053 return true;
2054 }
2055
2056
2057 ////////////////////////////////////////////////////////////////
2058 //
2059 // UTILITIES
2060 //
2061 ////////////////////////////////////////////////////////////////
2062
2063 private char getc() throws IOException, SAXException {
2064
2065 if (!doLexicalPE) {
2066 char c = in.getc();
2067 return c;
2068 }
2069
2070 //
2071 // External parameter entities get funky processing of '%param;'
2072 // references. It's not clearly defined in the XML spec; but it
2073 // boils down to having those refs be _lexical_ in most cases to
2074 // include partial syntax productions. It also needs selective
2075 // enabling; "<!ENTITY % foo ...>" must work, for example, and
2076 // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
2077 // if it's expanded in a literal, else "ab cd". PEs also do
2078 // not expand within comments or PIs, and external PEs are only
2079 // allowed to have markup decls (and so aren't handled lexically).
2080 //
2081 // This PE handling should be merged into maybeWhitespace, where
2082 // it can be dealt with more consistently.
2083 //
2084 // Also, there are some validity constraints in this area.
2085 //
2086 char c;
2087
2088 while (in.isEOF()) {
2089 if (in.isInternal() || (doLexicalPE && !in.isDocument()))
2090 in = in.pop();
2091 else {
2092 fatal("P-064", new Object[]{in.getName()});
2093 }
2094 }
2095 if ((c = in.getc()) == '%' && doLexicalPE) {
2096 // PE ref ::= '%' name ';'
2097 String name = maybeGetName();
2098 Object entity;
2099
2100 if (name == null)
2101 fatal("P-011");
2102 nextChar(';', "F-021", name);
2103 entity = params.get(name);
2104
2105 // push a magic "entity" before and after the
2106 // real one, so ungetc() behaves uniformly
2107 pushReader(" ".toCharArray(), null, false);
2108 if (entity instanceof InternalEntity)
2109 pushReader(((InternalEntity) entity).buf, name, false);
2110 else if (entity instanceof ExternalEntity)
2111 // PEs can't be unparsed!
2112 // XXX if this returns false ...
2113 pushReader((ExternalEntity) entity);
2114 else if (entity == null)
2115 // see note in maybePEReference re making this be nonfatal.
2116 fatal("V-022");
2117 else
2118 throw new InternalError();
2119 pushReader(" ".toCharArray(), null, false);
2120 return in.getc();
2121 }
2122 return c;
2123 }
2124
2125 private void ungetc() {
2126
2127 in.ungetc();
2128 }
2129
2130 private boolean peek(String s)
2131 throws IOException, SAXException {
2132
2133 return in.peek(s, null);
2134 }
2135
2136 // Return the entity starting the specified declaration
2137 // (for validating declaration nesting) else null.
2138
2139 private InputEntity peekDeclaration(String s)
2140 throws IOException, SAXException {
2141
2142 InputEntity start;
2143
2144 if (!in.peekc('<'))
2145 return null;
2146 start = in;
2147 if (in.peek(s, null))
2148 return start;
2149 in.ungetc();
2150 return null;
2151 }
2152
2153 private void nextChar(char c, String location, String near)
2154 throws IOException, SAXException {
2155
2156 while (in.isEOF() && !in.isDocument())
2157 in = in.pop();
2158 if (!in.peekc(c))
2159 fatal("P-008", new Object[]
2160 {new Character(c),
2161 messages.getMessage(locale, location),
2162 (near == null ? "" : ('"' + near + '"'))});
2163 }
2164
2165
2166 private void pushReader(char buf [], String name, boolean isGeneral)
2167 throws SAXException {
2168
2169 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2170 r.init(buf, name, in, !isGeneral);
2171 in = r;
2172 }
2173
2174 private boolean pushReader(ExternalEntity next)
2175 throws IOException, SAXException {
2176
2177 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2178 InputSource s;
2179 try {
2180 s = next.getInputSource(resolver);
2181 } catch (IOException e) {
2182 String msg =
2183 "unable to open the external entity from :" + next.systemId;
2184 if (next.publicId != null)
2185 msg += " (public id:" + next.publicId + ")";
2186
2187 SAXParseException spe = new SAXParseException(msg,
2188 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e);
2189 dtdHandler.fatalError(spe);
2190 throw e;
2191 }
2192
2193 r.init(s, next.name, in, next.isPE);
2194 in = r;
2195 return true;
2196 }
2197
2198 public String getPublicId() {
2199
2200 return (in == null) ? null : in.getPublicId();
2201 }
2202
2203 public String getSystemId() {
2204
2205 return (in == null) ? null : in.getSystemId();
2206 }
2207
2208 public int getLineNumber() {
2209
2210 return (in == null) ? -1 : in.getLineNumber();
2211 }
2212
2213 public int getColumnNumber() {
2214
2215 return (in == null) ? -1 : in.getColumnNumber();
2216 }
2217
2218 // error handling convenience routines
2219
2220 private void warning(String messageId, Object parameters [])
2221 throws SAXException {
2222
2223 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2224 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2225
2226 dtdHandler.warning(e);
2227 }
2228
2229 void error(String messageId, Object parameters [])
2230 throws SAXException {
2231
2232 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2233 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2234
2235 dtdHandler.error(e);
2236 }
2237
2238 private void fatal(String messageId) throws SAXException {
2239
2240 fatal(messageId, null);
2241 }
2242
2243 private void fatal(String messageId, Object parameters [])
2244 throws SAXException {
2245
2246 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters),
2247 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber());
2248
2249 dtdHandler.fatalError(e);
2250
2251 throw e;
2252 }
2253
2254 //
2255 // Map char arrays to strings ... cuts down both on memory and
2256 // CPU usage for element/attribute/other names that are reused.
2257 //
2258 // Documents typically repeat names a lot, so we more or less
2259 // intern all the strings within the document; since some strings
2260 // are repeated in multiple documents (e.g. stylesheets) we go
2261 // a bit further, and intern globally.
2262 //
2263 static class NameCache {
2264 //
2265 // Unless we auto-grow this, the default size should be a
2266 // reasonable bit larger than needed for most XML files
2267 // we've yet seen (and be prime). If it's too small, the
2268 // penalty is just excess cache collisions.
2269 //
2270 NameCacheEntry hashtable [] = new NameCacheEntry[541];
2271
2272 //
2273 // Usually we just want to get the 'symbol' for these chars
2274 //
2275 String lookup(char value [], int len) {
2276
2277 return lookupEntry(value, len).name;
2278 }
2279
2280 //
2281 // Sometimes we need to scan the chars in the resulting
2282 // string, so there's an accessor which exposes them.
2283 // (Mostly for element end tags.)
2284 //
2285 NameCacheEntry lookupEntry(char value [], int len) {
2286
2287 int index = 0;
2288 NameCacheEntry entry;
2289
2290 // hashing to get index
2291 for (int i = 0; i < len; i++)
2292 index = index * 31 + value[i];
2293 index &= 0x7fffffff;
2294 index %= hashtable.length;
2295
2296 // return entry if one's there ...
2297 for (entry = hashtable[index];
2298 entry != null;
2299 entry = entry.next) {
2300 if (entry.matches(value, len))
2301 return entry;
2302 }
2303
2304 // else create new one
2305 entry = new NameCacheEntry();
2306 entry.chars = new char[len];
2307 System.arraycopy(value, 0, entry.chars, 0, len);
2308 entry.name = new String(entry.chars);
2309 //
2310 // NOTE: JDK 1.1 has a fixed size string intern table,
2311 // with non-GC'd entries. It can panic here; that's a
2312 // JDK problem, use 1.2 or later with many identifiers.
2313 //
2314 entry.name = entry.name.intern(); // "global" intern
2315 entry.next = hashtable[index];
2316 hashtable[index] = entry;
2317 return entry;
2318 }
2319 }
2320
2321 static class NameCacheEntry {
2322
2323 String name;
2324 char chars [];
2325 NameCacheEntry next;
2326
2327 boolean matches(char value [], int len) {
2328
2329 if (chars.length != len)
2330 return false;
2331 for (int i = 0; i < len; i++)
2332 if (value[i] != chars[i])
2333 return false;
2334 return true;
2335 }
2336 }
2337
2338 //
2339 // Message catalog for diagnostics.
2340 //
2341 static final Catalog messages = new Catalog();
2342
2343 static final class Catalog extends MessageCatalog {
2344
2345 Catalog() {
2346 super(DTDParser.class);
2347 }
2348 }
2349
2350 }

mercurial