|
1 /* |
|
2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 package com.sun.xml.internal.dtdparser; |
|
27 |
|
28 import org.xml.sax.EntityResolver; |
|
29 import org.xml.sax.InputSource; |
|
30 import org.xml.sax.Locator; |
|
31 import org.xml.sax.SAXException; |
|
32 import org.xml.sax.SAXParseException; |
|
33 |
|
34 import java.io.IOException; |
|
35 import java.util.ArrayList; |
|
36 import java.util.Enumeration; |
|
37 import java.util.Hashtable; |
|
38 import java.util.Locale; |
|
39 import java.util.Set; |
|
40 import java.util.Vector; |
|
41 |
|
42 /** |
|
43 * This implements parsing of XML 1.0 DTDs. |
|
44 * <p/> |
|
45 * This conforms to the portion of the XML 1.0 specification related |
|
46 * to the external DTD subset. |
|
47 * <p/> |
|
48 * For multi-language applications (such as web servers using XML |
|
49 * processing to create dynamic content), a method supports choosing |
|
50 * a locale for parser diagnostics which is both understood by the |
|
51 * message recipient and supported by the parser. |
|
52 * <p/> |
|
53 * This parser produces a stream of parse events. It supports some |
|
54 * features (exposing comments, CDATA sections, and entity references) |
|
55 * which are not required to be reported by conformant XML processors. |
|
56 * |
|
57 * @author David Brownell |
|
58 * @author Janet Koenig |
|
59 * @author Kohsuke KAWAGUCHI |
|
60 * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $ |
|
61 */ |
|
62 public class DTDParser { |
|
63 public final static String TYPE_CDATA = "CDATA"; |
|
64 public final static String TYPE_ID = "ID"; |
|
65 public final static String TYPE_IDREF = "IDREF"; |
|
66 public final static String TYPE_IDREFS = "IDREFS"; |
|
67 public final static String TYPE_ENTITY = "ENTITY"; |
|
68 public final static String TYPE_ENTITIES = "ENTITIES"; |
|
69 public final static String TYPE_NMTOKEN = "NMTOKEN"; |
|
70 public final static String TYPE_NMTOKENS = "NMTOKENS"; |
|
71 public final static String TYPE_NOTATION = "NOTATION"; |
|
72 public final static String TYPE_ENUMERATION = "ENUMERATION"; |
|
73 |
|
74 |
|
75 // stack of input entities being merged |
|
76 private InputEntity in; |
|
77 |
|
78 // temporaries reused during parsing |
|
79 private StringBuffer strTmp; |
|
80 private char nameTmp []; |
|
81 private NameCache nameCache; |
|
82 private char charTmp [] = new char[2]; |
|
83 |
|
84 // temporary DTD parsing state |
|
85 private boolean doLexicalPE; |
|
86 |
|
87 // DTD state, used during parsing |
|
88 // private SimpleHashtable elements = new SimpleHashtable (47); |
|
89 protected final Set declaredElements = new java.util.HashSet(); |
|
90 private SimpleHashtable params = new SimpleHashtable(7); |
|
91 |
|
92 // exposed to package-private subclass |
|
93 Hashtable notations = new Hashtable(7); |
|
94 SimpleHashtable entities = new SimpleHashtable(17); |
|
95 |
|
96 private SimpleHashtable ids = new SimpleHashtable(); |
|
97 |
|
98 // listeners for DTD parsing events |
|
99 private DTDEventListener dtdHandler; |
|
100 |
|
101 private EntityResolver resolver; |
|
102 private Locale locale; |
|
103 |
|
104 // string constants -- use these copies so "==" works |
|
105 // package private |
|
106 static final String strANY = "ANY"; |
|
107 static final String strEMPTY = "EMPTY"; |
|
108 |
|
109 /** |
|
110 * Used by applications to request locale for diagnostics. |
|
111 * |
|
112 * @param l The locale to use, or null to use system defaults |
|
113 * (which may include only message IDs). |
|
114 */ |
|
115 public void setLocale(Locale l) throws SAXException { |
|
116 |
|
117 if (l != null && !messages.isLocaleSupported(l.toString())) { |
|
118 throw new SAXException(messages.getMessage(locale, |
|
119 "P-078", new Object[]{l})); |
|
120 } |
|
121 locale = l; |
|
122 } |
|
123 |
|
124 /** |
|
125 * Returns the diagnostic locale. |
|
126 */ |
|
127 public Locale getLocale() { |
|
128 return locale; |
|
129 } |
|
130 |
|
131 /** |
|
132 * Chooses a client locale to use for diagnostics, using the first |
|
133 * language specified in the list that is supported by this parser. |
|
134 * That locale is then set using <a href="#setLocale(java.util.Locale)"> |
|
135 * setLocale()</a>. Such a list could be provided by a variety of user |
|
136 * preference mechanisms, including the HTTP <em>Accept-Language</em> |
|
137 * header field. |
|
138 * |
|
139 * @param languages Array of language specifiers, ordered with the most |
|
140 * preferable one at the front. For example, "en-ca" then "fr-ca", |
|
141 * followed by "zh_CN". Both RFC 1766 and Java styles are supported. |
|
142 * @return The chosen locale, or null. |
|
143 * @see MessageCatalog |
|
144 */ |
|
145 public Locale chooseLocale(String languages []) |
|
146 throws SAXException { |
|
147 |
|
148 Locale l = messages.chooseLocale(languages); |
|
149 |
|
150 if (l != null) { |
|
151 setLocale(l); |
|
152 } |
|
153 return l; |
|
154 } |
|
155 |
|
156 /** |
|
157 * Lets applications control entity resolution. |
|
158 */ |
|
159 public void setEntityResolver(EntityResolver r) { |
|
160 |
|
161 resolver = r; |
|
162 } |
|
163 |
|
164 /** |
|
165 * Returns the object used to resolve entities |
|
166 */ |
|
167 public EntityResolver getEntityResolver() { |
|
168 |
|
169 return resolver; |
|
170 } |
|
171 |
|
172 /** |
|
173 * Used by applications to set handling of DTD parsing events. |
|
174 */ |
|
175 public void setDtdHandler(DTDEventListener handler) { |
|
176 dtdHandler = handler; |
|
177 if (handler != null) |
|
178 handler.setDocumentLocator(new Locator() { |
|
179 public String getPublicId() { |
|
180 return DTDParser.this.getPublicId(); |
|
181 } |
|
182 |
|
183 public String getSystemId() { |
|
184 return DTDParser.this.getSystemId(); |
|
185 } |
|
186 |
|
187 public int getLineNumber() { |
|
188 return DTDParser.this.getLineNumber(); |
|
189 } |
|
190 |
|
191 public int getColumnNumber() { |
|
192 return DTDParser.this.getColumnNumber(); |
|
193 } |
|
194 }); |
|
195 } |
|
196 |
|
197 /** |
|
198 * Returns the handler used to for DTD parsing events. |
|
199 */ |
|
200 public DTDEventListener getDtdHandler() { |
|
201 return dtdHandler; |
|
202 } |
|
203 |
|
204 /** |
|
205 * Parse a DTD. |
|
206 */ |
|
207 public void parse(InputSource in) |
|
208 throws IOException, SAXException { |
|
209 init(); |
|
210 parseInternal(in); |
|
211 } |
|
212 |
|
213 /** |
|
214 * Parse a DTD. |
|
215 */ |
|
216 public void parse(String uri) |
|
217 throws IOException, SAXException { |
|
218 InputSource in; |
|
219 |
|
220 init(); |
|
221 // System.out.println ("parse (\"" + uri + "\")"); |
|
222 in = resolver.resolveEntity(null, uri); |
|
223 |
|
224 // If custom resolver punts resolution to parser, handle it ... |
|
225 if (in == null) { |
|
226 in = Resolver.createInputSource(new java.net.URL(uri), false); |
|
227 |
|
228 // ... or if custom resolver doesn't correctly construct the |
|
229 // input entity, patch it up enough so relative URIs work, and |
|
230 // issue a warning to minimize later confusion. |
|
231 } else if (in.getSystemId() == null) { |
|
232 warning("P-065", null); |
|
233 in.setSystemId(uri); |
|
234 } |
|
235 |
|
236 parseInternal(in); |
|
237 } |
|
238 |
|
239 // makes sure the parser is reset to "before a document" |
|
240 private void init() { |
|
241 in = null; |
|
242 |
|
243 // alloc temporary data used in parsing |
|
244 strTmp = new StringBuffer(); |
|
245 nameTmp = new char[20]; |
|
246 nameCache = new NameCache(); |
|
247 |
|
248 // reset doc info |
|
249 // isInAttribute = false; |
|
250 |
|
251 doLexicalPE = false; |
|
252 |
|
253 entities.clear(); |
|
254 notations.clear(); |
|
255 params.clear(); |
|
256 // elements.clear (); |
|
257 declaredElements.clear(); |
|
258 |
|
259 // initialize predefined references ... re-interpreted later |
|
260 builtin("amp", "&"); |
|
261 builtin("lt", "<"); |
|
262 builtin("gt", ">"); |
|
263 builtin("quot", "\""); |
|
264 builtin("apos", "'"); |
|
265 |
|
266 if (locale == null) |
|
267 locale = Locale.getDefault(); |
|
268 if (resolver == null) |
|
269 resolver = new Resolver(); |
|
270 if (dtdHandler == null) |
|
271 dtdHandler = new DTDHandlerBase(); |
|
272 } |
|
273 |
|
274 private void builtin(String entityName, String entityValue) { |
|
275 InternalEntity entity; |
|
276 entity = new InternalEntity(entityName, entityValue.toCharArray()); |
|
277 entities.put(entityName, entity); |
|
278 } |
|
279 |
|
280 |
|
281 //////////////////////////////////////////////////////////////// |
|
282 // |
|
283 // parsing is by recursive descent, code roughly |
|
284 // following the BNF rules except tweaked for simple |
|
285 // lookahead. rules are more or less in numeric order, |
|
286 // except where code sharing suggests other structures. |
|
287 // |
|
288 // a classic benefit of recursive descent parsers: it's |
|
289 // relatively easy to get diagnostics that make sense. |
|
290 // |
|
291 //////////////////////////////////////////////////////////////// |
|
292 |
|
293 |
|
294 private void parseInternal(InputSource input) |
|
295 throws IOException, SAXException { |
|
296 |
|
297 if (input == null) |
|
298 fatal("P-000"); |
|
299 |
|
300 try { |
|
301 in = InputEntity.getInputEntity(dtdHandler, locale); |
|
302 in.init(input, null, null, false); |
|
303 |
|
304 dtdHandler.startDTD(in); |
|
305 |
|
306 // [30] extSubset ::= TextDecl? extSubsetDecl |
|
307 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect |
|
308 // | PEReference | S )* |
|
309 // ... same as [79] extPE, which is where the code is |
|
310 |
|
311 ExternalEntity externalSubset = new ExternalEntity(in); |
|
312 externalParameterEntity(externalSubset); |
|
313 |
|
314 if (!in.isEOF()) { |
|
315 fatal("P-001", new Object[] |
|
316 {Integer.toHexString(((int) getc()))}); |
|
317 } |
|
318 afterRoot(); |
|
319 dtdHandler.endDTD(); |
|
320 |
|
321 } catch (EndOfInputException e) { |
|
322 if (!in.isDocument()) { |
|
323 String name = in.getName(); |
|
324 do { // force a relevant URI and line number |
|
325 in = in.pop(); |
|
326 } while (in.isInternal()); |
|
327 fatal("P-002", new Object[]{name}); |
|
328 } else { |
|
329 fatal("P-003", null); |
|
330 } |
|
331 } catch (RuntimeException e) { |
|
332 // Don't discard location that triggered the exception |
|
333 // ## Should properly wrap exception |
|
334 System.err.print("Internal DTD parser error: "); // ## |
|
335 e.printStackTrace(); |
|
336 throw new SAXParseException(e.getMessage() != null |
|
337 ? e.getMessage() : e.getClass().getName(), |
|
338 getPublicId(), getSystemId(), |
|
339 getLineNumber(), getColumnNumber()); |
|
340 |
|
341 } finally { |
|
342 // recycle temporary data used during parsing |
|
343 strTmp = null; |
|
344 nameTmp = null; |
|
345 nameCache = null; |
|
346 |
|
347 // ditto input sources etc |
|
348 if (in != null) { |
|
349 in.close(); |
|
350 in = null; |
|
351 } |
|
352 |
|
353 // get rid of all DTD info ... some of it would be |
|
354 // useful for editors etc, investigate later. |
|
355 |
|
356 params.clear(); |
|
357 entities.clear(); |
|
358 notations.clear(); |
|
359 declaredElements.clear(); |
|
360 // elements.clear(); |
|
361 ids.clear(); |
|
362 } |
|
363 } |
|
364 |
|
365 void afterRoot() throws SAXException { |
|
366 // Make sure all IDREFs match declared ID attributes. We scan |
|
367 // after the document element is parsed, since XML allows forward |
|
368 // references, and only now can we know if they're all resolved. |
|
369 |
|
370 for (Enumeration e = ids.keys(); |
|
371 e.hasMoreElements(); |
|
372 ) { |
|
373 String id = (String) e.nextElement(); |
|
374 Boolean value = (Boolean) ids.get(id); |
|
375 if (Boolean.FALSE == value) |
|
376 error("V-024", new Object[]{id}); |
|
377 } |
|
378 } |
|
379 |
|
380 |
|
381 // role is for diagnostics |
|
382 private void whitespace(String roleId) |
|
383 throws IOException, SAXException { |
|
384 |
|
385 // [3] S ::= (#x20 | #x9 | #xd | #xa)+ |
|
386 if (!maybeWhitespace()) { |
|
387 fatal("P-004", new Object[] |
|
388 {messages.getMessage(locale, roleId)}); |
|
389 } |
|
390 } |
|
391 |
|
392 // S? |
|
393 private boolean maybeWhitespace() |
|
394 throws IOException, SAXException { |
|
395 |
|
396 if (!doLexicalPE) |
|
397 return in.maybeWhitespace(); |
|
398 |
|
399 // see getc() for the PE logic -- this lets us splice |
|
400 // expansions of PEs in "anywhere". getc() has smarts, |
|
401 // so for external PEs we don't bypass it. |
|
402 |
|
403 // XXX we can marginally speed PE handling, and certainly |
|
404 // be cleaner (hence potentially more correct), by using |
|
405 // the observations that expanded PEs only start and stop |
|
406 // where whitespace is allowed. getc wouldn't need any |
|
407 // "lexical" PE expansion logic, and no other method needs |
|
408 // to handle termination of PEs. (parsing of literals would |
|
409 // still need to pop entities, but not parsing of references |
|
410 // in content.) |
|
411 |
|
412 char c = getc(); |
|
413 boolean saw = false; |
|
414 |
|
415 while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
|
416 saw = true; |
|
417 |
|
418 // this gracefully ends things when we stop playing |
|
419 // with internal parameters. caller should have a |
|
420 // grammar rule allowing whitespace at end of entity. |
|
421 if (in.isEOF() && !in.isInternal()) |
|
422 return saw; |
|
423 c = getc(); |
|
424 } |
|
425 ungetc(); |
|
426 return saw; |
|
427 } |
|
428 |
|
429 private String maybeGetName() |
|
430 throws IOException, SAXException { |
|
431 |
|
432 NameCacheEntry entry = maybeGetNameCacheEntry(); |
|
433 return (entry == null) ? null : entry.name; |
|
434 } |
|
435 |
|
436 private NameCacheEntry maybeGetNameCacheEntry() |
|
437 throws IOException, SAXException { |
|
438 |
|
439 // [5] Name ::= (Letter|'_'|':') (Namechar)* |
|
440 char c = getc(); |
|
441 |
|
442 if (!XmlChars.isLetter(c) && c != ':' && c != '_') { |
|
443 ungetc(); |
|
444 return null; |
|
445 } |
|
446 return nameCharString(c); |
|
447 } |
|
448 |
|
449 // Used when parsing enumerations |
|
450 private String getNmtoken() |
|
451 throws IOException, SAXException { |
|
452 |
|
453 // [7] Nmtoken ::= (Namechar)+ |
|
454 char c = getc(); |
|
455 if (!XmlChars.isNameChar(c)) |
|
456 fatal("P-006", new Object[]{new Character(c)}); |
|
457 return nameCharString(c).name; |
|
458 } |
|
459 |
|
460 // n.b. this gets used when parsing attribute values (for |
|
461 // internal references) so we can't use strTmp; it's also |
|
462 // a hotspot for CPU and memory in the parser (called at least |
|
463 // once for each element) so this has been optimized a bit. |
|
464 |
|
465 private NameCacheEntry nameCharString(char c) |
|
466 throws IOException, SAXException { |
|
467 |
|
468 int i = 1; |
|
469 |
|
470 nameTmp[0] = c; |
|
471 for (; ;) { |
|
472 if ((c = in.getNameChar()) == 0) |
|
473 break; |
|
474 if (i >= nameTmp.length) { |
|
475 char tmp [] = new char[nameTmp.length + 10]; |
|
476 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length); |
|
477 nameTmp = tmp; |
|
478 } |
|
479 nameTmp[i++] = c; |
|
480 } |
|
481 return nameCache.lookupEntry(nameTmp, i); |
|
482 } |
|
483 |
|
484 // |
|
485 // much similarity between parsing entity values in DTD |
|
486 // and attribute values (in DTD or content) ... both follow |
|
487 // literal parsing rules, newline canonicalization, etc |
|
488 // |
|
489 // leaves value in 'strTmp' ... either a "replacement text" (4.5), |
|
490 // or else partially normalized attribute value (the first bit |
|
491 // of 3.3.3's spec, without the "if not CDATA" bits). |
|
492 // |
|
493 private void parseLiteral(boolean isEntityValue) |
|
494 throws IOException, SAXException { |
|
495 |
|
496 // [9] EntityValue ::= |
|
497 // '"' ([^"&%] | Reference | PEReference)* '"' |
|
498 // | "'" ([^'&%] | Reference | PEReference)* "'" |
|
499 // [10] AttValue ::= |
|
500 // '"' ([^"&] | Reference )* '"' |
|
501 // | "'" ([^'&] | Reference )* "'" |
|
502 char quote = getc(); |
|
503 char c; |
|
504 InputEntity source = in; |
|
505 |
|
506 if (quote != '\'' && quote != '"') { |
|
507 fatal("P-007"); |
|
508 } |
|
509 |
|
510 // don't report entity expansions within attributes, |
|
511 // they're reported "fully expanded" via SAX |
|
512 // isInAttribute = !isEntityValue; |
|
513 |
|
514 // get value into strTmp |
|
515 strTmp = new StringBuffer(); |
|
516 |
|
517 // scan, allowing entity push/pop wherever ... |
|
518 // expanded entities can't terminate the literal! |
|
519 for (; ;) { |
|
520 if (in != source && in.isEOF()) { |
|
521 // we don't report end of parsed entities |
|
522 // within attributes (no SAX hooks) |
|
523 in = in.pop(); |
|
524 continue; |
|
525 } |
|
526 if ((c = getc()) == quote && in == source) { |
|
527 break; |
|
528 } |
|
529 |
|
530 // |
|
531 // Basically the "reference in attribute value" |
|
532 // row of the chart in section 4.4 of the spec |
|
533 // |
|
534 if (c == '&') { |
|
535 String entityName = maybeGetName(); |
|
536 |
|
537 if (entityName != null) { |
|
538 nextChar(';', "F-020", entityName); |
|
539 |
|
540 // 4.4 says: bypass these here ... we'll catch |
|
541 // forbidden refs to unparsed entities on use |
|
542 if (isEntityValue) { |
|
543 strTmp.append('&'); |
|
544 strTmp.append(entityName); |
|
545 strTmp.append(';'); |
|
546 continue; |
|
547 } |
|
548 expandEntityInLiteral(entityName, entities, isEntityValue); |
|
549 |
|
550 |
|
551 // character references are always included immediately |
|
552 } else if ((c = getc()) == '#') { |
|
553 int tmp = parseCharNumber(); |
|
554 |
|
555 if (tmp > 0xffff) { |
|
556 tmp = surrogatesToCharTmp(tmp); |
|
557 strTmp.append(charTmp[0]); |
|
558 if (tmp == 2) |
|
559 strTmp.append(charTmp[1]); |
|
560 } else |
|
561 strTmp.append((char) tmp); |
|
562 } else |
|
563 fatal("P-009"); |
|
564 continue; |
|
565 |
|
566 } |
|
567 |
|
568 // expand parameter entities only within entity value literals |
|
569 if (c == '%' && isEntityValue) { |
|
570 String entityName = maybeGetName(); |
|
571 |
|
572 if (entityName != null) { |
|
573 nextChar(';', "F-021", entityName); |
|
574 expandEntityInLiteral(entityName, params, isEntityValue); |
|
575 continue; |
|
576 } else |
|
577 fatal("P-011"); |
|
578 } |
|
579 |
|
580 // For attribute values ... |
|
581 if (!isEntityValue) { |
|
582 // 3.3.3 says whitespace normalizes to space... |
|
583 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
|
584 strTmp.append(' '); |
|
585 continue; |
|
586 } |
|
587 |
|
588 // "<" not legal in parsed literals ... |
|
589 if (c == '<') |
|
590 fatal("P-012"); |
|
591 } |
|
592 |
|
593 strTmp.append(c); |
|
594 } |
|
595 // isInAttribute = false; |
|
596 } |
|
597 |
|
598 // does a SINGLE expansion of the entity (often reparsed later) |
|
599 private void expandEntityInLiteral(String name, SimpleHashtable table, |
|
600 boolean isEntityValue) |
|
601 throws IOException, SAXException { |
|
602 |
|
603 Object entity = table.get(name); |
|
604 |
|
605 if (entity instanceof InternalEntity) { |
|
606 InternalEntity value = (InternalEntity) entity; |
|
607 pushReader(value.buf, name, !value.isPE); |
|
608 |
|
609 } else if (entity instanceof ExternalEntity) { |
|
610 if (!isEntityValue) // must be a PE ... |
|
611 fatal("P-013", new Object[]{name}); |
|
612 // XXX if this returns false ... |
|
613 pushReader((ExternalEntity) entity); |
|
614 |
|
615 } else if (entity == null) { |
|
616 // |
|
617 // Note: much confusion about whether spec requires such |
|
618 // errors to be fatal in many cases, but none about whether |
|
619 // it allows "normal" errors to be unrecoverable! |
|
620 // |
|
621 fatal((table == params) ? "V-022" : "P-014", |
|
622 new Object[]{name}); |
|
623 } |
|
624 } |
|
625 |
|
626 // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
|
627 // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>' |
|
628 |
|
629 // NOTE: XML spec should explicitly say that PE ref syntax is |
|
630 // ignored in PIs, comments, SystemLiterals, and Pubid Literal |
|
631 // values ... can't process the XML spec's own DTD without doing |
|
632 // that for comments. |
|
633 |
|
634 private String getQuotedString(String type, String extra) |
|
635 throws IOException, SAXException { |
|
636 |
|
637 // use in.getc to bypass PE processing |
|
638 char quote = in.getc(); |
|
639 |
|
640 if (quote != '\'' && quote != '"') |
|
641 fatal("P-015", new Object[]{ |
|
642 messages.getMessage(locale, type, new Object[]{extra}) |
|
643 }); |
|
644 |
|
645 char c; |
|
646 |
|
647 strTmp = new StringBuffer(); |
|
648 while ((c = in.getc()) != quote) |
|
649 strTmp.append((char) c); |
|
650 return strTmp.toString(); |
|
651 } |
|
652 |
|
653 |
|
654 private String parsePublicId() throws IOException, SAXException { |
|
655 |
|
656 // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'") |
|
657 // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%] |
|
658 String retval = getQuotedString("F-033", null); |
|
659 for (int i = 0; i < retval.length(); i++) { |
|
660 char c = retval.charAt(i); |
|
661 if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1 |
|
662 && !(c >= 'A' && c <= 'Z') |
|
663 && !(c >= 'a' && c <= 'z')) |
|
664 fatal("P-016", new Object[]{new Character(c)}); |
|
665 } |
|
666 strTmp = new StringBuffer(); |
|
667 strTmp.append(retval); |
|
668 return normalize(false); |
|
669 } |
|
670 |
|
671 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
|
672 // handled by: InputEntity.parsedContent() |
|
673 |
|
674 private boolean maybeComment(boolean skipStart) |
|
675 throws IOException, SAXException { |
|
676 |
|
677 // [15] Comment ::= '<!--' |
|
678 // ( (Char - '-') | ('-' (Char - '-'))* |
|
679 // '-->' |
|
680 if (!in.peek(skipStart ? "!--" : "<!--", null)) |
|
681 return false; |
|
682 |
|
683 boolean savedLexicalPE = doLexicalPE; |
|
684 boolean saveCommentText; |
|
685 |
|
686 doLexicalPE = false; |
|
687 saveCommentText = false; |
|
688 if (saveCommentText) |
|
689 strTmp = new StringBuffer(); |
|
690 |
|
691 oneComment: |
|
692 for (; ;) { |
|
693 try { |
|
694 // bypass PE expansion, but permit PEs |
|
695 // to complete ... valid docs won't care. |
|
696 for (; ;) { |
|
697 int c = getc(); |
|
698 if (c == '-') { |
|
699 c = getc(); |
|
700 if (c != '-') { |
|
701 if (saveCommentText) |
|
702 strTmp.append('-'); |
|
703 ungetc(); |
|
704 continue; |
|
705 } |
|
706 nextChar('>', "F-022", null); |
|
707 break oneComment; |
|
708 } |
|
709 if (saveCommentText) |
|
710 strTmp.append((char) c); |
|
711 } |
|
712 } catch (EndOfInputException e) { |
|
713 // |
|
714 // This is fatal EXCEPT when we're processing a PE... |
|
715 // in which case a validating processor reports an error. |
|
716 // External PEs are easy to detect; internal ones we |
|
717 // infer by being an internal entity outside an element. |
|
718 // |
|
719 if (in.isInternal()) { |
|
720 error("V-021", null); |
|
721 } |
|
722 fatal("P-017"); |
|
723 } |
|
724 } |
|
725 doLexicalPE = savedLexicalPE; |
|
726 if (saveCommentText) |
|
727 dtdHandler.comment(strTmp.toString()); |
|
728 return true; |
|
729 } |
|
730 |
|
731 private boolean maybePI(boolean skipStart) |
|
732 throws IOException, SAXException { |
|
733 |
|
734 // [16] PI ::= '<?' PITarget |
|
735 // (S (Char* - (Char* '?>' Char*)))? |
|
736 // '?>' |
|
737 // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l') |
|
738 boolean savedLexicalPE = doLexicalPE; |
|
739 |
|
740 if (!in.peek(skipStart ? "?" : "<?", null)) |
|
741 return false; |
|
742 doLexicalPE = false; |
|
743 |
|
744 String target = maybeGetName(); |
|
745 |
|
746 if (target == null) { |
|
747 fatal("P-018"); |
|
748 } |
|
749 if ("xml".equals(target)) { |
|
750 fatal("P-019"); |
|
751 } |
|
752 if ("xml".equalsIgnoreCase(target)) { |
|
753 fatal("P-020", new Object[]{target}); |
|
754 } |
|
755 |
|
756 if (maybeWhitespace()) { |
|
757 strTmp = new StringBuffer(); |
|
758 try { |
|
759 for (; ;) { |
|
760 // use in.getc to bypass PE processing |
|
761 char c = in.getc(); |
|
762 //Reached the end of PI. |
|
763 if (c == '?' && in.peekc('>')) |
|
764 break; |
|
765 strTmp.append(c); |
|
766 } |
|
767 } catch (EndOfInputException e) { |
|
768 fatal("P-021"); |
|
769 } |
|
770 dtdHandler.processingInstruction(target, strTmp.toString()); |
|
771 } else { |
|
772 if (!in.peek("?>", null)) { |
|
773 fatal("P-022"); |
|
774 } |
|
775 dtdHandler.processingInstruction(target, ""); |
|
776 } |
|
777 |
|
778 doLexicalPE = savedLexicalPE; |
|
779 return true; |
|
780 } |
|
781 |
|
782 // [18] CDSect ::= CDStart CData CDEnd |
|
783 // [19] CDStart ::= '<![CDATA[' |
|
784 // [20] CData ::= (Char* - (Char* ']]>' Char*)) |
|
785 // [21] CDEnd ::= ']]>' |
|
786 // |
|
787 // ... handled by InputEntity.unparsedContent() |
|
788 |
|
789 // collapsing several rules together ... |
|
790 // simpler than attribute literals -- no reference parsing! |
|
791 private String maybeReadAttribute(String name, boolean must) |
|
792 throws IOException, SAXException { |
|
793 |
|
794 // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\" |
|
795 // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\" |
|
796 // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\" |
|
797 if (!maybeWhitespace()) { |
|
798 if (!must) { |
|
799 return null; |
|
800 } |
|
801 fatal("P-024", new Object[]{name}); |
|
802 // NOTREACHED |
|
803 } |
|
804 |
|
805 if (!peek(name)) { |
|
806 if (must) { |
|
807 fatal("P-024", new Object[]{name}); |
|
808 } else { |
|
809 // To ensure that the whitespace is there so that when we |
|
810 // check for the next attribute we assure that the |
|
811 // whitespace still exists. |
|
812 ungetc(); |
|
813 return null; |
|
814 } |
|
815 } |
|
816 |
|
817 // [25] Eq ::= S? '=' S? |
|
818 maybeWhitespace(); |
|
819 nextChar('=', "F-023", null); |
|
820 maybeWhitespace(); |
|
821 |
|
822 return getQuotedString("F-035", name); |
|
823 } |
|
824 |
|
825 private void readVersion(boolean must, String versionNum) |
|
826 throws IOException, SAXException { |
|
827 |
|
828 String value = maybeReadAttribute("version", must); |
|
829 |
|
830 // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+ |
|
831 |
|
832 if (must && value == null) |
|
833 fatal("P-025", new Object[]{versionNum}); |
|
834 if (value != null) { |
|
835 int length = value.length(); |
|
836 for (int i = 0; i < length; i++) { |
|
837 char c = value.charAt(i); |
|
838 if (!((c >= '0' && c <= '9') |
|
839 || c == '_' || c == '.' |
|
840 || (c >= 'a' && c <= 'z') |
|
841 || (c >= 'A' && c <= 'Z') |
|
842 || c == ':' || c == '-') |
|
843 ) |
|
844 fatal("P-026", new Object[]{value}); |
|
845 } |
|
846 } |
|
847 if (value != null && !value.equals(versionNum)) |
|
848 error("P-027", new Object[]{versionNum, value}); |
|
849 } |
|
850 |
|
851 // common code used by most markup declarations |
|
852 // ... S (Q)Name ... |
|
853 private String getMarkupDeclname(String roleId, boolean qname) |
|
854 throws IOException, SAXException { |
|
855 |
|
856 String name; |
|
857 |
|
858 whitespace(roleId); |
|
859 name = maybeGetName(); |
|
860 if (name == null) |
|
861 fatal("P-005", new Object[] |
|
862 {messages.getMessage(locale, roleId)}); |
|
863 return name; |
|
864 } |
|
865 |
|
866 private boolean maybeMarkupDecl() |
|
867 throws IOException, SAXException { |
|
868 |
|
869 // [29] markupdecl ::= elementdecl | Attlistdecl |
|
870 // | EntityDecl | NotationDecl | PI | Comment |
|
871 return maybeElementDecl() |
|
872 || maybeAttlistDecl() |
|
873 || maybeEntityDecl() |
|
874 || maybeNotationDecl() |
|
875 || maybePI(false) |
|
876 || maybeComment(false); |
|
877 } |
|
878 |
|
879 private static final String XmlLang = "xml:lang"; |
|
880 |
|
881 private boolean isXmlLang(String value) { |
|
882 |
|
883 // [33] LanguageId ::= Langcode ('-' Subcode)* |
|
884 // [34] Langcode ::= ISO639Code | IanaCode | UserCode |
|
885 // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z] |
|
886 // [36] IanaCode ::= [iI] '-' SubCode |
|
887 // [37] UserCode ::= [xX] '-' SubCode |
|
888 // [38] SubCode ::= [a-zA-Z]+ |
|
889 |
|
890 // the ISO and IANA codes (and subcodes) are registered, |
|
891 // but that's neither a WF nor a validity constraint. |
|
892 |
|
893 int nextSuffix; |
|
894 char c; |
|
895 |
|
896 if (value.length() < 2) |
|
897 return false; |
|
898 c = value.charAt(1); |
|
899 if (c == '-') { // IANA, or user, code |
|
900 c = value.charAt(0); |
|
901 if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X')) |
|
902 return false; |
|
903 nextSuffix = 1; |
|
904 } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { |
|
905 // 2 letter ISO code, or error |
|
906 c = value.charAt(0); |
|
907 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) |
|
908 return false; |
|
909 nextSuffix = 2; |
|
910 } else |
|
911 return false; |
|
912 |
|
913 // here "suffix" ::= '-' [a-zA-Z]+ suffix* |
|
914 while (nextSuffix < value.length()) { |
|
915 c = value.charAt(nextSuffix); |
|
916 if (c != '-') |
|
917 break; |
|
918 while (++nextSuffix < value.length()) { |
|
919 c = value.charAt(nextSuffix); |
|
920 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) |
|
921 break; |
|
922 } |
|
923 } |
|
924 return value.length() == nextSuffix && c != '-'; |
|
925 } |
|
926 |
|
927 |
|
928 // |
|
929 // CHAPTER 3: Logical Structures |
|
930 // |
|
931 |
|
932 /** |
|
933 * To validate, subclassers should at this time make sure that |
|
934 * values are of the declared types:<UL> |
|
935 * <LI> ID and IDREF(S) values are Names |
|
936 * <LI> NMTOKEN(S) are Nmtokens |
|
937 * <LI> ENUMERATION values match one of the tokens |
|
938 * <LI> NOTATION values match a notation name |
|
939 * <LI> ENTITY(IES) values match an unparsed external entity |
|
940 * </UL> |
|
941 * <p/> |
|
942 * <P> Separately, make sure IDREF values match some ID |
|
943 * provided in the document (in the afterRoot method). |
|
944 */ |
|
945 /* void validateAttributeSyntax (Attribute attr, String value) |
|
946 throws DTDParseException { |
|
947 // ID, IDREF(S) ... values are Names |
|
948 if (Attribute.ID == attr.type()) { |
|
949 if (!XmlNames.isName (value)) |
|
950 error ("V-025", new Object [] { value }); |
|
951 |
|
952 Boolean b = (Boolean) ids.getNonInterned (value); |
|
953 if (b == null || b.equals (Boolean.FALSE)) |
|
954 ids.put (value.intern (), Boolean.TRUE); |
|
955 else |
|
956 error ("V-026", new Object [] { value }); |
|
957 |
|
958 } else if (Attribute.IDREF == attr.type()) { |
|
959 if (!XmlNames.isName (value)) |
|
960 error ("V-027", new Object [] { value }); |
|
961 |
|
962 Boolean b = (Boolean) ids.getNonInterned (value); |
|
963 if (b == null) |
|
964 ids.put (value.intern (), Boolean.FALSE); |
|
965 |
|
966 } else if (Attribute.IDREFS == attr.type()) { |
|
967 StringTokenizer tokenizer = new StringTokenizer (value); |
|
968 Boolean b; |
|
969 boolean sawValue = false; |
|
970 |
|
971 while (tokenizer.hasMoreTokens ()) { |
|
972 value = tokenizer.nextToken (); |
|
973 if (!XmlNames.isName (value)) |
|
974 error ("V-027", new Object [] { value }); |
|
975 b = (Boolean) ids.getNonInterned (value); |
|
976 if (b == null) |
|
977 ids.put (value.intern (), Boolean.FALSE); |
|
978 sawValue = true; |
|
979 } |
|
980 if (!sawValue) |
|
981 error ("V-039", null); |
|
982 |
|
983 |
|
984 // NMTOKEN(S) ... values are Nmtoken(s) |
|
985 } else if (Attribute.NMTOKEN == attr.type()) { |
|
986 if (!XmlNames.isNmtoken (value)) |
|
987 error ("V-028", new Object [] { value }); |
|
988 |
|
989 } else if (Attribute.NMTOKENS == attr.type()) { |
|
990 StringTokenizer tokenizer = new StringTokenizer (value); |
|
991 boolean sawValue = false; |
|
992 |
|
993 while (tokenizer.hasMoreTokens ()) { |
|
994 value = tokenizer.nextToken (); |
|
995 if (!XmlNames.isNmtoken (value)) |
|
996 error ("V-028", new Object [] { value }); |
|
997 sawValue = true; |
|
998 } |
|
999 if (!sawValue) |
|
1000 error ("V-032", null); |
|
1001 |
|
1002 // ENUMERATION ... values match one of the tokens |
|
1003 } else if (Attribute.ENUMERATION == attr.type()) { |
|
1004 for (int i = 0; i < attr.values().length; i++) |
|
1005 if (value.equals (attr.values()[i])) |
|
1006 return; |
|
1007 error ("V-029", new Object [] { value }); |
|
1008 |
|
1009 // NOTATION values match a notation name |
|
1010 } else if (Attribute.NOTATION == attr.type()) { |
|
1011 // |
|
1012 // XXX XML 1.0 spec should probably list references to |
|
1013 // externally defined notations in standalone docs as |
|
1014 // validity errors. Ditto externally defined unparsed |
|
1015 // entities; neither should show up in attributes, else |
|
1016 // one needs to read the external declarations in order |
|
1017 // to make sense of the document (exactly what tagging |
|
1018 // a doc as "standalone" intends you won't need to do). |
|
1019 // |
|
1020 for (int i = 0; i < attr.values().length; i++) |
|
1021 if (value.equals (attr.values()[i])) |
|
1022 return; |
|
1023 error ("V-030", new Object [] { value }); |
|
1024 |
|
1025 // ENTITY(IES) values match an unparsed entity(ies) |
|
1026 } else if (Attribute.ENTITY == attr.type()) { |
|
1027 // see note above re standalone |
|
1028 if (!isUnparsedEntity (value)) |
|
1029 error ("V-031", new Object [] { value }); |
|
1030 |
|
1031 } else if (Attribute.ENTITIES == attr.type()) { |
|
1032 StringTokenizer tokenizer = new StringTokenizer (value); |
|
1033 boolean sawValue = false; |
|
1034 |
|
1035 while (tokenizer.hasMoreTokens ()) { |
|
1036 value = tokenizer.nextToken (); |
|
1037 // see note above re standalone |
|
1038 if (!isUnparsedEntity (value)) |
|
1039 error ("V-031", new Object [] { value }); |
|
1040 sawValue = true; |
|
1041 } |
|
1042 if (!sawValue) |
|
1043 error ("V-040", null); |
|
1044 |
|
1045 } else if (Attribute.CDATA != attr.type()) |
|
1046 throw new InternalError (attr.type()); |
|
1047 } |
|
1048 */ |
|
1049 /* |
|
1050 private boolean isUnparsedEntity (String name) |
|
1051 { |
|
1052 Object e = entities.getNonInterned (name); |
|
1053 if (e == null || !(e instanceof ExternalEntity)) |
|
1054 return false; |
|
1055 return ((ExternalEntity)e).notation != null; |
|
1056 } |
|
1057 */ |
|
1058 private boolean maybeElementDecl() |
|
1059 throws IOException, SAXException { |
|
1060 |
|
1061 // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>' |
|
1062 // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children |
|
1063 InputEntity start = peekDeclaration("!ELEMENT"); |
|
1064 |
|
1065 if (start == null) |
|
1066 return false; |
|
1067 |
|
1068 // n.b. for content models where inter-element whitespace is |
|
1069 // ignorable, we mark that fact here. |
|
1070 String name = getMarkupDeclname("F-015", true); |
|
1071 // Element element = (Element) elements.get (name); |
|
1072 // boolean declEffective = false; |
|
1073 |
|
1074 /* |
|
1075 if (element != null) { |
|
1076 if (element.contentModel() != null) { |
|
1077 error ("V-012", new Object [] { name }); |
|
1078 } // else <!ATTLIST name ...> came first |
|
1079 } else { |
|
1080 element = new Element(name); |
|
1081 elements.put (element.name(), element); |
|
1082 declEffective = true; |
|
1083 } |
|
1084 */ |
|
1085 if (declaredElements.contains(name)) |
|
1086 error("V-012", new Object[]{name}); |
|
1087 else { |
|
1088 declaredElements.add(name); |
|
1089 // declEffective = true; |
|
1090 } |
|
1091 |
|
1092 short modelType; |
|
1093 whitespace("F-000"); |
|
1094 if (peek(strEMPTY)) { |
|
1095 /// // leave element.contentModel as null for this case. |
|
1096 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY); |
|
1097 } else if (peek(strANY)) { |
|
1098 /// element.setContentModel(new StringModel(StringModelType.ANY)); |
|
1099 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY); |
|
1100 } else { |
|
1101 modelType = getMixedOrChildren(name); |
|
1102 } |
|
1103 |
|
1104 dtdHandler.endContentModel(name, modelType); |
|
1105 |
|
1106 maybeWhitespace(); |
|
1107 char c = getc(); |
|
1108 if (c != '>') |
|
1109 fatal("P-036", new Object[]{name, new Character(c)}); |
|
1110 if (start != in) |
|
1111 error("V-013", null); |
|
1112 |
|
1113 /// dtdHandler.elementDecl(element); |
|
1114 |
|
1115 return true; |
|
1116 } |
|
1117 |
|
1118 // We're leaving the content model as a regular expression; |
|
1119 // it's an efficient natural way to express such things, and |
|
1120 // libraries often interpret them. No whitespace in the |
|
1121 // model we store, though! |
|
1122 |
|
1123 /** |
|
1124 * returns content model type. |
|
1125 */ |
|
1126 private short getMixedOrChildren(String elementName/*Element element*/) |
|
1127 throws IOException, SAXException { |
|
1128 |
|
1129 InputEntity start; |
|
1130 |
|
1131 // [47] children ::= (choice|seq) ('?'|'*'|'+')? |
|
1132 strTmp = new StringBuffer(); |
|
1133 |
|
1134 nextChar('(', "F-028", elementName); |
|
1135 start = in; |
|
1136 maybeWhitespace(); |
|
1137 strTmp.append('('); |
|
1138 |
|
1139 short modelType; |
|
1140 if (peek("#PCDATA")) { |
|
1141 strTmp.append("#PCDATA"); |
|
1142 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED); |
|
1143 getMixed(elementName, start); |
|
1144 } else { |
|
1145 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN); |
|
1146 getcps(elementName, start); |
|
1147 } |
|
1148 |
|
1149 return modelType; |
|
1150 } |
|
1151 |
|
1152 // '(' S? already consumed |
|
1153 // matching ')' must be in "start" entity if validating |
|
1154 private void getcps(/*Element element,*/String elementName, InputEntity start) |
|
1155 throws IOException, SAXException { |
|
1156 |
|
1157 // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')? |
|
1158 // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')' |
|
1159 // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')' |
|
1160 boolean decided = false; |
|
1161 char type = 0; |
|
1162 // ContentModel retval, temp, current; |
|
1163 |
|
1164 // retval = temp = current = null; |
|
1165 |
|
1166 dtdHandler.startModelGroup(); |
|
1167 |
|
1168 do { |
|
1169 String tag; |
|
1170 |
|
1171 tag = maybeGetName(); |
|
1172 if (tag != null) { |
|
1173 strTmp.append(tag); |
|
1174 // temp = new ElementModel(tag); |
|
1175 // getFrequency((RepeatableContent)temp); |
|
1176 ///-> |
|
1177 dtdHandler.childElement(tag, getFrequency()); |
|
1178 ///<- |
|
1179 } else if (peek("(")) { |
|
1180 InputEntity next = in; |
|
1181 strTmp.append('('); |
|
1182 maybeWhitespace(); |
|
1183 // temp = getcps(element, next); |
|
1184 // getFrequency(temp); |
|
1185 ///-> |
|
1186 getcps(elementName, next); |
|
1187 /// getFrequency(); <- this looks like a bug |
|
1188 ///<- |
|
1189 } else |
|
1190 fatal((type == 0) ? "P-039" : |
|
1191 ((type == ',') ? "P-037" : "P-038"), |
|
1192 new Object[]{new Character(getc())}); |
|
1193 |
|
1194 maybeWhitespace(); |
|
1195 if (decided) { |
|
1196 char c = getc(); |
|
1197 |
|
1198 // if (current != null) { |
|
1199 // current.addChild(temp); |
|
1200 // } |
|
1201 if (c == type) { |
|
1202 strTmp.append(type); |
|
1203 maybeWhitespace(); |
|
1204 reportConnector(type); |
|
1205 continue; |
|
1206 } else if (c == '\u0029') { // rparen |
|
1207 ungetc(); |
|
1208 continue; |
|
1209 } else { |
|
1210 fatal((type == 0) ? "P-041" : "P-040", |
|
1211 new Object[]{ |
|
1212 new Character(c), |
|
1213 new Character(type) |
|
1214 }); |
|
1215 } |
|
1216 } else { |
|
1217 type = getc(); |
|
1218 switch (type) { |
|
1219 case '|': |
|
1220 case ',': |
|
1221 reportConnector(type); |
|
1222 break; |
|
1223 default: |
|
1224 // retval = temp; |
|
1225 ungetc(); |
|
1226 continue; |
|
1227 } |
|
1228 // retval = (ContentModel)current; |
|
1229 decided = true; |
|
1230 // current.addChild(temp); |
|
1231 strTmp.append(type); |
|
1232 } |
|
1233 maybeWhitespace(); |
|
1234 } while (!peek(")")); |
|
1235 |
|
1236 if (in != start) |
|
1237 error("V-014", new Object[]{elementName}); |
|
1238 strTmp.append(')'); |
|
1239 |
|
1240 dtdHandler.endModelGroup(getFrequency()); |
|
1241 // return retval; |
|
1242 } |
|
1243 |
|
1244 private void reportConnector(char type) throws SAXException { |
|
1245 switch (type) { |
|
1246 case '|': |
|
1247 dtdHandler.connector(DTDEventListener.CHOICE); ///<- |
|
1248 return; |
|
1249 case ',': |
|
1250 dtdHandler.connector(DTDEventListener.SEQUENCE); ///<- |
|
1251 return; |
|
1252 default: |
|
1253 throw new Error(); //assertion failed. |
|
1254 } |
|
1255 } |
|
1256 |
|
1257 private short getFrequency() |
|
1258 throws IOException, SAXException { |
|
1259 |
|
1260 final char c = getc(); |
|
1261 |
|
1262 if (c == '?') { |
|
1263 strTmp.append(c); |
|
1264 return DTDEventListener.OCCURENCE_ZERO_OR_ONE; |
|
1265 // original.setRepeat(Repeat.ZERO_OR_ONE); |
|
1266 } else if (c == '+') { |
|
1267 strTmp.append(c); |
|
1268 return DTDEventListener.OCCURENCE_ONE_OR_MORE; |
|
1269 // original.setRepeat(Repeat.ONE_OR_MORE); |
|
1270 } else if (c == '*') { |
|
1271 strTmp.append(c); |
|
1272 return DTDEventListener.OCCURENCE_ZERO_OR_MORE; |
|
1273 // original.setRepeat(Repeat.ZERO_OR_MORE); |
|
1274 } else { |
|
1275 ungetc(); |
|
1276 return DTDEventListener.OCCURENCE_ONCE; |
|
1277 } |
|
1278 } |
|
1279 |
|
1280 // '(' S? '#PCDATA' already consumed |
|
1281 // matching ')' must be in "start" entity if validating |
|
1282 private void getMixed(String elementName, /*Element element,*/ InputEntity start) |
|
1283 throws IOException, SAXException { |
|
1284 |
|
1285 // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
|
1286 // | '(' S? '#PCDATA' S? ')' |
|
1287 maybeWhitespace(); |
|
1288 if (peek("\u0029*") || peek("\u0029")) { |
|
1289 if (in != start) |
|
1290 error("V-014", new Object[]{elementName}); |
|
1291 strTmp.append(')'); |
|
1292 // element.setContentModel(new StringModel(StringModelType.PCDATA)); |
|
1293 return; |
|
1294 } |
|
1295 |
|
1296 ArrayList l = new ArrayList(); |
|
1297 // l.add(new StringModel(StringModelType.PCDATA)); |
|
1298 |
|
1299 |
|
1300 while (peek("|")) { |
|
1301 String name; |
|
1302 |
|
1303 strTmp.append('|'); |
|
1304 maybeWhitespace(); |
|
1305 |
|
1306 doLexicalPE = true; |
|
1307 name = maybeGetName(); |
|
1308 if (name == null) |
|
1309 fatal("P-042", new Object[] |
|
1310 {elementName, Integer.toHexString(getc())}); |
|
1311 if (l.contains(name)) { |
|
1312 error("V-015", new Object[]{name}); |
|
1313 } else { |
|
1314 l.add(name); |
|
1315 dtdHandler.mixedElement(name); |
|
1316 } |
|
1317 strTmp.append(name); |
|
1318 maybeWhitespace(); |
|
1319 } |
|
1320 |
|
1321 if (!peek("\u0029*")) // right paren |
|
1322 fatal("P-043", new Object[] |
|
1323 {elementName, new Character(getc())}); |
|
1324 if (in != start) |
|
1325 error("V-014", new Object[]{elementName}); |
|
1326 strTmp.append(')'); |
|
1327 // ChoiceModel cm = new ChoiceModel((Collection)l); |
|
1328 // cm.setRepeat(Repeat.ZERO_OR_MORE); |
|
1329 // element.setContentModel(cm); |
|
1330 } |
|
1331 |
|
1332 private boolean maybeAttlistDecl() |
|
1333 throws IOException, SAXException { |
|
1334 |
|
1335 // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' |
|
1336 InputEntity start = peekDeclaration("!ATTLIST"); |
|
1337 |
|
1338 if (start == null) |
|
1339 return false; |
|
1340 |
|
1341 String elementName = getMarkupDeclname("F-016", true); |
|
1342 // Element element = (Element) elements.get (name); |
|
1343 |
|
1344 // if (element == null) { |
|
1345 // // not yet declared -- no problem. |
|
1346 // element = new Element(name); |
|
1347 // elements.put(name, element); |
|
1348 // } |
|
1349 |
|
1350 while (!peek(">")) { |
|
1351 |
|
1352 // [53] AttDef ::= S Name S AttType S DefaultDecl |
|
1353 // [54] AttType ::= StringType | TokenizedType | EnumeratedType |
|
1354 |
|
1355 // look for global attribute definitions, don't expand for now... |
|
1356 maybeWhitespace(); |
|
1357 char c = getc(); |
|
1358 if (c == '%') { |
|
1359 String entityName = maybeGetName(); |
|
1360 if (entityName != null) { |
|
1361 nextChar(';', "F-021", entityName); |
|
1362 whitespace("F-021"); |
|
1363 continue; |
|
1364 } else |
|
1365 fatal("P-011"); |
|
1366 } |
|
1367 |
|
1368 ungetc(); |
|
1369 // look for attribute name otherwise |
|
1370 String attName = maybeGetName(); |
|
1371 if (attName == null) { |
|
1372 fatal("P-044", new Object[]{new Character(getc())}); |
|
1373 } |
|
1374 whitespace("F-001"); |
|
1375 |
|
1376 /// Attribute a = new Attribute (name); |
|
1377 |
|
1378 String typeName; |
|
1379 Vector values = null; // notation/enumeration values |
|
1380 |
|
1381 // Note: use the type constants from Attribute |
|
1382 // so that "==" may be used (faster) |
|
1383 |
|
1384 // [55] StringType ::= 'CDATA' |
|
1385 if (peek(TYPE_CDATA)) |
|
1386 /// a.setType(Attribute.CDATA); |
|
1387 typeName = TYPE_CDATA; |
|
1388 |
|
1389 // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' |
|
1390 // | 'ENTITY' | 'ENTITIES' |
|
1391 // | 'NMTOKEN' | 'NMTOKENS' |
|
1392 // n.b. if "IDREFS" is there, both "ID" and "IDREF" |
|
1393 // match peekahead ... so this order matters! |
|
1394 else if (peek(TYPE_IDREFS)) |
|
1395 typeName = TYPE_IDREFS; |
|
1396 else if (peek(TYPE_IDREF)) |
|
1397 typeName = TYPE_IDREF; |
|
1398 else if (peek(TYPE_ID)) { |
|
1399 typeName = TYPE_ID; |
|
1400 // TODO: should implement this error check? |
|
1401 /// if (element.id() != null) { |
|
1402 /// error ("V-016", new Object [] { element.id() }); |
|
1403 /// } else |
|
1404 /// element.setId(name); |
|
1405 } else if (peek(TYPE_ENTITY)) |
|
1406 typeName = TYPE_ENTITY; |
|
1407 else if (peek(TYPE_ENTITIES)) |
|
1408 typeName = TYPE_ENTITIES; |
|
1409 else if (peek(TYPE_NMTOKENS)) |
|
1410 typeName = TYPE_NMTOKENS; |
|
1411 else if (peek(TYPE_NMTOKEN)) |
|
1412 typeName = TYPE_NMTOKEN; |
|
1413 |
|
1414 // [57] EnumeratedType ::= NotationType | Enumeration |
|
1415 // [58] NotationType ::= 'NOTATION' S '(' S? Name |
|
1416 // (S? '|' S? Name)* S? ')' |
|
1417 else if (peek(TYPE_NOTATION)) { |
|
1418 typeName = TYPE_NOTATION; |
|
1419 whitespace("F-002"); |
|
1420 nextChar('(', "F-029", null); |
|
1421 maybeWhitespace(); |
|
1422 |
|
1423 values = new Vector(); |
|
1424 do { |
|
1425 String name; |
|
1426 if ((name = maybeGetName()) == null) |
|
1427 fatal("P-068"); |
|
1428 // permit deferred declarations |
|
1429 if (notations.get(name) == null) |
|
1430 notations.put(name, name); |
|
1431 values.addElement(name); |
|
1432 maybeWhitespace(); |
|
1433 if (peek("|")) |
|
1434 maybeWhitespace(); |
|
1435 } while (!peek(")")); |
|
1436 /// a.setValues(new String [v.size ()]); |
|
1437 /// for (int i = 0; i < v.size (); i++) |
|
1438 /// a.setValue(i, (String)v.elementAt(i)); |
|
1439 |
|
1440 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')' |
|
1441 } else if (peek("(")) { |
|
1442 /// a.setType(Attribute.ENUMERATION); |
|
1443 typeName = TYPE_ENUMERATION; |
|
1444 |
|
1445 maybeWhitespace(); |
|
1446 |
|
1447 /// Vector v = new Vector (); |
|
1448 values = new Vector(); |
|
1449 do { |
|
1450 String name = getNmtoken(); |
|
1451 /// v.addElement (name); |
|
1452 values.addElement(name); |
|
1453 maybeWhitespace(); |
|
1454 if (peek("|")) |
|
1455 maybeWhitespace(); |
|
1456 } while (!peek(")")); |
|
1457 /// a.setValues(new String [v.size ()]); |
|
1458 /// for (int i = 0; i < v.size (); i++) |
|
1459 /// a.setValue(i, (String)v.elementAt(i)); |
|
1460 } else { |
|
1461 fatal("P-045", |
|
1462 new Object[]{attName, new Character(getc())}); |
|
1463 typeName = null; |
|
1464 } |
|
1465 |
|
1466 short attributeUse; |
|
1467 String defaultValue = null; |
|
1468 |
|
1469 // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' |
|
1470 // | (('#FIXED' S)? AttValue) |
|
1471 whitespace("F-003"); |
|
1472 if (peek("#REQUIRED")) |
|
1473 attributeUse = DTDEventListener.USE_REQUIRED; |
|
1474 /// a.setIsRequired(true); |
|
1475 else if (peek("#FIXED")) { |
|
1476 /// if (a.type() == Attribute.ID) |
|
1477 if (typeName == TYPE_ID) |
|
1478 error("V-017", new Object[]{attName}); |
|
1479 /// a.setIsFixed(true); |
|
1480 attributeUse = DTDEventListener.USE_FIXED; |
|
1481 whitespace("F-004"); |
|
1482 parseLiteral(false); |
|
1483 /// if (a.type() != Attribute.CDATA) |
|
1484 /// a.setDefaultValue(normalize(false)); |
|
1485 /// else |
|
1486 /// a.setDefaultValue(strTmp.toString()); |
|
1487 |
|
1488 if (typeName == TYPE_CDATA) |
|
1489 defaultValue = normalize(false); |
|
1490 else |
|
1491 defaultValue = strTmp.toString(); |
|
1492 |
|
1493 // TODO: implement this check |
|
1494 /// if (a.type() != Attribute.CDATA) |
|
1495 /// validateAttributeSyntax (a, a.defaultValue()); |
|
1496 } else if (!peek("#IMPLIED")) { |
|
1497 attributeUse = DTDEventListener.USE_IMPLIED; |
|
1498 |
|
1499 /// if (a.type() == Attribute.ID) |
|
1500 if (typeName == TYPE_ID) |
|
1501 error("V-018", new Object[]{attName}); |
|
1502 parseLiteral(false); |
|
1503 /// if (a.type() != Attribute.CDATA) |
|
1504 /// a.setDefaultValue(normalize(false)); |
|
1505 /// else |
|
1506 /// a.setDefaultValue(strTmp.toString()); |
|
1507 if (typeName == TYPE_CDATA) |
|
1508 defaultValue = normalize(false); |
|
1509 else |
|
1510 defaultValue = strTmp.toString(); |
|
1511 |
|
1512 // TODO: implement this check |
|
1513 /// if (a.type() != Attribute.CDATA) |
|
1514 /// validateAttributeSyntax (a, a.defaultValue()); |
|
1515 } else { |
|
1516 // TODO: this looks like an fatal error. |
|
1517 attributeUse = DTDEventListener.USE_NORMAL; |
|
1518 } |
|
1519 |
|
1520 if (XmlLang.equals(attName) |
|
1521 && defaultValue/* a.defaultValue()*/ != null |
|
1522 && !isXmlLang(defaultValue/*a.defaultValue()*/)) |
|
1523 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/}); |
|
1524 |
|
1525 // TODO: isn't it an error to specify the same attribute twice? |
|
1526 /// if (!element.attributes().contains(a)) { |
|
1527 /// element.addAttribute(a); |
|
1528 /// dtdHandler.attributeDecl(a); |
|
1529 /// } |
|
1530 |
|
1531 String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null; |
|
1532 dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue); |
|
1533 maybeWhitespace(); |
|
1534 } |
|
1535 if (start != in) |
|
1536 error("V-013", null); |
|
1537 return true; |
|
1538 } |
|
1539 |
|
1540 // used when parsing literal attribute values, |
|
1541 // or public identifiers. |
|
1542 // |
|
1543 // input in strTmp |
|
1544 private String normalize(boolean invalidIfNeeded) { |
|
1545 |
|
1546 // this can allocate an extra string... |
|
1547 |
|
1548 String s = strTmp.toString(); |
|
1549 String s2 = s.trim(); |
|
1550 boolean didStrip = false; |
|
1551 |
|
1552 if (s != s2) { |
|
1553 s = s2; |
|
1554 s2 = null; |
|
1555 didStrip = true; |
|
1556 } |
|
1557 strTmp = new StringBuffer(); |
|
1558 for (int i = 0; i < s.length(); i++) { |
|
1559 char c = s.charAt(i); |
|
1560 if (!XmlChars.isSpace(c)) { |
|
1561 strTmp.append(c); |
|
1562 continue; |
|
1563 } |
|
1564 strTmp.append(' '); |
|
1565 while (++i < s.length() && XmlChars.isSpace(s.charAt(i))) |
|
1566 didStrip = true; |
|
1567 i--; |
|
1568 } |
|
1569 if (didStrip) |
|
1570 return strTmp.toString(); |
|
1571 else |
|
1572 return s; |
|
1573 } |
|
1574 |
|
1575 private boolean maybeConditionalSect() |
|
1576 throws IOException, SAXException { |
|
1577 |
|
1578 // [61] conditionalSect ::= includeSect | ignoreSect |
|
1579 |
|
1580 if (!peek("<![")) |
|
1581 return false; |
|
1582 |
|
1583 String keyword; |
|
1584 InputEntity start = in; |
|
1585 |
|
1586 maybeWhitespace(); |
|
1587 |
|
1588 if ((keyword = maybeGetName()) == null) |
|
1589 fatal("P-046"); |
|
1590 maybeWhitespace(); |
|
1591 nextChar('[', "F-030", null); |
|
1592 |
|
1593 // [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' |
|
1594 // extSubsetDecl ']]>' |
|
1595 if ("INCLUDE".equals(keyword)) { |
|
1596 for (; ;) { |
|
1597 while (in.isEOF() && in != start) |
|
1598 in = in.pop(); |
|
1599 if (in.isEOF()) { |
|
1600 error("V-020", null); |
|
1601 } |
|
1602 if (peek("]]>")) |
|
1603 break; |
|
1604 |
|
1605 doLexicalPE = false; |
|
1606 if (maybeWhitespace()) |
|
1607 continue; |
|
1608 if (maybePEReference()) |
|
1609 continue; |
|
1610 doLexicalPE = true; |
|
1611 if (maybeMarkupDecl() || maybeConditionalSect()) |
|
1612 continue; |
|
1613 |
|
1614 fatal("P-047"); |
|
1615 } |
|
1616 |
|
1617 // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' |
|
1618 // ignoreSectcontents ']]>' |
|
1619 // [64] ignoreSectcontents ::= Ignore ('<![' |
|
1620 // ignoreSectcontents ']]>' Ignore)* |
|
1621 // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) |
|
1622 } else if ("IGNORE".equals(keyword)) { |
|
1623 int nestlevel = 1; |
|
1624 // ignoreSectcontents |
|
1625 doLexicalPE = false; |
|
1626 while (nestlevel > 0) { |
|
1627 char c = getc(); // will pop input entities |
|
1628 if (c == '<') { |
|
1629 if (peek("![")) |
|
1630 nestlevel++; |
|
1631 } else if (c == ']') { |
|
1632 if (peek("]>")) |
|
1633 nestlevel--; |
|
1634 } else |
|
1635 continue; |
|
1636 } |
|
1637 } else |
|
1638 fatal("P-048", new Object[]{keyword}); |
|
1639 return true; |
|
1640 } |
|
1641 |
|
1642 |
|
1643 // |
|
1644 // CHAPTER 4: Physical Structures |
|
1645 // |
|
1646 |
|
1647 // parse decimal or hex numeric character reference |
|
1648 private int parseCharNumber() |
|
1649 throws IOException, SAXException { |
|
1650 |
|
1651 char c; |
|
1652 int retval = 0; |
|
1653 |
|
1654 // n.b. we ignore overflow ... |
|
1655 if (getc() != 'x') { |
|
1656 ungetc(); |
|
1657 for (; ;) { |
|
1658 c = getc(); |
|
1659 if (c >= '0' && c <= '9') { |
|
1660 retval *= 10; |
|
1661 retval += (c - '0'); |
|
1662 continue; |
|
1663 } |
|
1664 if (c == ';') |
|
1665 return retval; |
|
1666 fatal("P-049"); |
|
1667 } |
|
1668 } else |
|
1669 for (; ;) { |
|
1670 c = getc(); |
|
1671 if (c >= '0' && c <= '9') { |
|
1672 retval <<= 4; |
|
1673 retval += (c - '0'); |
|
1674 continue; |
|
1675 } |
|
1676 if (c >= 'a' && c <= 'f') { |
|
1677 retval <<= 4; |
|
1678 retval += 10 + (c - 'a'); |
|
1679 continue; |
|
1680 } |
|
1681 if (c >= 'A' && c <= 'F') { |
|
1682 retval <<= 4; |
|
1683 retval += 10 + (c - 'A'); |
|
1684 continue; |
|
1685 } |
|
1686 if (c == ';') |
|
1687 return retval; |
|
1688 fatal("P-050"); |
|
1689 } |
|
1690 } |
|
1691 |
|
1692 // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE, |
|
1693 // though still subject to the 'Char' construct in XML |
|
1694 private int surrogatesToCharTmp(int ucs4) |
|
1695 throws SAXException { |
|
1696 |
|
1697 if (ucs4 <= 0xffff) { |
|
1698 if (XmlChars.isChar(ucs4)) { |
|
1699 charTmp[0] = (char) ucs4; |
|
1700 return 1; |
|
1701 } |
|
1702 } else if (ucs4 <= 0x0010ffff) { |
|
1703 // we represent these as UNICODE surrogate pairs |
|
1704 ucs4 -= 0x10000; |
|
1705 charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff)); |
|
1706 charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff)); |
|
1707 return 2; |
|
1708 } |
|
1709 fatal("P-051", new Object[]{Integer.toHexString(ucs4)}); |
|
1710 // NOTREACHED |
|
1711 return -1; |
|
1712 } |
|
1713 |
|
1714 private boolean maybePEReference() |
|
1715 throws IOException, SAXException { |
|
1716 |
|
1717 // This is the SYNTACTIC version of this construct. |
|
1718 // When processing external entities, there is also |
|
1719 // a LEXICAL version; see getc() and doLexicalPE. |
|
1720 |
|
1721 // [69] PEReference ::= '%' Name ';' |
|
1722 if (!in.peekc('%')) |
|
1723 return false; |
|
1724 |
|
1725 String name = maybeGetName(); |
|
1726 Object entity; |
|
1727 |
|
1728 if (name == null) |
|
1729 fatal("P-011"); |
|
1730 nextChar(';', "F-021", name); |
|
1731 entity = params.get(name); |
|
1732 |
|
1733 if (entity instanceof InternalEntity) { |
|
1734 InternalEntity value = (InternalEntity) entity; |
|
1735 pushReader(value.buf, name, false); |
|
1736 |
|
1737 } else if (entity instanceof ExternalEntity) { |
|
1738 pushReader((ExternalEntity) entity); |
|
1739 externalParameterEntity((ExternalEntity) entity); |
|
1740 |
|
1741 } else if (entity == null) { |
|
1742 error("V-022", new Object[]{name}); |
|
1743 } |
|
1744 return true; |
|
1745 } |
|
1746 |
|
1747 private boolean maybeEntityDecl() |
|
1748 throws IOException, SAXException { |
|
1749 |
|
1750 // [70] EntityDecl ::= GEDecl | PEDecl |
|
1751 // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' |
|
1752 // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>' |
|
1753 // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) |
|
1754 // [74] PEDef ::= EntityValue | ExternalID |
|
1755 // |
|
1756 InputEntity start = peekDeclaration("!ENTITY"); |
|
1757 |
|
1758 if (start == null) |
|
1759 return false; |
|
1760 |
|
1761 String entityName; |
|
1762 SimpleHashtable defns; |
|
1763 ExternalEntity externalId; |
|
1764 boolean doStore; |
|
1765 |
|
1766 // PE expansion gets selectively turned off several places: |
|
1767 // in ENTITY declarations (here), in comments, in PIs. |
|
1768 |
|
1769 // Here, we allow PE entities to be declared, and allows |
|
1770 // literals to include PE refs without the added spaces |
|
1771 // required with their expansion in markup decls. |
|
1772 |
|
1773 doLexicalPE = false; |
|
1774 whitespace("F-005"); |
|
1775 if (in.peekc('%')) { |
|
1776 whitespace("F-006"); |
|
1777 defns = params; |
|
1778 } else |
|
1779 defns = entities; |
|
1780 |
|
1781 ungetc(); // leave some whitespace |
|
1782 doLexicalPE = true; |
|
1783 entityName = getMarkupDeclname("F-017", false); |
|
1784 whitespace("F-007"); |
|
1785 externalId = maybeExternalID(); |
|
1786 |
|
1787 // |
|
1788 // first definition sticks ... e.g. internal subset PEs are used |
|
1789 // to override DTD defaults. It's also an "error" to incorrectly |
|
1790 // redefine builtin internal entities, but since reporting such |
|
1791 // errors is optional we only give warnings ("just in case") for |
|
1792 // non-parameter entities. |
|
1793 // |
|
1794 doStore = (defns.get(entityName) == null); |
|
1795 if (!doStore && defns == entities) |
|
1796 warning("P-054", new Object[]{entityName}); |
|
1797 |
|
1798 // internal entities |
|
1799 if (externalId == null) { |
|
1800 char value []; |
|
1801 InternalEntity entity; |
|
1802 |
|
1803 doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd" |
|
1804 parseLiteral(true); |
|
1805 doLexicalPE = true; |
|
1806 if (doStore) { |
|
1807 value = new char[strTmp.length()]; |
|
1808 if (value.length != 0) |
|
1809 strTmp.getChars(0, value.length, value, 0); |
|
1810 entity = new InternalEntity(entityName, value); |
|
1811 entity.isPE = (defns == params); |
|
1812 entity.isFromInternalSubset = false; |
|
1813 defns.put(entityName, entity); |
|
1814 if (defns == entities) |
|
1815 dtdHandler.internalGeneralEntityDecl(entityName, |
|
1816 new String(value)); |
|
1817 } |
|
1818 |
|
1819 // external entities (including unparsed) |
|
1820 } else { |
|
1821 // [76] NDataDecl ::= S 'NDATA' S Name |
|
1822 if (defns == entities && maybeWhitespace() |
|
1823 && peek("NDATA")) { |
|
1824 externalId.notation = getMarkupDeclname("F-018", false); |
|
1825 |
|
1826 // flag undeclared notation for checking after |
|
1827 // the DTD is fully processed |
|
1828 if (notations.get(externalId.notation) == null) |
|
1829 notations.put(externalId.notation, Boolean.TRUE); |
|
1830 } |
|
1831 externalId.name = entityName; |
|
1832 externalId.isPE = (defns == params); |
|
1833 externalId.isFromInternalSubset = false; |
|
1834 if (doStore) { |
|
1835 defns.put(entityName, externalId); |
|
1836 if (externalId.notation != null) |
|
1837 dtdHandler.unparsedEntityDecl(entityName, |
|
1838 externalId.publicId, externalId.systemId, |
|
1839 externalId.notation); |
|
1840 else if (defns == entities) |
|
1841 dtdHandler.externalGeneralEntityDecl(entityName, |
|
1842 externalId.publicId, externalId.systemId); |
|
1843 } |
|
1844 } |
|
1845 maybeWhitespace(); |
|
1846 nextChar('>', "F-031", entityName); |
|
1847 if (start != in) |
|
1848 error("V-013", null); |
|
1849 return true; |
|
1850 } |
|
1851 |
|
1852 private ExternalEntity maybeExternalID() |
|
1853 throws IOException, SAXException { |
|
1854 |
|
1855 // [75] ExternalID ::= 'SYSTEM' S SystemLiteral |
|
1856 // | 'PUBLIC' S' PubidLiteral S Systemliteral |
|
1857 String temp = null; |
|
1858 ExternalEntity retval; |
|
1859 |
|
1860 if (peek("PUBLIC")) { |
|
1861 whitespace("F-009"); |
|
1862 temp = parsePublicId(); |
|
1863 } else if (!peek("SYSTEM")) |
|
1864 return null; |
|
1865 |
|
1866 retval = new ExternalEntity(in); |
|
1867 retval.publicId = temp; |
|
1868 whitespace("F-008"); |
|
1869 retval.systemId = parseSystemId(); |
|
1870 return retval; |
|
1871 } |
|
1872 |
|
1873 private String parseSystemId() |
|
1874 throws IOException, SAXException { |
|
1875 |
|
1876 String uri = getQuotedString("F-034", null); |
|
1877 int temp = uri.indexOf(':'); |
|
1878 |
|
1879 // resolve relative URIs ... must do it here since |
|
1880 // it's relative to the source file holding the URI! |
|
1881 |
|
1882 // "new java.net.URL (URL, string)" conforms to RFC 1630, |
|
1883 // but we can't use that except when the URI is a URL. |
|
1884 // The entity resolver is allowed to handle URIs that are |
|
1885 // not URLs, so we pass URIs through with scheme intact |
|
1886 if (temp == -1 || uri.indexOf('/') < temp) { |
|
1887 String baseURI; |
|
1888 |
|
1889 baseURI = in.getSystemId(); |
|
1890 if (baseURI == null) |
|
1891 fatal("P-055", new Object[]{uri}); |
|
1892 if (uri.length() == 0) |
|
1893 uri = "."; |
|
1894 baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1); |
|
1895 if (uri.charAt(0) != '/') |
|
1896 uri = baseURI + uri; |
|
1897 else { |
|
1898 // XXX slashes at the beginning of a relative URI are |
|
1899 // a special case we don't handle. |
|
1900 throw new InternalError(); |
|
1901 } |
|
1902 |
|
1903 // letting other code map any "/xxx/../" or "/./" to "/", |
|
1904 // since all URIs must handle it the same. |
|
1905 } |
|
1906 // check for fragment ID in URI |
|
1907 if (uri.indexOf('#') != -1) |
|
1908 error("P-056", new Object[]{uri}); |
|
1909 return uri; |
|
1910 } |
|
1911 |
|
1912 private void maybeTextDecl() |
|
1913 throws IOException, SAXException { |
|
1914 |
|
1915 // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' |
|
1916 if (peek("<?xml")) { |
|
1917 readVersion(false, "1.0"); |
|
1918 readEncoding(true); |
|
1919 maybeWhitespace(); |
|
1920 if (!peek("?>")) |
|
1921 fatal("P-057"); |
|
1922 } |
|
1923 } |
|
1924 |
|
1925 private void externalParameterEntity(ExternalEntity next) |
|
1926 throws IOException, SAXException { |
|
1927 |
|
1928 // |
|
1929 // Reap the intended benefits of standalone declarations: |
|
1930 // don't deal with external parameter entities, except to |
|
1931 // validate the standalone declaration. |
|
1932 // |
|
1933 |
|
1934 // n.b. "in external parameter entities" (and external |
|
1935 // DTD subset, same grammar) parameter references can |
|
1936 // occur "within" markup declarations ... expansions can |
|
1937 // cross syntax rules. Flagged here; affects getc(). |
|
1938 |
|
1939 // [79] ExtPE ::= TextDecl? extSubsetDecl |
|
1940 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect |
|
1941 // | PEReference | S )* |
|
1942 InputEntity pe; |
|
1943 |
|
1944 // XXX if this returns false ... |
|
1945 |
|
1946 pe = in; |
|
1947 maybeTextDecl(); |
|
1948 while (!pe.isEOF()) { |
|
1949 // pop internal PEs (and whitespace before/after) |
|
1950 if (in.isEOF()) { |
|
1951 in = in.pop(); |
|
1952 continue; |
|
1953 } |
|
1954 doLexicalPE = false; |
|
1955 if (maybeWhitespace()) |
|
1956 continue; |
|
1957 if (maybePEReference()) |
|
1958 continue; |
|
1959 doLexicalPE = true; |
|
1960 if (maybeMarkupDecl() || maybeConditionalSect()) |
|
1961 continue; |
|
1962 break; |
|
1963 } |
|
1964 // if (in != pe) throw new InternalError("who popped my PE?"); |
|
1965 if (!pe.isEOF()) |
|
1966 fatal("P-059", new Object[]{in.getName()}); |
|
1967 } |
|
1968 |
|
1969 private void readEncoding(boolean must) |
|
1970 throws IOException, SAXException { |
|
1971 |
|
1972 // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* |
|
1973 String name = maybeReadAttribute("encoding", must); |
|
1974 |
|
1975 if (name == null) |
|
1976 return; |
|
1977 for (int i = 0; i < name.length(); i++) { |
|
1978 char c = name.charAt(i); |
|
1979 if ((c >= 'A' && c <= 'Z') |
|
1980 || (c >= 'a' && c <= 'z')) |
|
1981 continue; |
|
1982 if (i != 0 |
|
1983 && ((c >= '0' && c <= '9') |
|
1984 || c == '-' |
|
1985 || c == '_' |
|
1986 || c == '.' |
|
1987 )) |
|
1988 continue; |
|
1989 fatal("P-060", new Object[]{new Character(c)}); |
|
1990 } |
|
1991 |
|
1992 // |
|
1993 // This should be the encoding in use, and it's even an error for |
|
1994 // it to be anything else (in certain cases that are impractical to |
|
1995 // to test, and may even be insufficient). So, we do the best we |
|
1996 // can, and warn if things look suspicious. Note that Java doesn't |
|
1997 // uniformly expose the encodings, and that the names it uses |
|
1998 // internally are nonstandard. Also, that the XML spec allows |
|
1999 // such "errors" not to be reported at all. |
|
2000 // |
|
2001 String currentEncoding = in.getEncoding(); |
|
2002 |
|
2003 if (currentEncoding != null |
|
2004 && !name.equalsIgnoreCase(currentEncoding)) |
|
2005 warning("P-061", new Object[]{name, currentEncoding}); |
|
2006 } |
|
2007 |
|
2008 private boolean maybeNotationDecl() |
|
2009 throws IOException, SAXException { |
|
2010 |
|
2011 // [82] NotationDecl ::= '<!NOTATION' S Name S |
|
2012 // (ExternalID | PublicID) S? '>' |
|
2013 // [83] PublicID ::= 'PUBLIC' S PubidLiteral |
|
2014 InputEntity start = peekDeclaration("!NOTATION"); |
|
2015 |
|
2016 if (start == null) |
|
2017 return false; |
|
2018 |
|
2019 String name = getMarkupDeclname("F-019", false); |
|
2020 ExternalEntity entity = new ExternalEntity(in); |
|
2021 |
|
2022 whitespace("F-011"); |
|
2023 if (peek("PUBLIC")) { |
|
2024 whitespace("F-009"); |
|
2025 entity.publicId = parsePublicId(); |
|
2026 if (maybeWhitespace()) { |
|
2027 if (!peek(">")) |
|
2028 entity.systemId = parseSystemId(); |
|
2029 else |
|
2030 ungetc(); |
|
2031 } |
|
2032 } else if (peek("SYSTEM")) { |
|
2033 whitespace("F-008"); |
|
2034 entity.systemId = parseSystemId(); |
|
2035 } else |
|
2036 fatal("P-062"); |
|
2037 maybeWhitespace(); |
|
2038 nextChar('>', "F-032", name); |
|
2039 if (start != in) |
|
2040 error("V-013", null); |
|
2041 if (entity.systemId != null && entity.systemId.indexOf('#') != -1) |
|
2042 error("P-056", new Object[]{entity.systemId}); |
|
2043 |
|
2044 Object value = notations.get(name); |
|
2045 if (value != null && value instanceof ExternalEntity) |
|
2046 warning("P-063", new Object[]{name}); |
|
2047 |
|
2048 else { |
|
2049 notations.put(name, entity); |
|
2050 dtdHandler.notationDecl(name, entity.publicId, |
|
2051 entity.systemId); |
|
2052 } |
|
2053 return true; |
|
2054 } |
|
2055 |
|
2056 |
|
2057 //////////////////////////////////////////////////////////////// |
|
2058 // |
|
2059 // UTILITIES |
|
2060 // |
|
2061 //////////////////////////////////////////////////////////////// |
|
2062 |
|
2063 private char getc() throws IOException, SAXException { |
|
2064 |
|
2065 if (!doLexicalPE) { |
|
2066 char c = in.getc(); |
|
2067 return c; |
|
2068 } |
|
2069 |
|
2070 // |
|
2071 // External parameter entities get funky processing of '%param;' |
|
2072 // references. It's not clearly defined in the XML spec; but it |
|
2073 // boils down to having those refs be _lexical_ in most cases to |
|
2074 // include partial syntax productions. It also needs selective |
|
2075 // enabling; "<!ENTITY % foo ...>" must work, for example, and |
|
2076 // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd" |
|
2077 // if it's expanded in a literal, else "ab cd". PEs also do |
|
2078 // not expand within comments or PIs, and external PEs are only |
|
2079 // allowed to have markup decls (and so aren't handled lexically). |
|
2080 // |
|
2081 // This PE handling should be merged into maybeWhitespace, where |
|
2082 // it can be dealt with more consistently. |
|
2083 // |
|
2084 // Also, there are some validity constraints in this area. |
|
2085 // |
|
2086 char c; |
|
2087 |
|
2088 while (in.isEOF()) { |
|
2089 if (in.isInternal() || (doLexicalPE && !in.isDocument())) |
|
2090 in = in.pop(); |
|
2091 else { |
|
2092 fatal("P-064", new Object[]{in.getName()}); |
|
2093 } |
|
2094 } |
|
2095 if ((c = in.getc()) == '%' && doLexicalPE) { |
|
2096 // PE ref ::= '%' name ';' |
|
2097 String name = maybeGetName(); |
|
2098 Object entity; |
|
2099 |
|
2100 if (name == null) |
|
2101 fatal("P-011"); |
|
2102 nextChar(';', "F-021", name); |
|
2103 entity = params.get(name); |
|
2104 |
|
2105 // push a magic "entity" before and after the |
|
2106 // real one, so ungetc() behaves uniformly |
|
2107 pushReader(" ".toCharArray(), null, false); |
|
2108 if (entity instanceof InternalEntity) |
|
2109 pushReader(((InternalEntity) entity).buf, name, false); |
|
2110 else if (entity instanceof ExternalEntity) |
|
2111 // PEs can't be unparsed! |
|
2112 // XXX if this returns false ... |
|
2113 pushReader((ExternalEntity) entity); |
|
2114 else if (entity == null) |
|
2115 // see note in maybePEReference re making this be nonfatal. |
|
2116 fatal("V-022"); |
|
2117 else |
|
2118 throw new InternalError(); |
|
2119 pushReader(" ".toCharArray(), null, false); |
|
2120 return in.getc(); |
|
2121 } |
|
2122 return c; |
|
2123 } |
|
2124 |
|
2125 private void ungetc() { |
|
2126 |
|
2127 in.ungetc(); |
|
2128 } |
|
2129 |
|
2130 private boolean peek(String s) |
|
2131 throws IOException, SAXException { |
|
2132 |
|
2133 return in.peek(s, null); |
|
2134 } |
|
2135 |
|
2136 // Return the entity starting the specified declaration |
|
2137 // (for validating declaration nesting) else null. |
|
2138 |
|
2139 private InputEntity peekDeclaration(String s) |
|
2140 throws IOException, SAXException { |
|
2141 |
|
2142 InputEntity start; |
|
2143 |
|
2144 if (!in.peekc('<')) |
|
2145 return null; |
|
2146 start = in; |
|
2147 if (in.peek(s, null)) |
|
2148 return start; |
|
2149 in.ungetc(); |
|
2150 return null; |
|
2151 } |
|
2152 |
|
2153 private void nextChar(char c, String location, String near) |
|
2154 throws IOException, SAXException { |
|
2155 |
|
2156 while (in.isEOF() && !in.isDocument()) |
|
2157 in = in.pop(); |
|
2158 if (!in.peekc(c)) |
|
2159 fatal("P-008", new Object[] |
|
2160 {new Character(c), |
|
2161 messages.getMessage(locale, location), |
|
2162 (near == null ? "" : ('"' + near + '"'))}); |
|
2163 } |
|
2164 |
|
2165 |
|
2166 private void pushReader(char buf [], String name, boolean isGeneral) |
|
2167 throws SAXException { |
|
2168 |
|
2169 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); |
|
2170 r.init(buf, name, in, !isGeneral); |
|
2171 in = r; |
|
2172 } |
|
2173 |
|
2174 private boolean pushReader(ExternalEntity next) |
|
2175 throws IOException, SAXException { |
|
2176 |
|
2177 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); |
|
2178 InputSource s; |
|
2179 try { |
|
2180 s = next.getInputSource(resolver); |
|
2181 } catch (IOException e) { |
|
2182 String msg = |
|
2183 "unable to open the external entity from :" + next.systemId; |
|
2184 if (next.publicId != null) |
|
2185 msg += " (public id:" + next.publicId + ")"; |
|
2186 |
|
2187 SAXParseException spe = new SAXParseException(msg, |
|
2188 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e); |
|
2189 dtdHandler.fatalError(spe); |
|
2190 throw e; |
|
2191 } |
|
2192 |
|
2193 r.init(s, next.name, in, next.isPE); |
|
2194 in = r; |
|
2195 return true; |
|
2196 } |
|
2197 |
|
2198 public String getPublicId() { |
|
2199 |
|
2200 return (in == null) ? null : in.getPublicId(); |
|
2201 } |
|
2202 |
|
2203 public String getSystemId() { |
|
2204 |
|
2205 return (in == null) ? null : in.getSystemId(); |
|
2206 } |
|
2207 |
|
2208 public int getLineNumber() { |
|
2209 |
|
2210 return (in == null) ? -1 : in.getLineNumber(); |
|
2211 } |
|
2212 |
|
2213 public int getColumnNumber() { |
|
2214 |
|
2215 return (in == null) ? -1 : in.getColumnNumber(); |
|
2216 } |
|
2217 |
|
2218 // error handling convenience routines |
|
2219 |
|
2220 private void warning(String messageId, Object parameters []) |
|
2221 throws SAXException { |
|
2222 |
|
2223 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), |
|
2224 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); |
|
2225 |
|
2226 dtdHandler.warning(e); |
|
2227 } |
|
2228 |
|
2229 void error(String messageId, Object parameters []) |
|
2230 throws SAXException { |
|
2231 |
|
2232 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), |
|
2233 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); |
|
2234 |
|
2235 dtdHandler.error(e); |
|
2236 } |
|
2237 |
|
2238 private void fatal(String messageId) throws SAXException { |
|
2239 |
|
2240 fatal(messageId, null); |
|
2241 } |
|
2242 |
|
2243 private void fatal(String messageId, Object parameters []) |
|
2244 throws SAXException { |
|
2245 |
|
2246 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), |
|
2247 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); |
|
2248 |
|
2249 dtdHandler.fatalError(e); |
|
2250 |
|
2251 throw e; |
|
2252 } |
|
2253 |
|
2254 // |
|
2255 // Map char arrays to strings ... cuts down both on memory and |
|
2256 // CPU usage for element/attribute/other names that are reused. |
|
2257 // |
|
2258 // Documents typically repeat names a lot, so we more or less |
|
2259 // intern all the strings within the document; since some strings |
|
2260 // are repeated in multiple documents (e.g. stylesheets) we go |
|
2261 // a bit further, and intern globally. |
|
2262 // |
|
2263 static class NameCache { |
|
2264 // |
|
2265 // Unless we auto-grow this, the default size should be a |
|
2266 // reasonable bit larger than needed for most XML files |
|
2267 // we've yet seen (and be prime). If it's too small, the |
|
2268 // penalty is just excess cache collisions. |
|
2269 // |
|
2270 NameCacheEntry hashtable [] = new NameCacheEntry[541]; |
|
2271 |
|
2272 // |
|
2273 // Usually we just want to get the 'symbol' for these chars |
|
2274 // |
|
2275 String lookup(char value [], int len) { |
|
2276 |
|
2277 return lookupEntry(value, len).name; |
|
2278 } |
|
2279 |
|
2280 // |
|
2281 // Sometimes we need to scan the chars in the resulting |
|
2282 // string, so there's an accessor which exposes them. |
|
2283 // (Mostly for element end tags.) |
|
2284 // |
|
2285 NameCacheEntry lookupEntry(char value [], int len) { |
|
2286 |
|
2287 int index = 0; |
|
2288 NameCacheEntry entry; |
|
2289 |
|
2290 // hashing to get index |
|
2291 for (int i = 0; i < len; i++) |
|
2292 index = index * 31 + value[i]; |
|
2293 index &= 0x7fffffff; |
|
2294 index %= hashtable.length; |
|
2295 |
|
2296 // return entry if one's there ... |
|
2297 for (entry = hashtable[index]; |
|
2298 entry != null; |
|
2299 entry = entry.next) { |
|
2300 if (entry.matches(value, len)) |
|
2301 return entry; |
|
2302 } |
|
2303 |
|
2304 // else create new one |
|
2305 entry = new NameCacheEntry(); |
|
2306 entry.chars = new char[len]; |
|
2307 System.arraycopy(value, 0, entry.chars, 0, len); |
|
2308 entry.name = new String(entry.chars); |
|
2309 // |
|
2310 // NOTE: JDK 1.1 has a fixed size string intern table, |
|
2311 // with non-GC'd entries. It can panic here; that's a |
|
2312 // JDK problem, use 1.2 or later with many identifiers. |
|
2313 // |
|
2314 entry.name = entry.name.intern(); // "global" intern |
|
2315 entry.next = hashtable[index]; |
|
2316 hashtable[index] = entry; |
|
2317 return entry; |
|
2318 } |
|
2319 } |
|
2320 |
|
2321 static class NameCacheEntry { |
|
2322 |
|
2323 String name; |
|
2324 char chars []; |
|
2325 NameCacheEntry next; |
|
2326 |
|
2327 boolean matches(char value [], int len) { |
|
2328 |
|
2329 if (chars.length != len) |
|
2330 return false; |
|
2331 for (int i = 0; i < len; i++) |
|
2332 if (value[i] != chars[i]) |
|
2333 return false; |
|
2334 return true; |
|
2335 } |
|
2336 } |
|
2337 |
|
2338 // |
|
2339 // Message catalog for diagnostics. |
|
2340 // |
|
2341 static final Catalog messages = new Catalog(); |
|
2342 |
|
2343 static final class Catalog extends MessageCatalog { |
|
2344 |
|
2345 Catalog() { |
|
2346 super(DTDParser.class); |
|
2347 } |
|
2348 } |
|
2349 |
|
2350 } |