src/share/jaxws_classes/com/sun/xml/internal/dtdparser/InputEntity.java

changeset 0
373ffda63c9a
child 637
9c07ef4934dd
equal deleted inserted replaced
-1:000000000000 0:373ffda63c9a
1 /*
2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.xml.internal.dtdparser;
27
28 import org.xml.sax.InputSource;
29 import org.xml.sax.SAXException;
30 import org.xml.sax.SAXParseException;
31
32 import java.io.CharConversionException;
33 import java.io.IOException;
34 import java.io.InputStream;
35 import java.io.InputStreamReader;
36 import java.io.Reader;
37 import java.io.UnsupportedEncodingException;
38 import java.net.URL;
39 import java.util.Locale;
40
/**
 * This is how the parser talks to its input entities, of all kinds.
 * The entities are in a stack.
 *
 * <p>For internal entities, the character arrays are referenced here,
 * and read from as needed (they're read-only). External entities have
 * mutable buffers that are read into as needed.
 *
 * <p><em>Note:</em> This maps CRLF (and CR) to LF without regard for
 * whether it's in an external (parsed) entity or not. The XML 1.0 spec
 * is inconsistent in explaining EOL handling; this is the sensible way.
 *
 * @author David Brownell
 * @author Janet Koenig
 * @version 1.4 00/08/05
 */
57 public class InputEntity {
58 private int start, finish;
59 private char buf [];
60 private int lineNumber = 1;
61 private boolean returnedFirstHalf = false;
62 private boolean maybeInCRLF = false;
63
64 // name of entity (never main document or unnamed DTD PE)
65 private String name;
66
67 private InputEntity next;
68
69 // for system and public IDs in diagnostics
70 private InputSource input;
71
72 // this is a buffer; some buffers can be replenished.
73 private Reader reader;
74 private boolean isClosed;
75
76 private DTDEventListener errHandler;
77 private Locale locale;
78
79 private StringBuffer rememberedText;
80 private int startRemember;
81
82 // record if this is a PE, so endParsedEntity won't be called
83 private boolean isPE;
84
85 // InputStreamReader throws an internal per-read exception, so
86 // we minimize reads. We also add a byte to compensate for the
87 // "ungetc" byte we keep, so that our downstream reads are as
88 // nicely sized as we can make them.
89 final private static int BUFSIZ = 8 * 1024 + 1;
90
91 final private static char newline [] = {'\n'};
92
/**
 * Creates an uninitialized entity; one of the {@code init} methods
 * must be used before reading from it.
 *
 * @param h listener that receives error reports from this entity
 * @param l locale used when formatting diagnostic messages
 * @return a new, not yet initialized entity
 */
public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
    final InputEntity entity = new InputEntity();
    entity.locale = l;
    entity.errHandler = h;
    return entity;
}

// Instances are created through getInputEntity() only.
private InputEntity() {
}
102
103 //
104 // predicate: return true iff this is an internal entity reader,
105 // and so may safely be "popped" as needed. external entities have
106 // syntax to uphold; internal parameter entities have at most validity
107 // constraints to monitor. also, only external entities get decent
108 // location diagnostics.
109 //
110 public boolean isInternal() {
111 return reader == null;
112 }
113
114 //
115 // predicate: return true iff this is the toplevel document
116 //
117 public boolean isDocument() {
118 return next == null;
119 }
120
121 //
122 // predicate: return true iff this is a PE expansion (so that
123 // LexicalEventListner.endParsedEntity won't be called)
124 //
125 public boolean isParameterEntity() {
126 return isPE;
127 }
128
129 //
130 // return name of current entity
131 //
132 public String getName() {
133 return name;
134 }
135
136 //
137 // use this for an external parsed entity
138 //
139 public void init(InputSource in, String name, InputEntity stack,
140 boolean isPE)
141 throws IOException, SAXException {
142
143 input = in;
144 this.isPE = isPE;
145 reader = in.getCharacterStream();
146
147 if (reader == null) {
148 InputStream bytes = in.getByteStream();
149
150 if (bytes == null)
151 reader = XmlReader.createReader(new URL(in.getSystemId())
152 .openStream());
153 else if (in.getEncoding() != null)
154 reader = XmlReader.createReader(in.getByteStream(),
155 in.getEncoding());
156 else
157 reader = XmlReader.createReader(in.getByteStream());
158 }
159 next = stack;
160 buf = new char[BUFSIZ];
161 this.name = name;
162 checkRecursion(stack);
163 }
164
165 //
166 // use this for an internal parsed entity; buffer is readonly
167 //
168 public void init(char b [], String name, InputEntity stack, boolean isPE)
169 throws SAXException {
170
171 next = stack;
172 buf = b;
173 finish = b.length;
174 this.name = name;
175 this.isPE = isPE;
176 checkRecursion(stack);
177 }
178
179 private void checkRecursion(InputEntity stack)
180 throws SAXException {
181
182 if (stack == null)
183 return;
184 for (stack = stack.next; stack != null; stack = stack.next) {
185 if (stack.name != null && stack.name.equals(name))
186 fatal("P-069", new Object[]{name});
187 }
188 }
189
190 public InputEntity pop() throws IOException {
191
192 // caller has ensured there's nothing left to read
193 close();
194 return next;
195 }
196
197 /**
198 * returns true iff there's no more data to consume ...
199 */
200 public boolean isEOF() throws IOException, SAXException {
201
202 // called to ensure WF-ness of included entities and to pop
203 // input entities appropriately ... EOF is not always legal.
204 if (start >= finish) {
205 fillbuf();
206 return start >= finish;
207 } else
208 return false;
209 }
210
211 /**
212 * Returns the name of the encoding in use, else null; the name
213 * returned is in as standard a form as we can get.
214 */
215 public String getEncoding() {
216
217 if (reader == null)
218 return null;
219 if (reader instanceof XmlReader)
220 return ((XmlReader) reader).getEncoding();
221
222 // XXX prefer a java2std() call to normalize names...
223
224 if (reader instanceof InputStreamReader)
225 return ((InputStreamReader) reader).getEncoding();
226 return null;
227 }
228
229
230 /**
231 * returns the next name char, or NUL ... faster than getc(),
232 * and the common "name or nmtoken must be next" case won't
233 * need ungetc().
234 */
235 public char getNameChar() throws IOException, SAXException {
236
237 if (finish <= start)
238 fillbuf();
239 if (finish > start) {
240 char c = buf[start++];
241 if (XmlChars.isNameChar(c))
242 return c;
243 start--;
244 }
245 return 0;
246 }
247
248 /**
249 * gets the next Java character -- might be part of an XML
250 * text character represented by a surrogate pair, or be
251 * the end of the entity.
252 */
253 public char getc() throws IOException, SAXException {
254
255 if (finish <= start)
256 fillbuf();
257 if (finish > start) {
258 char c = buf[start++];
259
260 // [2] Char ::= #x0009 | #x000A | #x000D
261 // | [#x0020-#xD7FF]
262 // | [#xE000-#xFFFD]
263 // plus surrogate _pairs_ representing [#x10000-#x10ffff]
264 if (returnedFirstHalf) {
265 if (c >= 0xdc00 && c <= 0xdfff) {
266 returnedFirstHalf = false;
267 return c;
268 } else
269 fatal("P-070", new Object[]{Integer.toHexString(c)});
270 }
271 if ((c >= 0x0020 && c <= 0xD7FF)
272 || c == 0x0009
273 // no surrogates!
274 || (c >= 0xE000 && c <= 0xFFFD))
275 return c;
276
277 //
278 // CRLF and CR are both line ends; map both to LF, and
279 // keep line count correct.
280 //
281 else if (c == '\r' && !isInternal()) {
282 maybeInCRLF = true;
283 c = getc();
284 if (c != '\n')
285 ungetc();
286 maybeInCRLF = false;
287
288 lineNumber++;
289 return '\n';
290
291 } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
292 if (!isInternal() && !maybeInCRLF)
293 lineNumber++;
294 return c;
295 }
296
297 // surrogates...
298 if (c >= 0xd800 && c < 0xdc00) {
299 returnedFirstHalf = true;
300 return c;
301 }
302
303 fatal("P-071", new Object[]{Integer.toHexString(c)});
304 }
305 throw new EndOfInputException();
306 }
307
308
309 /**
310 * lookahead one character
311 */
312 public boolean peekc(char c) throws IOException, SAXException {
313
314 if (finish <= start)
315 fillbuf();
316 if (finish > start) {
317 if (buf[start] == c) {
318 start++;
319 return true;
320 } else
321 return false;
322 }
323 return false;
324 }
325
326
327 /**
328 * two character pushback is guaranteed
329 */
330 public void ungetc() {
331
332 if (start == 0)
333 throw new InternalError("ungetc");
334 start--;
335
336 if (buf[start] == '\n' || buf[start] == '\r') {
337 if (!isInternal())
338 lineNumber--;
339 } else if (returnedFirstHalf)
340 returnedFirstHalf = false;
341 }
342
343
344 /**
345 * optional grammatical whitespace (discarded)
346 */
347 public boolean maybeWhitespace()
348 throws IOException, SAXException {
349
350 char c;
351 boolean isSpace = false;
352 boolean sawCR = false;
353
354 // [3] S ::= #20 | #09 | #0D | #0A
355 for (; ;) {
356 if (finish <= start)
357 fillbuf();
358 if (finish <= start)
359 return isSpace;
360
361 c = buf[start++];
362 if (c == 0x20 || c == 0x09 || c == '\n' || c == '\r') {
363 isSpace = true;
364
365 //
366 // CR, LF are line endings ... CLRF is one, not two!
367 //
368 if ((c == '\n' || c == '\r') && !isInternal()) {
369 if (!(c == '\n' && sawCR)) {
370 lineNumber++;
371 sawCR = false;
372 }
373 if (c == '\r')
374 sawCR = true;
375 }
376 } else {
377 start--;
378 return isSpace;
379 }
380 }
381 }
382
383
384 /**
385 * normal content; whitespace in markup may be handled
386 * specially if the parser uses the content model.
387 * <p/>
388 * <P> content terminates with markup delimiter characters,
389 * namely ampersand (&amp;amp;) and left angle bracket (&amp;lt;).
390 * <p/>
391 * <P> the document handler's characters() method is called
392 * on all the content found
393 */
394 public boolean parsedContent(DTDEventListener docHandler
395 /*ElementValidator validator*/)
396 throws IOException, SAXException {
397
398 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
399
400 int first; // first char to return
401 int last; // last char to return
402 boolean sawContent; // sent any chars?
403 char c;
404
405 // deliver right out of the buffer, until delimiter, EOF,
406 // or error, refilling as we go
407 for (first = last = start, sawContent = false; ; last++) {
408
409 // buffer empty?
410 if (last >= finish) {
411 if (last > first) {
412 // validator.text ();
413 docHandler.characters(buf, first, last - first);
414 sawContent = true;
415 start = last;
416 }
417 if (isEOF()) // calls fillbuf
418 return sawContent;
419 first = start;
420 last = first - 1; // incremented in loop
421 continue;
422 }
423
424 c = buf[last];
425
426 //
427 // pass most chars through ASAP; this inlines the code of
428 // [2] !XmlChars.isChar(c) leaving only characters needing
429 // special treatment ... line ends, surrogates, and:
430 // 0x0026 == '&'
431 // 0x003C == '<'
432 // 0x005D == ']'
433 // Comparisons ordered for speed on 'typical' text
434 //
435 if ((c > 0x005D && c <= 0xD7FF) // a-z and more
436 || (c < 0x0026 && c >= 0x0020) // space & punct
437 || (c > 0x003C && c < 0x005D) // A-Z & punct
438 || (c > 0x0026 && c < 0x003C) // 0-9 & punct
439 || c == 0x0009
440 || (c >= 0xE000 && c <= 0xFFFD)
441 )
442 continue;
443
444 // terminate on markup delimiters
445 if (c == '<' || c == '&')
446 break;
447
448 // count lines
449 if (c == '\n') {
450 if (!isInternal())
451 lineNumber++;
452 continue;
453 }
454
455 // External entities get CR, CRLF --> LF mapping
456 // Internal ones got it already, and we can't repeat
457 // else we break char ref handling!!
458 if (c == '\r') {
459 if (isInternal())
460 continue;
461
462 docHandler.characters(buf, first, last - first);
463 docHandler.characters(newline, 0, 1);
464 sawContent = true;
465 lineNumber++;
466 if (finish > (last + 1)) {
467 if (buf[last + 1] == '\n')
468 last++;
469 } else { // CR at end of buffer
470 // XXX case not yet handled: CRLF here will look like two lines
471 }
472 first = start = last + 1;
473 continue;
474 }
475
476 // ']]>' is a WF error -- must fail if we see it
477 if (c == ']') {
478 switch (finish - last) {
479 // for suspicious end-of-buffer cases, get more data
480 // into the buffer to rule out this sequence.
481 case 2:
482 if (buf[last + 1] != ']')
483 continue;
484 // FALLTHROUGH
485
486 case 1:
487 if (reader == null || isClosed)
488 continue;
489 if (last == first)
490 throw new InternalError("fillbuf");
491 last--;
492 if (last > first) {
493 // validator.text ();
494 docHandler.characters(buf, first, last - first);
495 sawContent = true;
496 start = last;
497 }
498 fillbuf();
499 first = last = start;
500 continue;
501
502 // otherwise any "]]>" would be buffered, and we can
503 // see right away if that's what we have
504 default:
505 if (buf[last + 1] == ']' && buf[last + 2] == '>')
506 fatal("P-072", null);
507 continue;
508 }
509 }
510
511 // correctly paired surrogates are OK
512 if (c >= 0xd800 && c <= 0xdfff) {
513 if ((last + 1) >= finish) {
514 if (last > first) {
515 // validator.text ();
516 docHandler.characters(buf, first, last - first);
517 sawContent = true;
518 start = last + 1;
519 }
520 if (isEOF()) { // calls fillbuf
521 fatal("P-081",
522 new Object[]{Integer.toHexString(c)});
523 }
524 first = start;
525 last = first;
526 continue;
527 }
528 if (checkSurrogatePair(last))
529 last++;
530 else {
531 last--;
532 // also terminate on surrogate pair oddities
533 break;
534 }
535 continue;
536 }
537
538 fatal("P-071", new Object[]{Integer.toHexString(c)});
539 }
540 if (last == first)
541 return sawContent;
542 // validator.text ();
543 docHandler.characters(buf, first, last - first);
544 start = last;
545 return true;
546 }
547
548
549 /**
550 * CDATA -- character data, terminated by "]]>" and optionally
551 * including unescaped markup delimiters (ampersand and left angle
552 * bracket). This should otherwise be exactly like character data,
553 * modulo differences in error report details.
554 * <p/>
555 * <P> The document handler's characters() or ignorableWhitespace()
556 * methods are invoked on all the character data found
557 *
558 * @param docHandler gets callbacks for character data
559 * @param ignorableWhitespace if true, whitespace characters will
560 * be reported using docHandler.ignorableWhitespace(); implicitly,
561 * non-whitespace characters will cause validation errors
562 * @param whitespaceInvalidMessage if true, ignorable whitespace
563 * causes a validity error report as well as a callback
564 */
565 public boolean unparsedContent(DTDEventListener docHandler,
566 /*ElementValidator validator,*/
567 boolean ignorableWhitespace,
568 String whitespaceInvalidMessage)
569 throws IOException, SAXException {
570
571 // [18] CDSect ::= CDStart CData CDEnd
572 // [19] CDStart ::= '<![CDATA['
573 // [20] CData ::= (Char* - (Char* ']]>' Char*))
574 // [21] CDEnd ::= ']]>'
575
576 // caller peeked the leading '<' ...
577 if (!peek("![CDATA[", null))
578 return false;
579 docHandler.startCDATA();
580
581 // only a literal ']]>' stops this ...
582 int last;
583
584 for (; ;) { // until ']]>' seen
585 boolean done = false;
586 char c;
587
588 // don't report ignorable whitespace as "text" for
589 // validation purposes.
590 boolean white = ignorableWhitespace;
591
592 for (last = start; last < finish; last++) {
593 c = buf[last];
594
595 //
596 // Reject illegal characters.
597 //
598 if (!XmlChars.isChar(c)) {
599 white = false;
600 if (c >= 0xd800 && c <= 0xdfff) {
601 if (checkSurrogatePair(last)) {
602 last++;
603 continue;
604 } else {
605 last--;
606 break;
607 }
608 }
609 fatal("P-071", new Object[]
610 {Integer.toHexString(buf[last])});
611 }
612 if (c == '\n') {
613 if (!isInternal())
614 lineNumber++;
615 continue;
616 }
617 if (c == '\r') {
618 // As above, we can't repeat CR/CRLF --> LF mapping
619 if (isInternal())
620 continue;
621
622 if (white) {
623 if (whitespaceInvalidMessage != null)
624 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
625 whitespaceInvalidMessage), null));
626 docHandler.ignorableWhitespace(buf, start,
627 last - start);
628 docHandler.ignorableWhitespace(newline, 0, 1);
629 } else {
630 // validator.text ();
631 docHandler.characters(buf, start, last - start);
632 docHandler.characters(newline, 0, 1);
633 }
634 lineNumber++;
635 if (finish > (last + 1)) {
636 if (buf[last + 1] == '\n')
637 last++;
638 } else { // CR at end of buffer
639 // XXX case not yet handled ... as above
640 }
641 start = last + 1;
642 continue;
643 }
644 if (c != ']') {
645 if (c != ' ' && c != '\t')
646 white = false;
647 continue;
648 }
649 if ((last + 2) < finish) {
650 if (buf[last + 1] == ']' && buf[last + 2] == '>') {
651 done = true;
652 break;
653 }
654 white = false;
655 continue;
656 } else {
657 //last--;
658 break;
659 }
660 }
661 if (white) {
662 if (whitespaceInvalidMessage != null)
663 errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
664 whitespaceInvalidMessage), null));
665 docHandler.ignorableWhitespace(buf, start, last - start);
666 } else {
667 // validator.text ();
668 docHandler.characters(buf, start, last - start);
669 }
670 if (done) {
671 start = last + 3;
672 break;
673 }
674 start = last;
675 if (isEOF())
676 fatal("P-073", null);
677 }
678 docHandler.endCDATA();
679 return true;
680 }
681
682 // return false to backstep at end of buffer)
683 private boolean checkSurrogatePair(int offset)
684 throws SAXException {
685
686 if ((offset + 1) >= finish)
687 return false;
688
689 char c1 = buf[offset++];
690 char c2 = buf[offset];
691
692 if ((c1 >= 0xd800 && c1 < 0xdc00) && (c2 >= 0xdc00 && c2 <= 0xdfff))
693 return true;
694 fatal("P-074", new Object[]{
695 Integer.toHexString(c1 & 0x0ffff),
696 Integer.toHexString(c2 & 0x0ffff)
697 });
698 return false;
699 }
700
701
702 /**
703 * whitespace in markup (flagged to app, discardable)
704 * <p/>
705 * <P> the document handler's ignorableWhitespace() method
706 * is called on all the whitespace found
707 */
708 public boolean ignorableWhitespace(DTDEventListener handler)
709 throws IOException, SAXException {
710
711 char c;
712 boolean isSpace = false;
713 int first;
714
715 // [3] S ::= #20 | #09 | #0D | #0A
716 for (first = start; ;) {
717 if (finish <= start) {
718 if (isSpace)
719 handler.ignorableWhitespace(buf, first, start - first);
720 fillbuf();
721 first = start;
722 }
723 if (finish <= start)
724 return isSpace;
725
726 c = buf[start++];
727 switch (c) {
728 case '\n':
729 if (!isInternal())
730 lineNumber++;
731 // XXX handles Macintosh line endings wrong
732 // fallthrough
733 case 0x09:
734 case 0x20:
735 isSpace = true;
736 continue;
737
738 case '\r':
739 isSpace = true;
740 if (!isInternal())
741 lineNumber++;
742 handler.ignorableWhitespace(buf, first,
743 (start - 1) - first);
744 handler.ignorableWhitespace(newline, 0, 1);
745 if (start < finish && buf[start] == '\n')
746 ++start;
747 first = start;
748 continue;
749
750 default:
751 ungetc();
752 if (isSpace)
753 handler.ignorableWhitespace(buf, first, start - first);
754 return isSpace;
755 }
756 }
757 }
758
759 /**
760 * returns false iff 'next' string isn't as provided,
761 * else skips that text and returns true.
762 * <p/>
763 * <P> NOTE: two alternative string representations are
764 * both passed in, since one is faster.
765 */
766 public boolean peek(String next, char chars [])
767 throws IOException, SAXException {
768
769 int len;
770 int i;
771
772 if (chars != null)
773 len = chars.length;
774 else
775 len = next.length();
776
777 // buffer should hold the whole thing ... give it a
778 // chance for the end-of-buffer case and cope with EOF
779 // by letting fillbuf compact and fill
780 if (finish <= start || (finish - start) < len)
781 fillbuf();
782
783 // can't peek past EOF
784 if (finish <= start)
785 return false;
786
787 // compare the string; consume iff it matches
788 if (chars != null) {
789 for (i = 0; i < len && (start + i) < finish; i++) {
790 if (buf[start + i] != chars[i])
791 return false;
792 }
793 } else {
794 for (i = 0; i < len && (start + i) < finish; i++) {
795 if (buf[start + i] != next.charAt(i))
796 return false;
797 }
798 }
799
800 // if the first fillbuf didn't get enough data, give
801 // fillbuf another chance to read
802 if (i < len) {
803 if (reader == null || isClosed)
804 return false;
805
806 //
807 // This diagnostic "knows" that the only way big strings would
808 // fail to be peeked is where it's a symbol ... e.g. for an
809 // </EndTag> construct. That knowledge could also be applied
810 // to get rid of the symbol length constraint, since having
811 // the wrong symbol is a fatal error anyway ...
812 //
813 if (len > buf.length)
814 fatal("P-077", new Object[]{new Integer(buf.length)});
815
816 fillbuf();
817 return peek(next, chars);
818 }
819
820 start += len;
821 return true;
822 }
823
824
825 //
826 // Support for reporting the internal DTD subset, so <!DOCTYPE...>
827 // declarations can be recreated. This is collected as a single
828 // string; such subsets are normally small, and many applications
829 // don't even care about this.
830 //
831 public void startRemembering() {
832
833 if (startRemember != 0)
834 throw new InternalError();
835 startRemember = start;
836 }
837
838 public String rememberText() {
839
840 String retval;
841
842 // If the internal subset crossed a buffer boundary, we
843 // created a temporary buffer.
844 if (rememberedText != null) {
845 rememberedText.append(buf, startRemember,
846 start - startRemember);
847 retval = rememberedText.toString();
848 } else
849 retval = new String(buf, startRemember,
850 start - startRemember);
851
852 startRemember = 0;
853 rememberedText = null;
854 return retval;
855 }
856
857 private InputEntity getTopEntity() {
858
859 InputEntity current = this;
860
861 // don't report locations within internal entities!
862
863 while (current != null && current.input == null)
864 current = current.next;
865 return current == null ? this : current;
866 }
867
868 /**
869 * Returns the public ID of this input source, if known
870 */
871 public String getPublicId() {
872
873 InputEntity where = getTopEntity();
874 if (where == this)
875 return input.getPublicId();
876 return where.getPublicId();
877 }
878
879 /**
880 * Returns the system ID of this input source, if known
881 */
882 public String getSystemId() {
883
884 InputEntity where = getTopEntity();
885 if (where == this)
886 return input.getSystemId();
887 return where.getSystemId();
888 }
889
890 /**
891 * Returns the current line number in this input source
892 */
893 public int getLineNumber() {
894
895 InputEntity where = getTopEntity();
896 if (where == this)
897 return lineNumber;
898 return where.getLineNumber();
899 }
900
901 /**
902 * returns -1; maintaining column numbers hurts performance
903 */
904 public int getColumnNumber() {
905
906 return -1; // not maintained (speed)
907 }
908
909
910 //
911 // n.b. for non-EOF end-of-buffer cases, reader should return
912 // at least a handful of bytes so various lookaheads behave.
913 //
914 // two character pushback exists except at first; characters
915 // represented by surrogate pairs can't be pushed back (they'd
916 // only be in character data anyway).
917 //
918 // DTD exception thrown on char conversion problems; line number
919 // will be low, as a rule.
920 //
921 private void fillbuf() throws IOException, SAXException {
922
923 // don't touched fixed buffers, that'll usually
924 // change entity values (and isn't needed anyway)
925 // likewise, ignore closed streams
926 if (reader == null || isClosed)
927 return;
928
929 // if remembering DTD text, copy!
930 if (startRemember != 0) {
931 if (rememberedText == null)
932 rememberedText = new StringBuffer(buf.length);
933 rememberedText.append(buf, startRemember,
934 start - startRemember);
935 }
936
937 boolean extra = (finish > 0) && (start > 0);
938 int len;
939
940 if (extra) // extra pushback
941 start--;
942 len = finish - start;
943
944 System.arraycopy(buf, start, buf, 0, len);
945 start = 0;
946 finish = len;
947
948 try {
949 len = buf.length - len;
950 len = reader.read(buf, finish, len);
951 } catch (UnsupportedEncodingException e) {
952 fatal("P-075", new Object[]{e.getMessage()});
953 } catch (CharConversionException e) {
954 fatal("P-076", new Object[]{e.getMessage()});
955 }
956 if (len >= 0)
957 finish += len;
958 else
959 close();
960 if (extra) // extra pushback
961 start++;
962
963 if (startRemember != 0)
964 // assert extra == true
965 startRemember = 1;
966 }
967
968 public void close() {
969
970 try {
971 if (reader != null && !isClosed)
972 reader.close();
973 isClosed = true;
974 } catch (IOException e) {
975 /* NOTHING */
976 }
977 }
978
979
980 private void fatal(String messageId, Object params [])
981 throws SAXException {
982
983 SAXParseException x = new SAXParseException(DTDParser.messages.getMessage(locale, messageId, params), null);
984
985 // not continuable ... e.g. WF errors
986 close();
987 errHandler.fatalError(x);
988 throw x;
989 }
990 }

mercurial