src/share/jaxws_classes/com/sun/xml/internal/dtdparser/Resolver.java

changeset 0
373ffda63c9a
child 637
9c07ef4934dd
equal deleted inserted replaced
-1:000000000000 0:373ffda63c9a
1 /*
2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.xml.internal.dtdparser;
27
28 import org.xml.sax.EntityResolver;
29 import org.xml.sax.InputSource;
30
31 import java.io.File;
32 import java.io.FileInputStream;
33 import java.io.IOException;
34 import java.io.InputStream;
35 import java.net.URL;
36 import java.net.URLConnection;
37 import java.util.Hashtable;
38
39 /**
40 * This entity resolver class provides a number of utilities which can help
41 * management of external parsed entities in XML. These are commonly used
42 * to hold markup declarations that are to be used as part of a Document
43 * Type Declaration (DTD), or to hold text marked up with XML.
44 * <p/>
45 * <P> Features include: <UL>
46 * <p/>
47 * <LI> Static factory methods are provided for constructing SAX InputSource
48 * objects from Files, URLs, or MIME objects. This eliminates a class of
49 * error-prone coding in applications.
50 * <p/>
51 * <LI> Character encodings for XML documents are correctly supported: <UL>
52 * <p/>
53 * <LI> The encodings defined in the RFCs for MIME content types
54 * (2046 for general MIME, and 2376 for XML in particular), are
55 * supported, handling <em>charset=...</em> attributes and accepting
56 * content types which are known to be safe for use with XML;
57 * <p/>
58 * <LI> The character encoding autodetection algorithm identified
59 * in the XML specification is used, and leverages all of
60 * the JDK 1.1 (and later) character encoding support.
61 * <p/>
62 * <LI> The use of MIME typing may optionally be disabled, forcing the
63 * use of autodetection, to support web servers which don't correctly
64 * report MIME types for XML. For example, they may report text that
65 * is encoded in EUC-JP as being US-ASCII text, leading to fatal
66 * errors during parsing.
67 * <p/>
68 * <LI> The InputSource objects returned by this class always
69 * have a <code>java.io.Reader</code> available as the "character
70 * stream" property.
71 * <p/>
72 * </UL>
73 * <p/>
74 * <LI> Catalog entries can map public identifiers to Java resources or
75 * to local URLs. These are used to reduce network dependencies and loads,
76 * and will often be used for external DTD components. For example, packages
77 * shipping DTD files as resources in JAR files can eliminate network traffic
78 * when accessing them, and sites may provide local caches of common DTDs.
79 * Note that no particular catalog syntax is supported by this class, only
80 * the notion of a set of entries.
81 * <p/>
82 * </UL>
83 * <p/>
84 * <P> Subclasses can perform tasks such as supporting new URI schemes for
85 * URIs which are not URLs, such as URNs (see RFC 2396) or for accessing
86 * MIME entities which are part of a <em>multipart/related</em> group
87 * (see RFC 2387). They may also be used to support particular catalog
88 * syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm">
89 * SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal
90 * Public Identifiers" (FPIs).
91 *
92 * @author David Brownell
93 * @author Janet Koenig
94 * @version 1.3 00/02/24
95 */
96 public class Resolver implements EntityResolver {
97 private boolean ignoringMIME;
98
99 // table mapping public IDs to (local) URIs
100 private Hashtable id2uri;
101
102 // tables mapping public IDs to resources and classloaders
103 private Hashtable id2resource;
104 private Hashtable id2loader;
105
106 //
107 // table of MIME content types (less attributes!) known
108 // to be mostly "OK" to use with XML MIME entities. the
109 // idea is to rule out obvious braindamage ("image/jpg")
110 // not the subtle stuff ("text/html") that might actually
111 // be (or become) safe.
112 //
113 private static final String types [] = {
114 "application/xml",
115 "text/xml",
116 "text/plain",
117 "text/html", // commonly mis-inferred
118 "application/x-netcdf", // this is often illegal XML
119 "content/unknown"
120 };
121
122 /**
123 * Constructs a resolver.
124 */
125 public Resolver() {
126 }
127
128 /**
129 * Returns an input source, using the MIME type information and URL
130 * scheme to statically determine the correct character encoding if
131 * possible and otherwise autodetecting it. MIME carefully specifies
132 * the character encoding defaults, and how attributes of the content
133 * type can change it. XML further specifies two mandatory encodings
134 * (UTF-8 and UTF-16), and includes an XML declaration which can be
135 * used to internally label most documents encoded using US-ASCII
136 * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and
137 * more).
138 * <p/>
139 * <P> This method can be used to access XML documents which do not
140 * have URIs (such as servlet input streams, or most JavaMail message
141 * entities) and to support access methods such as HTTP POST or PUT.
142 * (URLs normally return content using the GET method.)
143 * <p/>
144 * <P> <em> The caller should set the system ID in order for relative URIs
145 * found in this document to be interpreted correctly.</em> In some cases,
146 * a custom resolver will need to be used; for example, documents
147 * may be grouped in a single MIME "multipart/related" bundle, and
148 * relative URLs would refer to other documents in that bundle.
149 *
150 * @param contentType The MIME content type for the source for which
151 * an InputSource is desired, such as <em>text/xml;charset=utf-8</em>.
152 * @param stream The input byte stream for the input source.
153 * @param checkType If true, this verifies that the content type is known
154 * to support XML documents, such as <em>application/xml</em>.
155 * @param scheme Unless this is "file", unspecified MIME types
156 * default to US-ASCII. Files are always autodetected since most
157 * file systems discard character encoding information.
158 */
159 public static InputSource createInputSource(String contentType,
160 InputStream stream,
161 boolean checkType,
162 String scheme) throws IOException {
163 InputSource retval;
164 String charset = null;
165
166 if (contentType != null) {
167 int index;
168
169 contentType = contentType.toLowerCase();
170 index = contentType.indexOf(';');
171 if (index != -1) {
172 String attributes;
173
174 attributes = contentType.substring(index + 1);
175 contentType = contentType.substring(0, index);
176
177 // use "charset=..." if it's available
178 index = attributes.indexOf("charset");
179 if (index != -1) {
180 attributes = attributes.substring(index + 7);
181 // strip out subsequent attributes
182 if ((index = attributes.indexOf(';')) != -1)
183 attributes = attributes.substring(0, index);
184 // find start of value
185 if ((index = attributes.indexOf('=')) != -1) {
186 attributes = attributes.substring(index + 1);
187 // strip out rfc822 comments
188 if ((index = attributes.indexOf('(')) != -1)
189 attributes = attributes.substring(0, index);
190 // double quotes are optional
191 if ((index = attributes.indexOf('"')) != -1) {
192 attributes = attributes.substring(index + 1);
193 attributes = attributes.substring(0,
194 attributes.indexOf('"'));
195 }
196 charset = attributes.trim();
197 // XXX "\;", "\)" etc were mishandled above
198 }
199 }
200 }
201
202 //
203 // Check MIME type.
204 //
205 if (checkType) {
206 boolean isOK = false;
207 for (int i = 0; i < types.length; i++)
208 if (types[i].equals(contentType)) {
209 isOK = true;
210 break;
211 }
212 if (!isOK)
213 throw new IOException("Not XML: " + contentType);
214 }
215
216 //
217 // "text/*" MIME types have hard-wired character set
218 // defaults, as specified in the RFCs. For XML, we
219 // ignore the system "file.encoding" property since
220 // autodetection is more correct.
221 //
222 if (charset == null) {
223 contentType = contentType.trim();
224 if (contentType.startsWith("text/")) {
225 if (!"file".equalsIgnoreCase(scheme))
226 charset = "US-ASCII";
227 }
228 // "application/*" has no default
229 }
230 }
231
232 retval = new InputSource(XmlReader.createReader(stream, charset));
233 retval.setByteStream(stream);
234 retval.setEncoding(charset);
235 return retval;
236 }
237
238
239 /**
240 * Creates an input source from a given URI.
241 *
242 * @param uri the URI (system ID) for the entity
243 * @param checkType if true, the MIME content type for the entity
244 * is checked for document type and character set encoding.
245 */
246 static public InputSource createInputSource(URL uri, boolean checkType)
247 throws IOException {
248
249 URLConnection conn = uri.openConnection();
250 InputSource retval;
251
252 if (checkType) {
253 String contentType = conn.getContentType();
254 retval = createInputSource(contentType, conn.getInputStream(),
255 false, uri.getProtocol());
256 } else {
257 retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
258 }
259 retval.setSystemId(conn.getURL().toString());
260 return retval;
261 }
262
263
264 /**
265 * Creates an input source from a given file, autodetecting
266 * the character encoding.
267 */
268 static public InputSource createInputSource(File file)
269 throws IOException {
270 InputSource retval;
271 String path;
272
273 retval = new InputSource(XmlReader.createReader(new FileInputStream(file)));
274
275 // On JDK 1.2 and later, simplify this:
276 // "path = file.toURL ().toString ()".
277 path = file.getAbsolutePath();
278 if (File.separatorChar != '/')
279 path = path.replace(File.separatorChar, '/');
280 if (!path.startsWith("/"))
281 path = "/" + path;
282 if (!path.endsWith("/") && file.isDirectory())
283 path = path + "/";
284
285 retval.setSystemId("file:" + path);
286 return retval;
287 }
288
289
290 /**
291 * <b>SAX:</b>
292 * Resolve the given entity into an input source. If the name can't
293 * be mapped to a preferred form of the entity, the URI is used. To
294 * resolve the entity, first a local catalog mapping names to URIs is
295 * consulted. If no mapping is found there, a catalog mapping names
296 * to java resources is consulted. Finally, if neither mapping found
297 * a copy of the entity, the specified URI is used.
298 * <p/>
299 * <P> When a URI is used, <a href="#createInputSource">
300 * createInputSource</a> is used to correctly deduce the character
301 * encoding used by this entity. No MIME type checking is done.
302 *
303 * @param name Used to find alternate copies of the entity, when
304 * this value is non-null; this is the XML "public ID".
305 * @param uri Used when no alternate copy of the entity is found;
306 * this is the XML "system ID", normally a URI.
307 */
308 public InputSource resolveEntity(String name, String uri)
309 throws IOException {
310 InputSource retval;
311 String mappedURI = name2uri(name);
312 InputStream stream;
313
314 // prefer explicit URI mappings, then bundled resources...
315 if (mappedURI == null && (stream = mapResource(name)) != null) {
316 uri = "java:resource:" + (String) id2resource.get(name);
317 retval = new InputSource(XmlReader.createReader(stream));
318
319 // ...and treat all URIs the same (as URLs for now).
320 } else {
321 URL url;
322 URLConnection conn;
323
324 if (mappedURI != null)
325 uri = mappedURI;
326 else if (uri == null)
327 return null;
328
329 url = new URL(uri);
330 conn = url.openConnection();
331 uri = conn.getURL().toString();
332 // System.out.println ("++ URI: " + url);
333 if (ignoringMIME)
334 retval = new InputSource(XmlReader.createReader(conn.getInputStream()));
335 else {
336 String contentType = conn.getContentType();
337 retval = createInputSource(contentType,
338 conn.getInputStream(),
339 false, url.getProtocol());
340 }
341 }
342 retval.setSystemId(uri);
343 retval.setPublicId(name);
344 return retval;
345 }
346
347
348 /**
349 * Returns true if this resolver is ignoring MIME types in the documents
350 * it returns, to work around bugs in how servers have reported the
351 * documents' MIME types.
352 */
353 public boolean isIgnoringMIME() {
354 return ignoringMIME;
355 }
356
357 /**
358 * Tells the resolver whether to ignore MIME types in the documents it
359 * retrieves. Many web servers incorrectly assign text documents a
360 * default character encoding, even when that is incorrect. For example,
361 * all HTTP text documents default to use ISO-8859-1 (used for Western
362 * European languages), and other MIME sources default text documents
363 * to use US-ASCII (a seven bit encoding). For XML documents which
364 * include text encoding declarations (as most should do), these server
365 * bugs can be worked around by ignoring the MIME type entirely.
366 */
367 public void setIgnoringMIME(boolean value) {
368 ignoringMIME = value;
369 }
370
371
372 // maps the public ID to an alternate URI, if one is registered
373 private String name2uri(String publicId) {
374 if (publicId == null || id2uri == null)
375 return null;
376 return (String) id2uri.get(publicId);
377 }
378
379
380 /**
381 * Registers the given public ID as corresponding to a particular
382 * URI, typically a local copy. This URI will be used in preference
383 * to ones provided as system IDs in XML entity declarations. This
384 * mechanism would most typically be used for Document Type Definitions
385 * (DTDs), where the public IDs are formally managed and versioned.
386 *
387 * @param publicId The managed public ID being mapped
388 * @param uri The URI of the preferred copy of that entity
389 */
390 public void registerCatalogEntry(String publicId,
391 String uri) {
392 if (id2uri == null)
393 id2uri = new Hashtable(17);
394 id2uri.put(publicId, uri);
395 }
396
397
398 // return the resource as a stream
399 private InputStream mapResource(String publicId) {
400 // System.out.println ("++ PUBLIC: " + publicId);
401 if (publicId == null || id2resource == null)
402 return null;
403
404 String resourceName = (String) id2resource.get(publicId);
405 ClassLoader loader = null;
406
407 if (resourceName == null)
408 return null;
409 // System.out.println ("++ Resource: " + resourceName);
410
411 if (id2loader != null)
412 loader = (ClassLoader) id2loader.get(publicId);
413 // System.out.println ("++ Loader: " + loader);
414 if (loader == null)
415 return ClassLoader.getSystemResourceAsStream(resourceName);
416 return loader.getResourceAsStream(resourceName);
417 }
418
419 /**
420 * Registers a given public ID as corresponding to a particular Java
421 * resource in a given class loader, typically distributed with a
422 * software package. This resource will be preferred over system IDs
423 * included in XML documents. This mechanism should most typically be
424 * used for Document Type Definitions (DTDs), where the public IDs are
425 * formally managed and versioned.
426 * <p/>
427 * <P> If a mapping to a URI has been provided, that mapping takes
428 * precedence over this one.
429 *
430 * @param publicId The managed public ID being mapped
431 * @param resourceName The name of the Java resource
432 * @param loader The class loader holding the resource, or null if
433 * it is a system resource.
434 */
435 public void registerCatalogEntry(String publicId,
436 String resourceName,
437 ClassLoader loader) {
438 if (id2resource == null)
439 id2resource = new Hashtable(17);
440 id2resource.put(publicId, resourceName);
441
442 if (loader != null) {
443 if (id2loader == null)
444 id2loader = new Hashtable(17);
445 id2loader.put(publicId, loader);
446 }
447 }
448 }

mercurial