1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/dtdparser/Resolver.java Wed Apr 27 01:27:09 2016 +0800 1.3 @@ -0,0 +1,448 @@ 1.4 +/* 1.5 + * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. Oracle designates this 1.11 + * particular file as subject to the "Classpath" exception as provided 1.12 + * by Oracle in the LICENSE file that accompanied this code. 1.13 + * 1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.17 + * version 2 for more details (a copy is included in the LICENSE file that 1.18 + * accompanied this code). 1.19 + * 1.20 + * You should have received a copy of the GNU General Public License version 1.21 + * 2 along with this work; if not, write to the Free Software Foundation, 1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.23 + * 1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.25 + * or visit www.oracle.com if you need additional information or have any 1.26 + * questions. 1.27 + */ 1.28 + 1.29 +package com.sun.xml.internal.dtdparser; 1.30 + 1.31 +import org.xml.sax.EntityResolver; 1.32 +import org.xml.sax.InputSource; 1.33 + 1.34 +import java.io.File; 1.35 +import java.io.FileInputStream; 1.36 +import java.io.IOException; 1.37 +import java.io.InputStream; 1.38 +import java.net.URL; 1.39 +import java.net.URLConnection; 1.40 +import java.util.Hashtable; 1.41 + 1.42 +/** 1.43 + * This entity resolver class provides a number of utilities which can help 1.44 + * managment of external parsed entities in XML. These are commonly used 1.45 + * to hold markup declarations that are to be used as part of a Document 1.46 + * Type Declaration (DTD), or to hold text marked up with XML. 1.47 + * <p/> 1.48 + * <P> Features include: <UL> 1.49 + * <p/> 1.50 + * <LI> Static factory methods are provided for constructing SAX InputSource 1.51 + * objects from Files, URLs, or MIME objects. This eliminates a class of 1.52 + * error-prone coding in applications. 1.53 + * <p/> 1.54 + * <LI> Character encodings for XML documents are correctly supported: <UL> 1.55 + * <p/> 1.56 + * <LI> The encodings defined in the RFCs for MIME content types 1.57 + * (2046 for general MIME, and 2376 for XML in particular), are 1.58 + * supported, handling <em>charset=...</em> attributes and accepting 1.59 + * content types which are known to be safe for use with XML; 1.60 + * <p/> 1.61 + * <LI> The character encoding autodetection algorithm identified 1.62 + * in the XML specification is used, and leverages all of 1.63 + * the JDK 1.1 (and later) character encoding support. 1.64 + * <p/> 1.65 + * <LI> The use of MIME typing may optionally be disabled, forcing the 1.66 + * use of autodetection, to support web servers which don't correctly 1.67 + * report MIME types for XML. For example, they may report text that 1.68 + * is encoded in EUC-JP as being US-ASCII text, leading to fatal 1.69 + * errors during parsing. 1.70 + * <p/> 1.71 + * <LI> The InputSource objects returned by this class always 1.72 + * have a <code>java.io.Reader</code> available as the "character 1.73 + * stream" property. 1.74 + * <p/> 1.75 + * </UL> 1.76 + * <p/> 1.77 + * <LI> Catalog entries can map public identifiers to Java resources or 1.78 + * to local URLs. These are used to reduce network dependencies and loads, 1.79 + * and will often be used for external DTD components. For example, packages 1.80 + * shipping DTD files as resources in JAR files can eliminate network traffic 1.81 + * when accessing them, and sites may provide local caches of common DTDs. 1.82 + * Note that no particular catalog syntax is supported by this class, only 1.83 + * the notion of a set of entries. 1.84 + * <p/> 1.85 + * </UL> 1.86 + * <p/> 1.87 + * <P> Subclasses can perform tasks such as supporting new URI schemes for 1.88 + * URIs which are not URLs, such as URNs (see RFC 2396) or for accessing 1.89 + * MIME entities which are part of a <em>multipart/related</em> group 1.90 + * (see RFC 2387). They may also be used to support particular catalog 1.91 + * syntaxes, such as the <a href="http://www.oasis-open.org/html/a401.htm"> 1.92 + * SGML/Open Catalog (SOCAT)</a> which supports the SGML notion of "Formal 1.93 + * Public Identifiers (FPIs). 1.94 + * 1.95 + * @author David Brownell 1.96 + * @author Janet Koenig 1.97 + * @version 1.3 00/02/24 1.98 + */ 1.99 +public class Resolver implements EntityResolver { 1.100 + private boolean ignoringMIME; 1.101 + 1.102 + // table mapping public IDs to (local) URIs 1.103 + private Hashtable id2uri; 1.104 + 1.105 + // tables mapping public IDs to resources and classloaders 1.106 + private Hashtable id2resource; 1.107 + private Hashtable id2loader; 1.108 + 1.109 + // 1.110 + // table of MIME content types (less attributes!) known 1.111 + // to be mostly "OK" to use with XML MIME entities. the 1.112 + // idea is to rule out obvious braindamage ("image/jpg") 1.113 + // not the subtle stuff ("text/html") that might actually 1.114 + // be (or become) safe. 1.115 + // 1.116 + private static final String types [] = { 1.117 + "application/xml", 1.118 + "text/xml", 1.119 + "text/plain", 1.120 + "text/html", // commonly mis-inferred 1.121 + "application/x-netcdf", // this is often illegal XML 1.122 + "content/unknown" 1.123 + }; 1.124 + 1.125 + /** 1.126 + * Constructs a resolver. 1.127 + */ 1.128 + public Resolver() { 1.129 + } 1.130 + 1.131 + /** 1.132 + * Returns an input source, using the MIME type information and URL 1.133 + * scheme to statically determine the correct character encoding if 1.134 + * possible and otherwise autodetecting it. MIME carefully specifies 1.135 + * the character encoding defaults, and how attributes of the content 1.136 + * type can change it. XML further specifies two mandatory encodings 1.137 + * (UTF-8 and UTF-16), and includes an XML declaration which can be 1.138 + * used to internally label most documents encoded using US-ASCII 1.139 + * supersets (such as Shift_JIS, EUC-JP, ISO-2022-*, ISO-8859-*, and 1.140 + * more). 1.141 + * <p/> 1.142 + * <P> This method can be used to access XML documents which do not 1.143 + * have URIs (such as servlet input streams, or most JavaMail message 1.144 + * entities) and to support access methods such as HTTP POST or PUT. 1.145 + * (URLs normally return content using the GET method.) 1.146 + * <p/> 1.147 + * <P> <em> The caller should set the system ID in order for relative URIs 1.148 + * found in this document to be interpreted correctly.</em> In some cases, 1.149 + * a custom resolver will need to be used; for example, documents 1.150 + * may be grouped in a single MIME "multipart/related" bundle, and 1.151 + * relative URLs would refer to other documents in that bundle. 1.152 + * 1.153 + * @param contentType The MIME content type for the source for which 1.154 + * an InputSource is desired, such as <em>text/xml;charset=utf-8</em>. 1.155 + * @param stream The input byte stream for the input source. 1.156 + * @param checkType If true, this verifies that the content type is known 1.157 + * to support XML documents, such as <em>application/xml</em>. 1.158 + * @param scheme Unless this is "file", unspecified MIME types 1.159 + * default to US-ASCII. Files are always autodetected since most 1.160 + * file systems discard character encoding information. 1.161 + */ 1.162 + public static InputSource createInputSource(String contentType, 1.163 + InputStream stream, 1.164 + boolean checkType, 1.165 + String scheme) throws IOException { 1.166 + InputSource retval; 1.167 + String charset = null; 1.168 + 1.169 + if (contentType != null) { 1.170 + int index; 1.171 + 1.172 + contentType = contentType.toLowerCase(); 1.173 + index = contentType.indexOf(';'); 1.174 + if (index != -1) { 1.175 + String attributes; 1.176 + 1.177 + attributes = contentType.substring(index + 1); 1.178 + contentType = contentType.substring(0, index); 1.179 + 1.180 + // use "charset=..." if it's available 1.181 + index = attributes.indexOf("charset"); 1.182 + if (index != -1) { 1.183 + attributes = attributes.substring(index + 7); 1.184 + // strip out subsequent attributes 1.185 + if ((index = attributes.indexOf(';')) != -1) 1.186 + attributes = attributes.substring(0, index); 1.187 + // find start of value 1.188 + if ((index = attributes.indexOf('=')) != -1) { 1.189 + attributes = attributes.substring(index + 1); 1.190 + // strip out rfc822 comments 1.191 + if ((index = attributes.indexOf('(')) != -1) 1.192 + attributes = attributes.substring(0, index); 1.193 + // double quotes are optional 1.194 + if ((index = attributes.indexOf('"')) != -1) { 1.195 + attributes = attributes.substring(index + 1); 1.196 + attributes = attributes.substring(0, 1.197 + attributes.indexOf('"')); 1.198 + } 1.199 + charset = attributes.trim(); 1.200 + // XXX "\;", "\)" etc were mishandled above 1.201 + } 1.202 + } 1.203 + } 1.204 + 1.205 + // 1.206 + // Check MIME type. 1.207 + // 1.208 + if (checkType) { 1.209 + boolean isOK = false; 1.210 + for (int i = 0; i < types.length; i++) 1.211 + if (types[i].equals(contentType)) { 1.212 + isOK = true; 1.213 + break; 1.214 + } 1.215 + if (!isOK) 1.216 + throw new IOException("Not XML: " + contentType); 1.217 + } 1.218 + 1.219 + // 1.220 + // "text/*" MIME types have hard-wired character set 1.221 + // defaults, as specified in the RFCs. For XML, we 1.222 + // ignore the system "file.encoding" property since 1.223 + // autodetection is more correct. 1.224 + // 1.225 + if (charset == null) { 1.226 + contentType = contentType.trim(); 1.227 + if (contentType.startsWith("text/")) { 1.228 + if (!"file".equalsIgnoreCase(scheme)) 1.229 + charset = "US-ASCII"; 1.230 + } 1.231 + // "application/*" has no default 1.232 + } 1.233 + } 1.234 + 1.235 + retval = new InputSource(XmlReader.createReader(stream, charset)); 1.236 + retval.setByteStream(stream); 1.237 + retval.setEncoding(charset); 1.238 + return retval; 1.239 + } 1.240 + 1.241 + 1.242 + /** 1.243 + * Creates an input source from a given URI. 1.244 + * 1.245 + * @param uri the URI (system ID) for the entity 1.246 + * @param checkType if true, the MIME content type for the entity 1.247 + * is checked for document type and character set encoding. 1.248 + */ 1.249 + static public InputSource createInputSource(URL uri, boolean checkType) 1.250 + throws IOException { 1.251 + 1.252 + URLConnection conn = uri.openConnection(); 1.253 + InputSource retval; 1.254 + 1.255 + if (checkType) { 1.256 + String contentType = conn.getContentType(); 1.257 + retval = createInputSource(contentType, conn.getInputStream(), 1.258 + false, uri.getProtocol()); 1.259 + } else { 1.260 + retval = new InputSource(XmlReader.createReader(conn.getInputStream())); 1.261 + } 1.262 + retval.setSystemId(conn.getURL().toString()); 1.263 + return retval; 1.264 + } 1.265 + 1.266 + 1.267 + /** 1.268 + * Creates an input source from a given file, autodetecting 1.269 + * the character encoding. 1.270 + */ 1.271 + static public InputSource createInputSource(File file) 1.272 + throws IOException { 1.273 + InputSource retval; 1.274 + String path; 1.275 + 1.276 + retval = new InputSource(XmlReader.createReader(new FileInputStream(file))); 1.277 + 1.278 + // On JDK 1.2 and later, simplify this: 1.279 + // "path = file.toURL ().toString ()". 1.280 + path = file.getAbsolutePath(); 1.281 + if (File.separatorChar != '/') 1.282 + path = path.replace(File.separatorChar, '/'); 1.283 + if (!path.startsWith("/")) 1.284 + path = "/" + path; 1.285 + if (!path.endsWith("/") && file.isDirectory()) 1.286 + path = path + "/"; 1.287 + 1.288 + retval.setSystemId("file:" + path); 1.289 + return retval; 1.290 + } 1.291 + 1.292 + 1.293 + /** 1.294 + * <b>SAX:</b> 1.295 + * Resolve the given entity into an input source. If the name can't 1.296 + * be mapped to a preferred form of the entity, the URI is used. To 1.297 + * resolve the entity, first a local catalog mapping names to URIs is 1.298 + * consulted. If no mapping is found there, a catalog mapping names 1.299 + * to java resources is consulted. Finally, if neither mapping found 1.300 + * a copy of the entity, the specified URI is used. 1.301 + * <p/> 1.302 + * <P> When a URI is used, <a href="#createInputSource"> 1.303 + * createInputSource</a> is used to correctly deduce the character 1.304 + * encoding used by this entity. No MIME type checking is done. 1.305 + * 1.306 + * @param name Used to find alternate copies of the entity, when 1.307 + * this value is non-null; this is the XML "public ID". 1.308 + * @param uri Used when no alternate copy of the entity is found; 1.309 + * this is the XML "system ID", normally a URI. 1.310 + */ 1.311 + public InputSource resolveEntity(String name, String uri) 1.312 + throws IOException { 1.313 + InputSource retval; 1.314 + String mappedURI = name2uri(name); 1.315 + InputStream stream; 1.316 + 1.317 + // prefer explicit URI mappings, then bundled resources... 1.318 + if (mappedURI == null && (stream = mapResource(name)) != null) { 1.319 + uri = "java:resource:" + (String) id2resource.get(name); 1.320 + retval = new InputSource(XmlReader.createReader(stream)); 1.321 + 1.322 + // ...and treat all URIs the same (as URLs for now). 1.323 + } else { 1.324 + URL url; 1.325 + URLConnection conn; 1.326 + 1.327 + if (mappedURI != null) 1.328 + uri = mappedURI; 1.329 + else if (uri == null) 1.330 + return null; 1.331 + 1.332 + url = new URL(uri); 1.333 + conn = url.openConnection(); 1.334 + uri = conn.getURL().toString(); 1.335 + // System.out.println ("++ URI: " + url); 1.336 + if (ignoringMIME) 1.337 + retval = new InputSource(XmlReader.createReader(conn.getInputStream())); 1.338 + else { 1.339 + String contentType = conn.getContentType(); 1.340 + retval = createInputSource(contentType, 1.341 + conn.getInputStream(), 1.342 + false, url.getProtocol()); 1.343 + } 1.344 + } 1.345 + retval.setSystemId(uri); 1.346 + retval.setPublicId(name); 1.347 + return retval; 1.348 + } 1.349 + 1.350 + 1.351 + /** 1.352 + * Returns true if this resolver is ignoring MIME types in the documents 1.353 + * it returns, to work around bugs in how servers have reported the 1.354 + * documents' MIME types. 1.355 + */ 1.356 + public boolean isIgnoringMIME() { 1.357 + return ignoringMIME; 1.358 + } 1.359 + 1.360 + /** 1.361 + * Tells the resolver whether to ignore MIME types in the documents it 1.362 + * retrieves. Many web servers incorrectly assign text documents a 1.363 + * default character encoding, even when that is incorrect. For example, 1.364 + * all HTTP text documents default to use ISO-8859-1 (used for Western 1.365 + * European languages), and other MIME sources default text documents 1.366 + * to use US-ASCII (a seven bit encoding). For XML documents which 1.367 + * include text encoding declarations (as most should do), these server 1.368 + * bugs can be worked around by ignoring the MIME type entirely. 1.369 + */ 1.370 + public void setIgnoringMIME(boolean value) { 1.371 + ignoringMIME = value; 1.372 + } 1.373 + 1.374 + 1.375 + // maps the public ID to an alternate URI, if one is registered 1.376 + private String name2uri(String publicId) { 1.377 + if (publicId == null || id2uri == null) 1.378 + return null; 1.379 + return (String) id2uri.get(publicId); 1.380 + } 1.381 + 1.382 + 1.383 + /** 1.384 + * Registers the given public ID as corresponding to a particular 1.385 + * URI, typically a local copy. This URI will be used in preference 1.386 + * to ones provided as system IDs in XML entity declarations. This 1.387 + * mechanism would most typically be used for Document Type Definitions 1.388 + * (DTDs), where the public IDs are formally managed and versioned. 1.389 + * 1.390 + * @param publicId The managed public ID being mapped 1.391 + * @param uri The URI of the preferred copy of that entity 1.392 + */ 1.393 + public void registerCatalogEntry(String publicId, 1.394 + String uri) { 1.395 + if (id2uri == null) 1.396 + id2uri = new Hashtable(17); 1.397 + id2uri.put(publicId, uri); 1.398 + } 1.399 + 1.400 + 1.401 + // return the resource as a stream 1.402 + private InputStream mapResource(String publicId) { 1.403 + // System.out.println ("++ PUBLIC: " + publicId); 1.404 + if (publicId == null || id2resource == null) 1.405 + return null; 1.406 + 1.407 + String resourceName = (String) id2resource.get(publicId); 1.408 + ClassLoader loader = null; 1.409 + 1.410 + if (resourceName == null) 1.411 + return null; 1.412 + // System.out.println ("++ Resource: " + resourceName); 1.413 + 1.414 + if (id2loader != null) 1.415 + loader = (ClassLoader) id2loader.get(publicId); 1.416 + // System.out.println ("++ Loader: " + loader); 1.417 + if (loader == null) 1.418 + return ClassLoader.getSystemResourceAsStream(resourceName); 1.419 + return loader.getResourceAsStream(resourceName); 1.420 + } 1.421 + 1.422 + /** 1.423 + * Registers a given public ID as corresponding to a particular Java 1.424 + * resource in a given class loader, typically distributed with a 1.425 + * software package. This resource will be preferred over system IDs 1.426 + * included in XML documents. This mechanism should most typically be 1.427 + * used for Document Type Definitions (DTDs), where the public IDs are 1.428 + * formally managed and versioned. 1.429 + * <p/> 1.430 + * <P> If a mapping to a URI has been provided, that mapping takes 1.431 + * precedence over this one. 1.432 + * 1.433 + * @param publicId The managed public ID being mapped 1.434 + * @param resourceName The name of the Java resource 1.435 + * @param loader The class loader holding the resource, or null if 1.436 + * it is a system resource. 1.437 + */ 1.438 + public void registerCatalogEntry(String publicId, 1.439 + String resourceName, 1.440 + ClassLoader loader) { 1.441 + if (id2resource == null) 1.442 + id2resource = new Hashtable(17); 1.443 + id2resource.put(publicId, resourceName); 1.444 + 1.445 + if (loader != null) { 1.446 + if (id2loader == null) 1.447 + id2loader = new Hashtable(17); 1.448 + id2loader.put(publicId, loader); 1.449 + } 1.450 + } 1.451 +}